lorekeep 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lorekeep/config.py ADDED
@@ -0,0 +1,39 @@
1
+ """Load Lorekeep config. Path resolved by paths.resolve_paths() (dev .lorekeep/, LOREKEEP_HOME, or XDG)."""
2
+ from __future__ import annotations
3
+
4
+ from pathlib import Path
5
+
6
+ import yaml
7
+ from pydantic import BaseModel, Field
8
+
9
+
10
class ProviderConfig(BaseModel):
    # LLM provider settings; every field has a default, so an empty mapping validates.

    # openai | anthropic | ollama | <litellm prefix>
    backend: str = "openai"
    model: str = "gpt-4o-mini"
    # set for ollama or openai-compatible endpoints
    api_base: str | None = None
    # env var holding the api key (else litellm default)
    api_key_env: str | None = None
    # inline key (gitignored config only; env is safer)
    api_key: str | None = None
    temperature: float = 0.0
17
+
18
+
19
class CompileConfig(BaseModel):
    # Compile-stage settings.

    # presumably the number of source lines per chunk - confirm against the chunker
    chunk_lines: int = 60
21
+
22
+
23
class NsConfig(BaseModel):
    # Namespace settings. Both defaults are built per instance via default_factory,
    # so instances never share the same list/dict object.

    # namespaces used by default; a fresh ["public"] list per instance
    default: list[str] = Field(default_factory=lambda: ["public"])
    # maps a string key to a list of namespace names (empty unless configured)
    token_map: dict[str, list[str]] = Field(default_factory=dict)
26
+
27
+
28
class Config(BaseModel):
    # Top-level Lorekeep configuration; each section falls back to its own defaults.

    provider: ProviderConfig = Field(default_factory=ProviderConfig)
    compile: CompileConfig = Field(default_factory=CompileConfig)
    ns: NsConfig = Field(default_factory=NsConfig)
    # pypi | local | git+URL | path
    install_source: str | None = None
33
+
34
+
35
def load_config(path: Path) -> Config:
    """Load and validate the config file at ``path``.

    Returns an all-defaults ``Config`` when the file does not exist. An empty
    YAML document (which parses to a falsy value) is validated as an empty
    mapping, so it also yields the defaults.
    """
    if path.exists():
        raw_text = path.read_text(encoding="utf-8")
        parsed = yaml.safe_load(raw_text)
        return Config.model_validate(parsed if parsed else {})
    return Config()
lorekeep/defaults.py ADDED
@@ -0,0 +1,44 @@
1
+ """Default config + schema used by `lorekeep init` to bootstrap a fresh home."""
2
+ from __future__ import annotations
3
+
4
# Default graph schema: every node property is typed "string", and every edge
# type names the node type it starts from and the node type it points to.
DEFAULT_SCHEMA = {
    "version": 2,
    "node_types": {
        type_name: {"props": dict.fromkeys(prop_names, "string")}
        for type_name, prop_names in (
            ("service", ("name", "lang")),
            ("team", ("name",)),
            ("decision", ("title",)),
            ("project", ("name", "status")),
            ("person", ("name", "role")),
            ("tool", ("name", "category")),
            ("command", ("name", "platform")),
            ("concept", ("name", "domain")),
            ("note", ("title", "topic")),
            ("document", ("title", "kind")),
        )
    },
    "edge_types": {
        edge_name: {"from": source_type, "to": target_type}
        for edge_name, source_type, target_type in (
            ("depends_on", "service", "service"),
            ("decided_by", "decision", "team"),
            ("owns", "team", "service"),
            ("part_of", "service", "project"),
            ("uses", "service", "tool"),
            ("mentions", "note", "concept"),
            ("documents", "document", "concept"),
            ("describes", "note", "service"),
            ("relates_to", "concept", "concept"),
        )
    },
}
30
+
31
+ DEFAULT_CONFIG_YAML = """\
32
+ provider:
33
+ backend: openai
34
+ model: openai/gpt-4o-mini
35
+ api_base: null
36
+ api_key_env: OPENAI_API_KEY
37
+ api_key: null
38
+ temperature: 0.0
39
+ compile:
40
+ chunk_lines: 60
41
+ ns:
42
+ default: [public]
43
+ install_source: pypi
44
+ """
File without changes
@@ -0,0 +1,97 @@
1
+ """Tier-1 construction-quality evaluation vs a gold corpus."""
2
+ from __future__ import annotations
3
+
4
+ from itertools import combinations
5
+ from pathlib import Path
6
+
7
+ from lorekeep.eval.gold import edge_key, load_compiled, load_gold, node_key
8
+ from lorekeep.models import Edge, Node
9
+
10
+
11
def precision_recall_f1(gold: set, got: set) -> tuple[float, float, float]:
    """Return ``(precision, recall, f1)`` of ``got`` measured against ``gold``.

    Two empty sets count as a perfect match (1.0, 1.0, 1.0). Otherwise an empty
    ``got`` gives precision 0.0, an empty ``gold`` gives recall 0.0, and F1 is
    0.0 whenever precision + recall is zero.
    """
    if not (gold or got):
        return 1.0, 1.0, 1.0
    hits = len(gold & got)
    precision = hits / len(got) if got else 0.0
    recall = hits / len(gold) if gold else 0.0
    total = precision + recall
    if not total:
        return precision, recall, 0.0
    return precision, recall, 2 * precision * recall / total
19
+
20
+
21
def extraction_report(graph_dir: Path, gold_dir: Path) -> dict:
    """Score the compiled facts in ``graph_dir`` against the gold facts in ``gold_dir``.

    Nodes are compared by ``node_key`` and edges by ``edge_key`` (resolved against
    the nodes of the same side). Returns precision/recall/F1 under the ``"nodes"``
    and ``"edges"`` keys.
    """
    compiled_facts = load_compiled(graph_dir)
    gold_facts = load_gold(gold_dir)

    def _match_keys(facts):
        # Split one side into its node-key set and its edge-key set.
        side_nodes = [fact for fact in facts if isinstance(fact, Node)]
        side_edges = [fact for fact in facts if isinstance(fact, Edge)]
        by_id = {node.id: node for node in side_nodes}
        node_keys = {node_key(node) for node in side_nodes}
        edge_keys = {edge_key(edge, by_id) for edge in side_edges}
        return node_keys, edge_keys

    compiled_node_keys, compiled_edge_keys = _match_keys(compiled_facts)
    gold_node_keys, gold_edge_keys = _match_keys(gold_facts)

    node_scores = precision_recall_f1(gold_node_keys, compiled_node_keys)
    edge_scores = precision_recall_f1(gold_edge_keys, compiled_edge_keys)
    metric_names = ("precision", "recall", "f1")
    return {
        "nodes": dict(zip(metric_names, node_scores)),
        "edges": dict(zip(metric_names, edge_scores)),
    }
44
+
45
+
46
def _clusters_from_aliases(alias_groups: list[dict]) -> list[set[str]]:
    """Each gold group -> set of surface names that should be one entity."""
    clusters: list[set[str]] = []
    for group in alias_groups:
        clusters.append(set(group["aliases"]))
    return clusters
49
+
50
+
51
def _compiled_clusters(compiled_nodes: list) -> list[set[str]]:
    """Group compiled node names by their id (canonical entity).

    A node without a ``name`` prop contributes its id as the name. Clusters are
    returned in order of first appearance of each id.
    """
    grouped: dict[str, set[str]] = {}
    for node in compiled_nodes:
        label = node.props.get("name", node.id)
        if node.id not in grouped:
            grouped[node.id] = set()
        grouped[node.id].add(label)
    return list(grouped.values())
58
+
59
+
60
def entity_resolution_f1(compiled_nodes: list, gold_alias_groups: list[dict]) -> dict:
    """Pairwise coreference F1: for every pair of mentions, did compiled agree with gold?"""

    def _pairs(clusters) -> set[frozenset]:
        # Every unordered pair of names that share a cluster.
        return {
            frozenset(pair)
            for cluster in clusters
            for pair in combinations(sorted(cluster), 2)
        }

    expected = _pairs(_clusters_from_aliases(gold_alias_groups))
    produced = _pairs(_compiled_clusters(compiled_nodes))
    precision, recall, f1 = precision_recall_f1(expected, produced)
    return {"precision": precision, "recall": recall, "f1": f1}
74
+
75
+
76
def structure_report(facts_dir: Path) -> dict:
    """Graph-shape metrics: counts, avg degree, density, dangling-edge rate.

    ``avg_degree`` is edges per node and ``density`` is edges / (n * (n - 1));
    both are 0.0 when the node count makes them undefined. An edge is dangling
    when either endpoint id is not among the loaded nodes. Ratios are rounded
    to four decimals.
    """
    # A directory holding facts.jsonl is read as a compiled graph, anything else as gold.
    loader = load_compiled if (facts_dir / "facts.jsonl").exists() else load_gold
    facts = loader(facts_dir)

    nodes = [fact for fact in facts if isinstance(fact, Node)]
    edges = [fact for fact in facts if isinstance(fact, Edge)]
    known_ids = {node.id for node in nodes}

    dangling = 0
    for edge in edges:
        if not (edge.from_ in known_ids and edge.to in known_ids):
            dangling += 1

    node_count, edge_count = len(nodes), len(edges)
    avg_degree = edge_count / node_count if node_count else 0.0
    density = edge_count / (node_count * (node_count - 1)) if node_count > 1 else 0.0
    dangling_rate = round(dangling / edge_count, 4) if edge_count else 0.0
    return {
        "node_count": node_count,
        "edge_count": edge_count,
        "avg_degree": round(avg_degree, 4),
        "density": round(density, 4),
        "dangling_edge_rate": dangling_rate,
    }
lorekeep/eval/gold.py ADDED
@@ -0,0 +1,31 @@
1
+ """Load gold + compiled facts, and define match keys for evaluation."""
2
+ from __future__ import annotations
3
+
4
+ from pathlib import Path
5
+
6
+ from lorekeep.facts_io import read_facts
7
+ from lorekeep.models import Edge, Node
8
+
9
+
10
def load_gold(gold_dir: Path) -> list[Node | Edge]:
    """Load every *.facts.jsonl under gold_dir.

    Files are found recursively and read in sorted path order; their facts are
    concatenated into one list.
    """
    return [
        fact
        for facts_file in sorted(gold_dir.glob("**/*.facts.jsonl"))
        for fact in read_facts(facts_file)
    ]
16
+
17
+
18
def load_compiled(graph_dir: Path) -> list[Node | Edge]:
    """Load the compiled facts from ``graph_dir / "facts.jsonl"``."""
    facts_file = graph_dir / "facts.jsonl"
    return read_facts(facts_file)
20
+
21
+
22
def node_key(n: Node) -> tuple[str, str]:
    """Match key for a node: its type plus its ``name`` prop (or its id if unnamed)."""
    label = n.props.get("name", n.id)
    return (n.type, label)
24
+
25
+
26
def edge_key(e: Edge, nodes_by_id: dict[str, Node]) -> tuple[str, str, str]:
    """Match key for an edge: ``(type, from-name, to-name)``.

    Each endpoint is represented by the ``name`` prop of the node found in
    ``nodes_by_id``; the raw endpoint id is used when the node is unknown or
    has no ``name``.
    """

    def _label(endpoint_id: str) -> str:
        node = nodes_by_id.get(endpoint_id)
        if node:
            return node.props.get("name", endpoint_id)
        return endpoint_id

    source_label = _label(e.from_)
    target_label = _label(e.to)
    return (e.type, source_label, target_label)
@@ -0,0 +1,46 @@
1
+ """Tier-2 retrieval/temporal eval: minimal harness (not full benchmark datasets).
2
+
3
+ Loads a fixture graph + a small JSON question set, runs the scoped query path,
4
+ and checks expected node ids / edge-type presence per question. Full
5
+ HotpotQA/CronQuestions adaptation is deferred (spec §16 Tier 2).
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ from pathlib import Path
11
+
12
+ from lorekeep.perm.ns import ScopedGraph
13
+ from lorekeep.store.graph import GraphStore, parse_date
14
+
15
+
16
def retrieval_report(graph_dir: Path, questions_path: Path, allowed_ns) -> dict:
    """Run every question in ``questions_path`` against the scoped graph.

    The graph is loaded from ``graph_dir / "facts.jsonl"`` and wrapped in a
    ``ScopedGraph`` restricted to ``allowed_ns``. Returns ``total``, ``passed``
    and ``failures`` (the ``id`` of every question whose check failed, in file
    order).
    """
    store = GraphStore.from_jsonl(Path(graph_dir) / "facts.jsonl")
    scoped = ScopedGraph(store, allowed_ns)
    # Decode explicitly as UTF-8 rather than the locale default, matching how
    # the rest of the package reads its files.
    questions = json.loads(Path(questions_path).read_text(encoding="utf-8"))

    failures = [q["id"] for q in questions if not _check(scoped, q)]
    total = len(questions)
    return {"total": total, "passed": total - len(failures), "failures": failures}
28
+
29
+
30
def _check(scoped: ScopedGraph, q: dict) -> bool:
    """Evaluate one question dict against the scoped graph.

    ``multihop``: passes when every id in ``expect_node_ids`` is among the
    neighbors of ``start`` within ``depth`` (default 1).
    ``temporal``: takes the snapshot at ``time`` and passes when all
    ``expect_edge_types_present`` occur and none of ``expect_edge_types_absent``
    do (each expectation is optional).
    Any other ``kind`` fails.
    """
    kind = q["kind"]
    if kind == "multihop":
        reached = scoped.neighbors(q["start"], depth=q.get("depth", 1))
        reached_ids = {node.id for node in reached["nodes"]}
        return set(q["expect_node_ids"]) <= reached_ids
    if kind != "temporal":
        return False

    _, edges = scoped.snapshot(parse_date(q["time"]))
    seen_types = {edge.type for edge in edges}
    required_missing = (
        "expect_edge_types_present" in q
        and not set(q["expect_edge_types_present"]).issubset(seen_types)
    )
    if required_missing:
        return False
    forbidden_seen = (
        "expect_edge_types_absent" in q
        and bool(set(q["expect_edge_types_absent"]) & seen_types)
    )
    return not forbidden_seen
lorekeep/facts_io.py ADDED
@@ -0,0 +1,22 @@
1
+ """Shared loader for facts.jsonl -> list[Node|Edge]. Used by store + eval."""
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ from pathlib import Path
6
+
7
+ from lorekeep.models import Edge, Node
8
+
9
+
10
def read_facts(path: Path) -> list[Node | Edge]:
    """Read a facts.jsonl file (one JSON object per line) into typed facts.

    Blank lines are skipped. A record whose ``kind`` is ``"node"`` is validated
    as a ``Node``; every other record is validated as an ``Edge``.
    """
    parsed: list[Node | Edge] = []
    for raw_line in Path(path).read_text(encoding="utf-8").splitlines():
        stripped = raw_line.strip()
        if stripped:
            record = json.loads(stripped)
            model = Node if record["kind"] == "node" else Edge
            parsed.append(model.model_validate(record))
    return parsed
File without changes
@@ -0,0 +1,19 @@
1
+ """Claude Code MCP config writer (.mcp.json)."""
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ from pathlib import Path
6
+
7
+
8
def write_config(target_dir: Path, command: str, args: list[str], ns: str | None) -> Path:
    """Add or replace the ``lorekeep`` server entry in ``<target_dir>/.mcp.json``.

    Existing content is preserved: other servers under ``mcpServers`` and any
    other top-level keys in the file are written back unchanged. When ``ns`` is
    truthy it is passed to the server through the ``LOREKEEP_NS`` env var.

    Returns the path of the written file.
    Raises ``ValueError`` if the existing file does not hold a JSON object.
    """
    entry = {"command": command, "args": args}
    if ns:
        entry["env"] = {"LOREKEEP_NS": ns}

    path = Path(target_dir) / ".mcp.json"
    existing = {}
    if path.exists():
        existing = json.loads(path.read_text(encoding="utf-8"))
        if not isinstance(existing, dict):
            raise ValueError(f"{path} must contain a JSON object")

    # Tolerate a missing or null "mcpServers" entry.
    servers = existing.get("mcpServers") or {}
    servers["lorekeep"] = entry
    # Write the whole document back, not just mcpServers, so unrelated
    # top-level settings in the user's file are not lost.
    existing["mcpServers"] = servers

    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(json.dumps(existing, indent=2), encoding="utf-8")
    return path
@@ -0,0 +1,56 @@
1
+ """Codex MCP config writer (config.toml [mcp_servers.lorekeep]).
2
+
3
+ Idempotent: re-running replaces the existing [mcp_servers.lorekeep] block instead
4
+ of appending a duplicate. Values are escaped so a stray quote/backslash in the
5
+ namespace or command can't break the generated TOML.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ from pathlib import Path
10
+
11
# TOML table header of the block this writer owns inside config.toml.
_HEADER = "[mcp_servers.lorekeep]"
12
+
13
+
14
def _toml_escape(s: str) -> str:
    """Escape ``s`` for use inside a TOML basic (double-quoted) string.

    Backslash and double quote are backslash-escaped. Control characters, which
    TOML forbids raw inside a basic string (U+0000-U+001F except tab, and
    U+007F), are written as their short escape or as ``\\uXXXX`` so that a
    newline in a command or argument cannot break the generated file. Tab is
    legal in a basic string and is left as-is.
    """
    short_escapes = {
        "\\": "\\\\",
        '"': '\\"',
        "\b": "\\b",
        "\n": "\\n",
        "\f": "\\f",
        "\r": "\\r",
    }
    pieces = []
    for ch in s:
        if ch in short_escapes:
            pieces.append(short_escapes[ch])
        elif (ch < " " and ch != "\t") or ch == "\x7f":
            pieces.append(f"\\u{ord(ch):04X}")
        else:
            pieces.append(ch)
    return "".join(pieces)
16
+
17
+
18
def _toml_quote_list(items: list[str]) -> str:
    """Render ``items`` as a single-line TOML array of escaped, double-quoted strings."""
    quoted = []
    for item in items:
        quoted.append('"' + _toml_escape(item) + '"')
    return "[" + ", ".join(quoted) + "]"
20
+
21
+
22
def _lorekeep_block(command: str, args: list[str], ns: str | None) -> str:
    """Build the TOML text of the lorekeep server table (no trailing newline).

    The table holds ``command`` and ``args``; an inline ``env`` table carrying
    ``LOREKEEP_NS`` is added only when ``ns`` is truthy.
    """
    command_line = 'command = "' + _toml_escape(command) + '"'
    args_line = "args = " + _toml_quote_list(args)
    parts = [_HEADER, command_line, args_line]
    if ns:
        parts += ['env = { LOREKEEP_NS = "' + _toml_escape(ns) + '" }']
    return "\n".join(parts)
31
+
32
+
33
def write_config(target_dir: Path, command: str, args: list[str], ns: str | None) -> Path:
    """Write the lorekeep server table into ``<target_dir>/config.toml``.

    If the file already has a lorekeep table, that table (its header line up to
    the next table header) is replaced in place; otherwise the table is appended
    after the existing content. The rest of the file is kept.

    Returns the path of the written file.
    Raises ``ValueError`` if ``ns`` contains a newline or carriage return.
    """
    if ns and ("\n" in ns or "\r" in ns):
        raise ValueError("namespace must not contain newlines")
    path = Path(target_dir) / "config.toml"
    block = _lorekeep_block(command, args, ns)
    # TOML documents are UTF-8; do not depend on the locale's default encoding.
    text = path.read_text(encoding="utf-8") if path.exists() else ""
    lines = text.splitlines()
    header_idx = next(
        (i for i, line in enumerate(lines) if line.strip() == _HEADER), -1
    )
    if header_idx == -1:
        sep = "\n\n" if text.strip() else ""
        new_text = text + sep + block + "\n"
    else:
        # The old table ends at the next table header. Leading whitespace is
        # ignored here just as it is when locating our own header, so an
        # indented following table is preserved instead of being swallowed.
        end = next(
            (
                i
                for i in range(header_idx + 1, len(lines))
                if lines[i].lstrip().startswith("[")
            ),
            len(lines),
        )
        before = lines[:header_idx]
        after = lines[end:]
        rebuilt = before + [block] + ([""] + after if after else [])
        new_text = "\n".join(rebuilt) + "\n"
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(new_text, encoding="utf-8")
    return path
@@ -0,0 +1,23 @@
1
+ """Shared integration helpers: resolve install command + agent-memory snippet."""
2
+ from __future__ import annotations
3
+
4
+
5
def resolve_command(install_source: str | None) -> tuple[str, list[str]]:
    """Return (command, args) to launch `lorekeep serve --transport stdio`.

    - missing/empty or ``"pypi"``: run through ``uvx lorekeep``
    - ``"local"``: run the ``lorekeep`` executable directly
    - anything else (git+URL, local path): ``uvx --from <source> lorekeep``
    """
    serve = ["serve", "--transport", "stdio"]
    if install_source == "local":
        return ("lorekeep", serve)
    if install_source and install_source != "pypi":
        return ("uvx", ["--from", install_source, "lorekeep", *serve])
    return ("uvx", ["lorekeep", *serve])
14
+
15
+
16
def agent_memory_snippet() -> str:
    """Return the fixed Markdown snippet that tells an agent how to use Lorekeep.

    The text is five newline-terminated lines, starting with a ``##`` heading.
    """
    snippet_lines = (
        "## Lorekeep knowledge base (MCP)",
        "Before answering architecture/code/domain questions, query Lorekeep:",
        "search(q) -> get_node(id) -> neighbors / at_time / history as needed.",
        "Always cite `src` provenance. Knowledge is namespace-scoped - if a fact is",
        "missing, it may be outside your scope, not nonexistent.",
    )
    return "\n".join(snippet_lines) + "\n"
@@ -0,0 +1,21 @@
1
+ """Cursor MCP config writer (.cursor/mcp.json)."""
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ from pathlib import Path
6
+
7
+
8
def write_config(target_dir: Path, command: str, args: list[str], ns: str | None) -> Path:
    """Add or replace the ``lorekeep`` server entry in ``<target_dir>/.cursor/mcp.json``.

    The ``.cursor`` directory is created when missing. Existing content is
    preserved: other servers under ``mcpServers`` and any other top-level keys
    in the file are written back unchanged. When ``ns`` is truthy it is passed
    to the server through the ``LOREKEEP_NS`` env var.

    Returns the path of the written file.
    Raises ``ValueError`` if the existing file does not hold a JSON object.
    """
    entry = {"command": command, "args": args}
    if ns:
        entry["env"] = {"LOREKEEP_NS": ns}

    config_dir = Path(target_dir) / ".cursor"
    config_dir.mkdir(parents=True, exist_ok=True)
    path = config_dir / "mcp.json"

    existing = {}
    if path.exists():
        existing = json.loads(path.read_text(encoding="utf-8"))
        if not isinstance(existing, dict):
            raise ValueError(f"{path} must contain a JSON object")

    # Tolerate a missing or null "mcpServers" entry.
    servers = existing.get("mcpServers") or {}
    servers["lorekeep"] = entry
    # Write the whole document back, not just mcpServers, so unrelated
    # top-level settings in the user's file are not lost.
    existing["mcpServers"] = servers

    path.write_text(json.dumps(existing, indent=2), encoding="utf-8")
    return path
lorekeep/mcp_server.py ADDED
@@ -0,0 +1,120 @@
1
+ """FastMCP server exposing the scoped temporal graph, read-only.
2
+
3
+ Tools are plain module functions using a module-global ScopedGraph set by
4
+ configure(). @mcp.tool() registers them with FastMCP but they remain directly
5
+ callable, so tests invoke them without the MCP transport.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ from pathlib import Path
10
+
11
+ from mcp.server.fastmcp import FastMCP
12
+
13
+ from lorekeep.models import Schema
14
+ from lorekeep.perm.ns import ScopedGraph
15
+ from lorekeep.schema_io import load_schema
16
+ from lorekeep.store.graph import GraphStore, parse_date
17
+
18
# FastMCP application object; the @mcp.tool() functions in this module register on it.
mcp = FastMCP("lorekeep")

# Module-global server state, filled in by configure() and _rebuild().
# _state keys: graph_dir, allowed_ns, schema_path, facts_mtime
_state: dict = {}
# Current scoped view of the graph; None until the first build.
_scope: ScopedGraph | None = None
# Loaded schema, or None when no schema path was configured.
_schema: Schema | None = None
23
+
24
+
25
def configure(graph_dir, allowed_ns, schema_path=None, fts_path=None) -> None:
    """Set the graph location + scope, then build. Safe to call again to refresh.

    ``allowed_ns`` is copied into a list. ``schema_path`` is optional; a falsy
    value means no schema is loaded. ``fts_path`` is accepted but not used by
    this function.
    """
    _state["graph_dir"] = Path(graph_dir)
    _state["allowed_ns"] = [*allowed_ns]
    resolved_schema = None
    if schema_path:
        resolved_schema = Path(schema_path)
    _state["schema_path"] = resolved_schema
    _rebuild()
31
+
32
+
33
def _rebuild() -> None:
    """(Re)load the graph + schema from disk into a fresh ScopedGraph.

    Reads ``graph_dir``, ``allowed_ns`` and ``schema_path`` from ``_state`` and
    records the facts file's modification time under ``facts_mtime`` (0 when the
    file does not exist).
    """
    global _scope, _schema
    facts = _state["graph_dir"] / "facts.jsonl"
    # Sample the mtime BEFORE loading. If the file is rewritten while it is being
    # read, the recorded mtime is the older one, so the next _require() call sees
    # a mismatch and reloads instead of serving stale data.
    loaded_mtime = facts.stat().st_mtime if facts.exists() else 0
    store = GraphStore.from_jsonl(facts)
    schema_path = _state.get("schema_path")
    _schema = load_schema(schema_path) if schema_path else None
    _scope = ScopedGraph(store, _state["allowed_ns"])
    _state["facts_mtime"] = loaded_mtime
42
+
43
+
44
def _require() -> ScopedGraph:
    """Return the scoped graph, lazy-reloading if facts.jsonl changed on disk.

    Raises ``RuntimeError`` when configure() has not been called yet. A rebuild
    is triggered when no graph is loaded or when the facts file's mtime differs
    from the one recorded at the last build.
    """
    if not _state:
        raise RuntimeError("mcp_server not configured; call configure() first")
    facts_file = _state["graph_dir"] / "facts.jsonl"
    current_mtime = facts_file.stat().st_mtime if facts_file.exists() else 0
    stale = _scope is None or current_mtime != _state.get("facts_mtime")
    if stale:
        _rebuild()
    return _scope
53
+
54
+
55
@mcp.tool()
def get_node(id: str) -> dict:
    """Return a node by id (props + provenance), or error if absent/out of scope."""
    found = _require().get_node(id)
    if found is not None:
        # Serialize with field aliases, in JSON-compatible form.
        return found.model_dump(mode="json", by_alias=True)
    return {"error": "not found or out of scope"}
62
+
63
+
64
@mcp.tool()
def neighbors(id: str, edge_type: str = "", depth: int = 1) -> dict:
    """Traverse neighbors up to depth (both directions), scoped to the caller."""
    scoped = _require()
    # Clamp to 1..5 to bound BFS cost; 5 hops spans any realistic graph.
    hops = min(max(int(depth), 1), 5)
    # An empty edge_type means "no filter".
    result = scoped.neighbors(id, edge_type or None, hops)

    def _dump(fact):
        return fact.model_dump(mode="json", by_alias=True)

    return {
        "nodes": [_dump(node) for node in result["nodes"]],
        "edges": [_dump(edge) for edge in result["edges"]],
    }
74
+
75
+
76
@mcp.tool()
def schema() -> dict:
    """Return the graph schema (node/edge types)."""
    # _schema stays None when no schema path was configured.
    if _schema is not None:
        return _schema.model_dump(mode="json", by_alias=True)
    return {"error": "no schema loaded"}
82
+
83
+
84
@mcp.tool()
def list_namespaces() -> list:
    """Namespaces visible to this caller."""
    scoped = _require()
    return scoped.list_namespaces()
88
+
89
+
90
@mcp.tool()
def at_time(time: str) -> dict:
    """Snapshot of facts valid at an ISO date (half-open [valid_from, valid_to))."""
    scoped = _require()
    when = parse_date(time)
    snapshot_nodes, snapshot_edges = scoped.snapshot(when)

    def _dump(fact):
        return fact.model_dump(mode="json", by_alias=True)

    return {
        "nodes": [_dump(node) for node in snapshot_nodes],
        "edges": [_dump(edge) for edge in snapshot_edges],
    }
99
+
100
+
101
@mcp.tool()
def history(id: str) -> list:
    """All versions of an entity + edges touching it, ordered by valid_from."""
    scoped = _require()
    return scoped.history(id)
105
+
106
+
107
@mcp.tool()
def changes(from_t: str, to_t: str) -> dict:
    """Edges whose validity began or ended within [from_t, to_t)."""
    scoped = _require()
    # Both bounds arrive as strings and are parsed before querying.
    window_start = parse_date(from_t)
    window_end = parse_date(to_t)
    return scoped.changes(window_start, window_end)
111
+
112
+
113
@mcp.tool()
def search(query: str, limit: int = 10) -> list:
    """Text search over node ids/props, scoped to the caller."""
    scoped = _require()
    return scoped.search(query, limit)
117
+
118
+
119
# Script entry point: start the FastMCP server when the module is run directly.
if __name__ == "__main__":
    mcp.run()
lorekeep/models.py ADDED
@@ -0,0 +1,130 @@
1
+ """Core data models for Lorekeep. The shared contract across compile + eval."""
2
+ from __future__ import annotations
3
+
4
+ import hashlib
5
+ import json
6
+ from datetime import date
7
+ from typing import Any, Literal
8
+
9
+ from pydantic import BaseModel, ConfigDict, Field
10
+
11
+
12
class DocChunk(BaseModel):
    """A slice of a raw document, with provenance back to path:line."""

    model_config = ConfigDict(frozen=True)

    path: str
    start_line: int  # 1-based
    end_line: int
    text: str
    namespace: str  # e.g. "teams/backend"

    @property
    def src(self) -> str:
        # Provenance reference in "path:start_line" form.
        return ":".join((self.path, str(self.start_line)))

    @property
    def hash(self) -> str:
        # SHA-256 hex digest over the path, a newline separator, and the text.
        payload = self.path.encode("utf-8") + b"\n" + self.text.encode("utf-8")
        return hashlib.sha256(payload).hexdigest()
33
+
34
+
35
class Node(BaseModel):
    # A graph node fact. Immutable; unknown fields are rejected.
    model_config = ConfigDict(frozen=True, extra="forbid")

    kind: Literal["node"] = "node"  # record discriminator
    id: str
    type: str
    ns: tuple[str, ...]
    valid_from: date | None = None
    valid_to: date | None = None
    props: dict[str, Any] = Field(default_factory=dict)
    src: tuple[str, ...] = Field(default_factory=tuple)

    def to_json_line(self) -> str:
        # Compact single-line JSON: sorted keys, no extra whitespace, non-ASCII kept as-is.
        payload = self.model_dump(mode="json", by_alias=True)
        return json.dumps(
            payload,
            ensure_ascii=False,
            sort_keys=True,
            separators=(",", ":"),
        )
49
+
50
+
51
class Edge(BaseModel):
    # A graph edge fact. Immutable; unknown fields are rejected. The source
    # endpoint is stored as `from_` and exposed under the alias "from";
    # populate_by_name lets either name be used on input.
    model_config = ConfigDict(frozen=True, extra="forbid", populate_by_name=True)

    kind: Literal["edge"] = "edge"  # record discriminator
    id: str
    type: str
    from_: str = Field(alias="from")
    to: str
    ns: tuple[str, ...]
    valid_from: date | None = None
    valid_to: date | None = None
    props: dict[str, Any] = Field(default_factory=dict)
    src: tuple[str, ...] = Field(default_factory=tuple)

    def to_json_line(self) -> str:
        # Compact single-line JSON: sorted keys, no extra whitespace, non-ASCII
        # kept as-is. by_alias=True emits the source endpoint as "from".
        payload = self.model_dump(mode="json", by_alias=True)
        return json.dumps(
            payload,
            ensure_ascii=False,
            sort_keys=True,
            separators=(",", ":"),
        )
67
+
68
+
69
# A fact is either a node or an edge.
Fact = Node | Edge
70
+
71
+
72
class TypeSpec(BaseModel):
    # Schema entry for one node type. Immutable; unknown fields are rejected.
    model_config = ConfigDict(frozen=True, extra="forbid")

    # maps a prop name to a type name given as a string
    props: dict[str, str] = Field(default_factory=dict)
75
+
76
+
77
class EndpointSpec(BaseModel):
    # Schema entry for one edge type: its two endpoints. Immutable; unknown
    # fields are rejected. `from_` is exposed under the alias "from", and
    # populate_by_name lets either name be used on input.
    model_config = ConfigDict(frozen=True, extra="forbid", populate_by_name=True)

    from_: str = Field(alias="from")
    to: str
81
+
82
+
83
class Schema(BaseModel):
    # Graph schema: a version plus the declared node and edge types.
    # Immutable; unknown fields are rejected.
    model_config = ConfigDict(frozen=True, extra="forbid")

    version: int
    node_types: dict[str, TypeSpec]
    edge_types: dict[str, EndpointSpec]

    @classmethod
    def load(cls, data: dict[str, Any]) -> "Schema":
        # Alternate constructor: validate a plain mapping into a Schema.
        validated = cls.model_validate(data)
        return validated

    def is_valid_node_type(self, t: str) -> bool:
        # True when `t` is a declared node type name.
        declared = self.node_types
        return t in declared

    def is_valid_edge_type(self, t: str) -> bool:
        # True when `t` is a declared edge type name.
        declared = self.edge_types
        return t in declared
98
+
99
+
100
class CompileError(BaseModel):
    # One compile error record: a path, a line number and a message. Immutable.
    model_config = ConfigDict(frozen=True)

    path: str
    line: int
    message: str
105
+
106
+
107
class QuarantineItem(BaseModel):
    # One quarantined fact, kept as a raw mapping, plus the reason string. Immutable.
    model_config = ConfigDict(frozen=True)

    fact: dict[str, Any]
    reason: str
111
+
112
+
113
class Manifest(BaseModel):
    # Summary record of a compile run. Unknown fields are rejected; unlike the
    # fact models, this model is not frozen.
    model_config = ConfigDict(extra="forbid")

    schema_version: int
    chunk_count: int
    node_count: int
    edge_count: int
    run_id: str
    facts_hash: str
    chunk_hashes: dict[str, list[str]] = Field(default_factory=dict)
    errors: list[CompileError] = Field(default_factory=list)
    quarantine: list[QuarantineItem] = Field(default_factory=list)

    def to_json(self) -> str:
        # Pretty-printed JSON (2-space indent) with sorted keys.
        payload = self.model_dump(mode="json")
        return json.dumps(payload, indent=2, sort_keys=True)

    @classmethod
    def from_json(cls, text: str) -> "Manifest":
        # Alternate constructor: parse JSON text, then validate it.
        parsed = json.loads(text)
        return cls.model_validate(parsed)