lorekeep 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lorekeep/__init__.py +3 -0
- lorekeep/cli.py +229 -0
- lorekeep/compile/__init__.py +0 -0
- lorekeep/compile/extract.py +150 -0
- lorekeep/compile/ingest.py +55 -0
- lorekeep/compile/providers.py +49 -0
- lorekeep/compile/resolve.py +111 -0
- lorekeep/compile/writer.py +63 -0
- lorekeep/config.py +39 -0
- lorekeep/defaults.py +44 -0
- lorekeep/eval/__init__.py +0 -0
- lorekeep/eval/construction.py +97 -0
- lorekeep/eval/gold.py +31 -0
- lorekeep/eval/retrieval.py +46 -0
- lorekeep/facts_io.py +22 -0
- lorekeep/integrations/__init__.py +0 -0
- lorekeep/integrations/claude_code.py +19 -0
- lorekeep/integrations/codex.py +56 -0
- lorekeep/integrations/common.py +23 -0
- lorekeep/integrations/cursor.py +21 -0
- lorekeep/mcp_server.py +120 -0
- lorekeep/models.py +130 -0
- lorekeep/paths.py +58 -0
- lorekeep/perm/__init__.py +0 -0
- lorekeep/perm/ns.py +113 -0
- lorekeep/pipeline.py +67 -0
- lorekeep/schema_io.py +12 -0
- lorekeep/store/__init__.py +0 -0
- lorekeep/store/fts.py +54 -0
- lorekeep/store/graph.py +137 -0
- lorekeep-0.1.0.dist-info/METADATA +246 -0
- lorekeep-0.1.0.dist-info/RECORD +35 -0
- lorekeep-0.1.0.dist-info/WHEEL +4 -0
- lorekeep-0.1.0.dist-info/entry_points.txt +2 -0
- lorekeep-0.1.0.dist-info/licenses/LICENSE +21 -0
lorekeep/config.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""Load Lorekeep config. Path resolved by paths.resolve_paths() (dev .lorekeep/, LOREKEEP_HOME, or XDG)."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import yaml
|
|
7
|
+
from pydantic import BaseModel, Field
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ProviderConfig(BaseModel):
    """LLM provider settings (the ``provider`` field of ``Config``)."""

    # Backend family: openai | anthropic | ollama | <litellm prefix>.
    backend: str = "openai"
    model: str = "gpt-4o-mini"
    # Set for ollama or openai-compatible endpoints.
    api_base: str | None = None
    # Env var holding the api key (else the litellm default applies).
    api_key_env: str | None = None
    # Inline key; for gitignored configs only -- the env var is safer.
    api_key: str | None = None
    temperature: float = 0.0
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class CompileConfig(BaseModel):
    """Settings for the compile step (the ``compile`` field of ``Config``)."""

    # Presumably the number of source lines per chunk -- confirm against ingest.
    chunk_lines: int = 60
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class NsConfig(BaseModel):
    """Namespace settings (the ``ns`` field of ``Config``)."""

    # Namespaces used when nothing else is specified; a fresh list per instance.
    default: list[str] = Field(default_factory=lambda: ["public"])
    # Presumably maps a caller token to its namespaces -- verify against perm/ns.
    token_map: dict[str, list[str]] = Field(default_factory=dict)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class Config(BaseModel):
    """Top-level Lorekeep configuration; every section has a default."""

    provider: ProviderConfig = Field(default_factory=ProviderConfig)
    compile: CompileConfig = Field(default_factory=CompileConfig)
    ns: NsConfig = Field(default_factory=NsConfig)
    # How lorekeep was installed: pypi | local | git+URL | path.
    install_source: str | None = None
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def load_config(path: Path) -> Config:
    """Load the YAML config at *path*, falling back to defaults.

    A missing file yields ``Config()``. A document that parses to a falsy
    value (e.g. an empty file) is validated as ``{}``, i.e. all defaults.
    """
    if path.exists():
        raw_text = path.read_text(encoding="utf-8")
        parsed = yaml.safe_load(raw_text)
        return Config.model_validate(parsed or {})
    return Config()
|
lorekeep/defaults.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""Default config + schema used by `lorekeep init` to bootstrap a fresh home."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
# Default graph schema: node types with their string-typed props, and edge
# types with the node types they connect ("from" -> "to").
DEFAULT_SCHEMA = {
    "version": 2,
    "node_types": {
        # organisational entities
        "service": {"props": {"name": "string", "lang": "string"}},
        "team": {"props": {"name": "string"}},
        "decision": {"props": {"title": "string"}},
        "project": {"props": {"name": "string", "status": "string"}},
        "person": {"props": {"name": "string", "role": "string"}},
        # tooling
        "tool": {"props": {"name": "string", "category": "string"}},
        "command": {"props": {"name": "string", "platform": "string"}},
        # knowledge artefacts
        "concept": {"props": {"name": "string", "domain": "string"}},
        "note": {"props": {"title": "string", "topic": "string"}},
        "document": {"props": {"title": "string", "kind": "string"}},
    },
    "edge_types": {
        # service-centric relations
        "depends_on": {"from": "service", "to": "service"},
        "decided_by": {"from": "decision", "to": "team"},
        "owns": {"from": "team", "to": "service"},
        "part_of": {"from": "service", "to": "project"},
        "uses": {"from": "service", "to": "tool"},
        # knowledge relations
        "mentions": {"from": "note", "to": "concept"},
        "documents": {"from": "document", "to": "concept"},
        "describes": {"from": "note", "to": "service"},
        "relates_to": {"from": "concept", "to": "concept"},
    },
}
|
|
30
|
+
|
|
31
|
+
DEFAULT_CONFIG_YAML = """\
|
|
32
|
+
provider:
|
|
33
|
+
backend: openai
|
|
34
|
+
model: openai/gpt-4o-mini
|
|
35
|
+
api_base: null
|
|
36
|
+
api_key_env: OPENAI_API_KEY
|
|
37
|
+
api_key: null
|
|
38
|
+
temperature: 0.0
|
|
39
|
+
compile:
|
|
40
|
+
chunk_lines: 60
|
|
41
|
+
ns:
|
|
42
|
+
default: [public]
|
|
43
|
+
install_source: pypi
|
|
44
|
+
"""
|
|
File without changes
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"""Tier-1 construction-quality evaluation vs a gold corpus."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from itertools import combinations
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from lorekeep.eval.gold import edge_key, load_compiled, load_gold, node_key
|
|
8
|
+
from lorekeep.models import Edge, Node
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def precision_recall_f1(gold: set, got: set) -> tuple[float, float, float]:
    """Return ``(precision, recall, f1)`` of *got* measured against *gold*.

    Two empty sets count as a perfect match. Otherwise an empty *got* gives
    precision 0.0, an empty *gold* gives recall 0.0, and F1 is 0.0 whenever
    precision + recall is zero.
    """
    if not (gold or got):
        return 1.0, 1.0, 1.0
    overlap = len(gold & got)
    precision = overlap / len(got) if got else 0.0
    recall = overlap / len(gold) if gold else 0.0
    total = precision + recall
    if not total:
        return precision, recall, 0.0
    return precision, recall, 2 * precision * recall / total
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def extraction_report(graph_dir: Path, gold_dir: Path) -> dict:
    """Score compiled nodes and edges against the gold corpus.

    Facts are compared by their match keys (``node_key`` / ``edge_key``), and
    the result holds precision/recall/F1 under ``"nodes"`` and ``"edges"``.
    """

    def split(facts):
        # Partition a mixed fact list into (nodes, edges).
        return (
            [f for f in facts if isinstance(f, Node)],
            [f for f in facts if isinstance(f, Edge)],
        )

    compiled_nodes, compiled_edges = split(load_compiled(graph_dir))
    gold_nodes, gold_edges = split(load_gold(gold_dir))

    # id -> node lookups let edge_key resolve endpoints to node names.
    compiled_by_id = {node.id: node for node in compiled_nodes}
    gold_by_id = {node.id: node for node in gold_nodes}

    node_scores = precision_recall_f1(
        {node_key(node) for node in gold_nodes},
        {node_key(node) for node in compiled_nodes},
    )
    edge_scores = precision_recall_f1(
        {edge_key(edge, gold_by_id) for edge in gold_edges},
        {edge_key(edge, compiled_by_id) for edge in compiled_edges},
    )

    metric_names = ("precision", "recall", "f1")
    return {
        "nodes": dict(zip(metric_names, node_scores)),
        "edges": dict(zip(metric_names, edge_scores)),
    }
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _clusters_from_aliases(alias_groups: list[dict]) -> list[set[str]]:
|
|
47
|
+
"""Each gold group -> set of surface names that should be one entity."""
|
|
48
|
+
return [set(g["aliases"]) for g in alias_groups]
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _compiled_clusters(compiled_nodes: list) -> list[set[str]]:
|
|
52
|
+
"""Group compiled node names by their id (canonical entity)."""
|
|
53
|
+
by_id: dict[str, set[str]] = {}
|
|
54
|
+
for n in compiled_nodes:
|
|
55
|
+
nm = n.props.get("name", n.id)
|
|
56
|
+
by_id.setdefault(n.id, set()).add(nm)
|
|
57
|
+
return list(by_id.values())
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def entity_resolution_f1(compiled_nodes: list, gold_alias_groups: list[dict]) -> dict:
    """Pairwise coreference precision/recall/F1 of compiled vs gold clusters.

    Each cluster is expanded into all unordered pairs of its names; the two
    resulting pair sets are then compared with ``precision_recall_f1``.
    """
    gold_pairs: set[frozenset] = {
        frozenset(pair)
        for group in _clusters_from_aliases(gold_alias_groups)
        for pair in combinations(sorted(group), 2)
    }
    got_pairs: set[frozenset] = {
        frozenset(pair)
        for group in _compiled_clusters(compiled_nodes)
        for pair in combinations(sorted(group), 2)
    }
    precision, recall, f1 = precision_recall_f1(gold_pairs, got_pairs)
    return {"precision": precision, "recall": recall, "f1": f1}
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def structure_report(facts_dir: Path) -> dict:
    """Graph-shape metrics: counts, avg degree, density, dangling-edge rate.

    *facts_dir* may be a compiled graph dir (contains ``facts.jsonl``) or a
    gold dir. Ratios are rounded to 4 decimals and are 0.0 when their
    denominator would be zero.
    """
    # accept either a gold dir or a compiled graph dir
    loader = load_compiled if (facts_dir / "facts.jsonl").exists() else load_gold
    facts = loader(facts_dir)

    nodes = [fact for fact in facts if isinstance(fact, Node)]
    edges = [fact for fact in facts if isinstance(fact, Edge)]
    known_ids = {node.id for node in nodes}

    # An edge is dangling when either endpoint is not a known node id.
    dangling = 0
    for edge in edges:
        if not (edge.from_ in known_ids and edge.to in known_ids):
            dangling += 1

    node_count, edge_count = len(nodes), len(edges)
    avg_degree = edge_count / node_count if node_count else 0.0
    if node_count > 1:
        density = edge_count / (node_count * (node_count - 1))
    else:
        density = 0.0
    dangling_rate = round(dangling / edge_count, 4) if edge_count else 0.0

    return {
        "node_count": node_count,
        "edge_count": edge_count,
        "avg_degree": round(avg_degree, 4),
        "density": round(density, 4),
        "dangling_edge_rate": dangling_rate,
    }
|
lorekeep/eval/gold.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""Load gold + compiled facts, and define match keys for evaluation."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from lorekeep.facts_io import read_facts
|
|
7
|
+
from lorekeep.models import Edge, Node
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def load_gold(gold_dir: Path) -> list[Node | Edge]:
    """Load every ``*.facts.jsonl`` under *gold_dir* (recursively), in sorted path order."""
    gold_files = sorted(gold_dir.glob("**/*.facts.jsonl"))
    return [fact for gold_file in gold_files for fact in read_facts(gold_file)]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def load_compiled(graph_dir: Path) -> list[Node | Edge]:
    """Load the compiled facts stored in ``facts.jsonl`` inside *graph_dir*."""
    facts_file = graph_dir / "facts.jsonl"
    return read_facts(facts_file)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def node_key(n: Node) -> tuple[str, str]:
    """Match key for a node: its type plus its ``name`` prop (or id if unnamed)."""
    label = n.props.get("name", n.id)
    return n.type, label
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def edge_key(e: Edge, nodes_by_id: dict[str, Node]) -> tuple[str, str, str]:
    """Match key for an edge: ``(type, from-name, to-name)``.

    Each endpoint id is replaced by the ``name`` prop of the node it refers
    to; the raw id is kept when the node is unknown or has no name.
    """

    def label(node_id: str) -> str:
        node = nodes_by_id.get(node_id)
        if not node:
            return node_id
        return node.props.get("name", node_id)

    source_name = label(e.from_)
    target_name = label(e.to)
    return e.type, source_name, target_name
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""Tier-2 retrieval/temporal eval: minimal harness (not full benchmark datasets).
|
|
2
|
+
|
|
3
|
+
Loads a fixture graph + a small JSON question set, runs the scoped query path,
|
|
4
|
+
and checks expected node ids / edge-type presence per question. Full
|
|
5
|
+
HotpotQA/CronQuestions adaptation is deferred (spec §16 Tier 2).
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
from lorekeep.perm.ns import ScopedGraph
|
|
13
|
+
from lorekeep.store.graph import GraphStore, parse_date
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def retrieval_report(graph_dir: Path, questions_path: Path, allowed_ns) -> dict:
    """Run every question in *questions_path* against the scoped graph.

    The graph is loaded from ``facts.jsonl`` in *graph_dir* and wrapped in a
    ``ScopedGraph`` limited to *allowed_ns*. Returns the total number of
    questions, how many passed, and the ``id`` of each failing question.
    """
    store = GraphStore.from_jsonl(Path(graph_dir) / "facts.jsonl")
    scoped = ScopedGraph(store, allowed_ns)
    # Decode explicitly as UTF-8 (like the other loaders in this package)
    # rather than relying on the platform's locale encoding.
    questions = json.loads(Path(questions_path).read_text(encoding="utf-8"))

    failures = [q["id"] for q in questions if not _check(scoped, q)]
    total = len(questions)
    return {"total": total, "passed": total - len(failures), "failures": failures}
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _check(scoped: ScopedGraph, q: dict) -> bool:
|
|
31
|
+
kind = q["kind"]
|
|
32
|
+
if kind == "multihop":
|
|
33
|
+
res = scoped.neighbors(q["start"], depth=q.get("depth", 1))
|
|
34
|
+
got = {n.id for n in res["nodes"]}
|
|
35
|
+
return set(q["expect_node_ids"]).issubset(got)
|
|
36
|
+
if kind == "temporal":
|
|
37
|
+
_, edges = scoped.snapshot(parse_date(q["time"]))
|
|
38
|
+
types = {e.type for e in edges}
|
|
39
|
+
if "expect_edge_types_present" in q:
|
|
40
|
+
if not set(q["expect_edge_types_present"]).issubset(types):
|
|
41
|
+
return False
|
|
42
|
+
if "expect_edge_types_absent" in q:
|
|
43
|
+
if set(q["expect_edge_types_absent"]) & types:
|
|
44
|
+
return False
|
|
45
|
+
return True
|
|
46
|
+
return False
|
lorekeep/facts_io.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""Shared loader for facts.jsonl -> list[Node|Edge]. Used by store + eval."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from lorekeep.models import Edge, Node
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def read_facts(path: Path) -> list[Node | Edge]:
    """Read a facts.jsonl file (one JSON object per line) into typed facts.

    Blank lines are skipped. A record whose ``kind`` is ``"node"`` is
    validated as a ``Node``; every other record is validated as an ``Edge``.
    """
    content = Path(path).read_text(encoding="utf-8")
    facts: list[Node | Edge] = []
    for raw_line in content.splitlines():
        stripped = raw_line.strip()
        if stripped:
            record = json.loads(stripped)
            model = Node if record["kind"] == "node" else Edge
            facts.append(model.model_validate(record))
    return facts
|
|
File without changes
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""Claude Code MCP config writer (.mcp.json)."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def write_config(target_dir: Path, command: str, args: list[str], ns: str | None) -> Path:
    """Register the ``lorekeep`` MCP server in ``<target_dir>/.mcp.json``.

    An existing file is merged rather than replaced: other servers under
    ``mcpServers`` and any other top-level keys are preserved, and only the
    ``lorekeep`` entry is overwritten. When *ns* is truthy it is passed to the
    server through the ``LOREKEEP_NS`` environment variable.

    Returns the path of the written file.

    Raises:
        ValueError: if the existing file is not valid JSON or its top level
            is not a JSON object.
    """
    entry: dict = {"command": command, "args": args}
    if ns:
        entry["env"] = {"LOREKEEP_NS": ns}

    path = Path(target_dir) / ".mcp.json"
    config: dict = {}
    if path.exists():
        loaded = json.loads(path.read_text(encoding="utf-8"))
        if not isinstance(loaded, dict):
            raise ValueError(f"{path} must contain a JSON object at the top level")
        config = loaded

    # `or {}` also covers an explicit `"mcpServers": null`.
    servers = config.get("mcpServers") or {}
    servers["lorekeep"] = entry
    # Write back the whole document so unrelated top-level keys survive.
    config["mcpServers"] = servers

    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(json.dumps(config, indent=2), encoding="utf-8")
    return path
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""Codex MCP config writer (config.toml [mcp_servers.lorekeep]).
|
|
2
|
+
|
|
3
|
+
Idempotent: re-running replaces the existing [mcp_servers.lorekeep] block instead
|
|
4
|
+
of appending a duplicate. Values are escaped so a stray quote/backslash in the
|
|
5
|
+
namespace or command can't break the generated TOML.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
_HEADER = "[mcp_servers.lorekeep]"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _toml_escape(s: str) -> str:
|
|
15
|
+
return s.replace("\\", "\\\\").replace('"', '\\"')
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _toml_quote_list(items: list[str]) -> str:
    """Render *items* as a single-line TOML array of escaped, quoted strings."""
    quoted = ['"' + _toml_escape(item) + '"' for item in items]
    return "[" + ", ".join(quoted) + "]"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _lorekeep_block(command: str, args: list[str], ns: str | None) -> str:
    """Build the ``[mcp_servers.lorekeep]`` TOML table as text (no trailing newline).

    The table holds ``command`` and ``args``; an inline ``env`` table carrying
    ``LOREKEEP_NS`` is appended only when *ns* is truthy.
    """
    command_line = f'command = "{_toml_escape(command)}"'
    args_line = f"args = {_toml_quote_list(args)}"
    block_lines = [_HEADER, command_line, args_line]
    if ns:
        env_line = f'env = {{ LOREKEEP_NS = "{_toml_escape(ns)}" }}'
        block_lines.append(env_line)
    return "\n".join(block_lines)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def write_config(target_dir: Path, command: str, args: list[str], ns: str | None) -> Path:
    """Write or refresh the ``[mcp_servers.lorekeep]`` table in ``config.toml``.

    If the table is absent it is appended; otherwise the existing table is
    replaced in place, together with any ``[mcp_servers.lorekeep.*]``
    sub-tables, so re-running never leaves duplicate or stale definitions.
    Everything else in the file is kept.

    Returns the path of the written file.

    Raises:
        ValueError: if *ns* contains a newline or carriage return.
    """
    if ns and ("\n" in ns or "\r" in ns):
        raise ValueError("namespace must not contain newlines")
    path = Path(target_dir) / "config.toml"
    block = _lorekeep_block(command, args, ns)
    # TOML documents are UTF-8; don't depend on the platform locale.
    text = path.read_text(encoding="utf-8") if path.exists() else ""
    lines = text.splitlines()
    header_idx = next(
        (i for i, line in enumerate(lines) if line.strip() == _HEADER), -1
    )
    if header_idx == -1:
        sep = "\n\n" if text.strip() else ""
        new_text = text + sep + block + "\n"
    else:
        # "[mcp_servers.lorekeep." -- prefix of sub-tables owned by our table.
        sub_table_prefix = _HEADER[:-1] + "."
        end = len(lines)
        for i in range(header_idx + 1, len(lines)):
            # Strip first: table headers may be indented, and the header
            # lookup above strips too. An unstripped test would miss an
            # indented next table and delete it along with the old block.
            stripped = lines[i].strip()
            if stripped.startswith("[") and not stripped.startswith(sub_table_prefix):
                end = i  # next table that is not part of the lorekeep entry
                break
        before = lines[:header_idx]
        after = lines[end:]
        rebuilt = before + [block] + ([""] + after if after else [])
        new_text = "\n".join(rebuilt) + "\n"
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(new_text, encoding="utf-8")
    return path
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""Shared integration helpers: resolve install command + agent-memory snippet."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def resolve_command(install_source: str | None) -> tuple[str, list[str]]:
    """Return (command, args) to launch `lorekeep serve --transport stdio`.

    * unset or ``"pypi"`` -> run through ``uvx lorekeep ...``
    * ``"local"``         -> call the ``lorekeep`` executable directly
    * anything else (git+URL, local path) -> ``uvx --from <source> lorekeep ...``
    """
    serve_args = ["serve", "--transport", "stdio"]
    if install_source == "local":
        return "lorekeep", serve_args
    if install_source and install_source != "pypi":
        return "uvx", ["--from", install_source, "lorekeep"] + serve_args
    return "uvx", ["lorekeep"] + serve_args
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def agent_memory_snippet() -> str:
    """Return the markdown snippet that tells an agent how to use Lorekeep over MCP."""
    snippet_lines = (
        "## Lorekeep knowledge base (MCP)",
        "Before answering architecture/code/domain questions, query Lorekeep:",
        "search(q) -> get_node(id) -> neighbors / at_time / history as needed.",
        "Always cite `src` provenance. Knowledge is namespace-scoped - if a fact is",
        "missing, it may be outside your scope, not nonexistent.",
    )
    # Every line, including the last, ends with a newline.
    return "\n".join(snippet_lines) + "\n"
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Cursor MCP config writer (.cursor/mcp.json)."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def write_config(target_dir: Path, command: str, args: list[str], ns: str | None) -> Path:
    """Register the ``lorekeep`` MCP server in ``<target_dir>/.cursor/mcp.json``.

    The ``.cursor`` directory is created if needed. An existing file is merged
    rather than replaced: other servers under ``mcpServers`` and any other
    top-level keys are preserved, and only the ``lorekeep`` entry is
    overwritten. When *ns* is truthy it is passed to the server through the
    ``LOREKEEP_NS`` environment variable.

    Returns the path of the written file.

    Raises:
        ValueError: if the existing file is not valid JSON or its top level
            is not a JSON object.
    """
    entry: dict = {"command": command, "args": args}
    if ns:
        entry["env"] = {"LOREKEEP_NS": ns}

    cursor_dir = Path(target_dir) / ".cursor"
    cursor_dir.mkdir(parents=True, exist_ok=True)
    path = cursor_dir / "mcp.json"

    config: dict = {}
    if path.exists():
        loaded = json.loads(path.read_text(encoding="utf-8"))
        if not isinstance(loaded, dict):
            raise ValueError(f"{path} must contain a JSON object at the top level")
        config = loaded

    # `or {}` also covers an explicit `"mcpServers": null`.
    servers = config.get("mcpServers") or {}
    servers["lorekeep"] = entry
    # Write back the whole document so unrelated top-level keys survive.
    config["mcpServers"] = servers

    path.write_text(json.dumps(config, indent=2), encoding="utf-8")
    return path
|
lorekeep/mcp_server.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""FastMCP server exposing the scoped temporal graph, read-only.
|
|
2
|
+
|
|
3
|
+
Tools are plain module functions using a module-global ScopedGraph set by
|
|
4
|
+
configure(). @mcp.tool() registers them with FastMCP but they remain directly
|
|
5
|
+
callable, so tests invoke them without the MCP transport.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from mcp.server.fastmcp import FastMCP
|
|
12
|
+
|
|
13
|
+
from lorekeep.models import Schema
|
|
14
|
+
from lorekeep.perm.ns import ScopedGraph
|
|
15
|
+
from lorekeep.schema_io import load_schema
|
|
16
|
+
from lorekeep.store.graph import GraphStore, parse_date
|
|
17
|
+
|
|
18
|
+
mcp = FastMCP("lorekeep")
|
|
19
|
+
|
|
20
|
+
_state: dict = {} # graph_dir, allowed_ns, schema_path, facts_mtime
|
|
21
|
+
_scope: ScopedGraph | None = None
|
|
22
|
+
_schema: Schema | None = None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def configure(graph_dir, allowed_ns, schema_path=None, fts_path=None) -> None:
    """Record the graph location + scope in module state, then build the graph.

    Calling it again overwrites the stored settings and rebuilds. Note that
    ``fts_path`` is accepted but not used by this function.
    """
    root = Path(graph_dir)
    _state["graph_dir"] = root
    namespaces = list(allowed_ns)
    _state["allowed_ns"] = namespaces
    resolved_schema = Path(schema_path) if schema_path else None
    _state["schema_path"] = resolved_schema
    _rebuild()
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _rebuild() -> None:
    """(Re)load the graph + schema from disk into a fresh ScopedGraph.

    Reads ``facts.jsonl`` from the configured graph dir, reloads the schema
    when a schema path is configured, and records the facts file's mtime so
    ``_require`` can detect later changes.
    """
    global _scope, _schema
    facts = _state["graph_dir"] / "facts.jsonl"
    # Stamp the mtime BEFORE reading. If the file is rewritten while we load,
    # the stored stamp is then older than the file and the next _require()
    # reloads. Stamping after the load would record that write as already
    # seen and serve stale data until the file changed again.
    mtime = facts.stat().st_mtime if facts.exists() else 0
    store = GraphStore.from_jsonl(facts)
    sp = _state.get("schema_path")
    _schema = load_schema(sp) if sp else None
    _scope = ScopedGraph(store, _state["allowed_ns"])
    _state["facts_mtime"] = mtime
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _require() -> ScopedGraph:
    """Return the scoped graph, rebuilding first if facts.jsonl changed on disk.

    Raises:
        RuntimeError: if ``configure()`` has not populated the module state.
    """
    if not _state:
        raise RuntimeError("mcp_server not configured; call configure() first")
    facts_file = _state["graph_dir"] / "facts.jsonl"
    # A missing facts file is represented by mtime 0.
    current_mtime = facts_file.stat().st_mtime if facts_file.exists() else 0
    is_stale = _scope is None or current_mtime != _state.get("facts_mtime")
    if is_stale:
        _rebuild()
    return _scope
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@mcp.tool()
def get_node(id: str) -> dict:
    """Return a node by id (props + provenance), or error if absent/out of scope."""
    found = _require().get_node(id)
    if found is not None:
        return found.model_dump(mode="json", by_alias=True)
    # The scoped lookup returned None for this id.
    return {"error": "not found or out of scope"}
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@mcp.tool()
def neighbors(id: str, edge_type: str = "", depth: int = 1) -> dict:
    """Traverse neighbors up to depth (both directions), scoped to the caller."""
    graph = _require()
    # bound BFS cost: clamp the requested depth into [1, 5]
    hops = min(int(depth), 5)
    if hops < 1:
        hops = 1
    # An empty edge_type is passed on as None.
    result = graph.neighbors(id, edge_type or None, hops)

    def dump(fact) -> dict:
        return fact.model_dump(mode="json", by_alias=True)

    return {
        "nodes": [dump(node) for node in result["nodes"]],
        "edges": [dump(edge) for edge in result["edges"]],
    }
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
@mcp.tool()
def schema() -> dict:
    """Return the graph schema (node/edge types)."""
    # Reads the module-level schema as-is; it does not go through _require().
    if _schema is not None:
        return _schema.model_dump(mode="json", by_alias=True)
    return {"error": "no schema loaded"}
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@mcp.tool()
def list_namespaces() -> list:
    """Namespaces visible to this caller."""
    graph = _require()
    return graph.list_namespaces()
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@mcp.tool()
def at_time(time: str) -> dict:
    """Snapshot of facts valid at an ISO date (half-open [valid_from, valid_to))."""
    graph = _require()
    when = parse_date(time)
    snapshot_nodes, snapshot_edges = graph.snapshot(when)

    def dump(fact) -> dict:
        return fact.model_dump(mode="json", by_alias=True)

    return {
        "nodes": [dump(node) for node in snapshot_nodes],
        "edges": [dump(edge) for edge in snapshot_edges],
    }
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
@mcp.tool()
def history(id: str) -> list:
    """All versions of an entity + edges touching it, ordered by valid_from."""
    graph = _require()
    return graph.history(id)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
@mcp.tool()
def changes(from_t: str, to_t: str) -> dict:
    """Edges whose validity began or ended within [from_t, to_t)."""
    graph = _require()
    window_start = parse_date(from_t)
    window_end = parse_date(to_t)
    return graph.changes(window_start, window_end)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
@mcp.tool()
def search(query: str, limit: int = 10) -> list:
    """Text search over node ids/props, scoped to the caller."""
    graph = _require()
    return graph.search(query, limit)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
if __name__ == "__main__":
|
|
120
|
+
mcp.run()
|
lorekeep/models.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
"""Core data models for Lorekeep. The shared contract across compile + eval."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import hashlib
|
|
5
|
+
import json
|
|
6
|
+
from datetime import date
|
|
7
|
+
from typing import Any, Literal
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class DocChunk(BaseModel):
    """A slice of a raw document, with provenance back to path:line."""

    model_config = ConfigDict(frozen=True)

    path: str
    start_line: int  # 1-based
    end_line: int
    text: str
    namespace: str  # e.g. "teams/backend"

    @property
    def src(self) -> str:
        """Provenance reference in ``path:start_line`` form."""
        return ":".join((self.path, str(self.start_line)))

    @property
    def hash(self) -> str:
        """SHA-256 hex digest of the path, a newline, then the text (UTF-8)."""
        payload = b"\n".join(
            (self.path.encode("utf-8"), self.text.encode("utf-8"))
        )
        return hashlib.sha256(payload).hexdigest()
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class Node(BaseModel):
    """Immutable graph node; unknown fields are rejected."""

    model_config = ConfigDict(frozen=True, extra="forbid")

    kind: Literal["node"] = "node"
    id: str
    type: str
    ns: tuple[str, ...]
    # Validity bounds; both are optional and default to None.
    valid_from: date | None = None
    valid_to: date | None = None
    props: dict[str, Any] = Field(default_factory=dict)
    src: tuple[str, ...] = Field(default_factory=tuple)

    def to_json_line(self) -> str:
        """Serialize to one compact JSON line with sorted keys, keeping non-ASCII text as-is."""
        payload = self.model_dump(mode="json", by_alias=True)
        return json.dumps(
            payload,
            sort_keys=True,
            ensure_ascii=False,
            separators=(",", ":"),
        )
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class Edge(BaseModel):
    """Immutable directed graph edge; unknown fields are rejected.

    The source endpoint is stored as ``from_`` and aliased to ``"from"``, and
    either name may be used when constructing or validating an edge.
    """

    model_config = ConfigDict(frozen=True, extra="forbid", populate_by_name=True)

    kind: Literal["edge"] = "edge"
    id: str
    type: str
    from_: str = Field(alias="from")
    to: str
    ns: tuple[str, ...]
    # Validity bounds; both are optional and default to None.
    valid_from: date | None = None
    valid_to: date | None = None
    props: dict[str, Any] = Field(default_factory=dict)
    src: tuple[str, ...] = Field(default_factory=tuple)

    def to_json_line(self) -> str:
        """Serialize to one compact JSON line with sorted keys, using the ``from`` alias."""
        payload = self.model_dump(mode="json", by_alias=True)
        return json.dumps(
            payload,
            sort_keys=True,
            ensure_ascii=False,
            separators=(",", ":"),
        )
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
Fact = Node | Edge
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class TypeSpec(BaseModel):
    """Schema entry for one node type: a mapping of prop name -> type name."""

    model_config = ConfigDict(frozen=True, extra="forbid")

    props: dict[str, str] = Field(default_factory=dict)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class EndpointSpec(BaseModel):
    """Schema entry for one edge type: the ``from`` and ``to`` endpoint types."""

    model_config = ConfigDict(frozen=True, extra="forbid", populate_by_name=True)

    # Stored as `from_` and aliased to "from".
    from_: str = Field(alias="from")
    to: str
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class Schema(BaseModel):
    """Graph schema: a version plus the declared node and edge types."""

    model_config = ConfigDict(frozen=True, extra="forbid")

    version: int
    node_types: dict[str, TypeSpec]
    edge_types: dict[str, EndpointSpec]

    @classmethod
    def load(cls, data: dict[str, Any]) -> "Schema":
        """Validate a plain dict into a ``Schema``."""
        schema = cls.model_validate(data)
        return schema

    def is_valid_node_type(self, t: str) -> bool:
        """Return True when *t* is a declared node type."""
        declared = self.node_types
        return t in declared

    def is_valid_edge_type(self, t: str) -> bool:
        """Return True when *t* is a declared edge type."""
        declared = self.edge_types
        return t in declared
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class CompileError(BaseModel):
    """Immutable record of one compile problem: a path, a line, and a message."""

    model_config = ConfigDict(frozen=True)

    path: str
    line: int
    message: str
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
class QuarantineItem(BaseModel):
    """Immutable record pairing a raw fact dict with the reason it was quarantined."""

    model_config = ConfigDict(frozen=True)

    fact: dict[str, Any]
    reason: str
|
|
110
|
+
reason: str
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
class Manifest(BaseModel):
    """Summary of a compile run; unknown fields are rejected.

    Unlike the other models in this module it is not frozen.
    """

    model_config = ConfigDict(extra="forbid")

    schema_version: int
    chunk_count: int
    node_count: int
    edge_count: int
    run_id: str
    facts_hash: str
    # Presumably source path -> hashes of its chunks -- verify against the writer.
    chunk_hashes: dict[str, list[str]] = Field(default_factory=dict)
    errors: list[CompileError] = Field(default_factory=list)
    quarantine: list[QuarantineItem] = Field(default_factory=list)

    def to_json(self) -> str:
        """Serialize to pretty-printed JSON (2-space indent, sorted keys)."""
        payload = self.model_dump(mode="json")
        return json.dumps(payload, sort_keys=True, indent=2)

    @classmethod
    def from_json(cls, text: str) -> "Manifest":
        """Parse JSON *text* and validate it into a ``Manifest``."""
        payload = json.loads(text)
        return cls.model_validate(payload)
|