agentforge-graph 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. agentforge_graph/__init__.py +6 -0
  2. agentforge_graph/chunking/__init__.py +12 -0
  3. agentforge_graph/chunking/cast.py +159 -0
  4. agentforge_graph/chunking/chunk.py +19 -0
  5. agentforge_graph/chunking/tokens.py +15 -0
  6. agentforge_graph/cli.py +607 -0
  7. agentforge_graph/config.py +259 -0
  8. agentforge_graph/core/__init__.py +54 -0
  9. agentforge_graph/core/conformance.py +270 -0
  10. agentforge_graph/core/contracts.py +163 -0
  11. agentforge_graph/core/kinds.py +68 -0
  12. agentforge_graph/core/models.py +134 -0
  13. agentforge_graph/core/provenance.py +62 -0
  14. agentforge_graph/core/symbols.py +116 -0
  15. agentforge_graph/embed/__init__.py +28 -0
  16. agentforge_graph/embed/base.py +22 -0
  17. agentforge_graph/embed/bedrock.py +85 -0
  18. agentforge_graph/embed/fake.py +34 -0
  19. agentforge_graph/embed/openai.py +67 -0
  20. agentforge_graph/embed/pipeline.py +184 -0
  21. agentforge_graph/embed/registry.py +66 -0
  22. agentforge_graph/embed/report.py +15 -0
  23. agentforge_graph/enrich/__init__.py +70 -0
  24. agentforge_graph/enrich/anthropic.py +38 -0
  25. agentforge_graph/enrich/anthropic_client.py +109 -0
  26. agentforge_graph/enrich/bedrock.py +24 -0
  27. agentforge_graph/enrich/bedrock_client.py +115 -0
  28. agentforge_graph/enrich/bedrock_summarizer.py +23 -0
  29. agentforge_graph/enrich/claude.py +172 -0
  30. agentforge_graph/enrich/enricher.py +108 -0
  31. agentforge_graph/enrich/governs.py +173 -0
  32. agentforge_graph/enrich/governs_enricher.py +152 -0
  33. agentforge_graph/enrich/heuristics.py +224 -0
  34. agentforge_graph/enrich/judge.py +63 -0
  35. agentforge_graph/enrich/registry.py +133 -0
  36. agentforge_graph/enrich/report.py +60 -0
  37. agentforge_graph/enrich/summarizer.py +62 -0
  38. agentforge_graph/enrich/summary_enricher.py +211 -0
  39. agentforge_graph/enrich/taxonomy.py +38 -0
  40. agentforge_graph/frameworks/__init__.py +29 -0
  41. agentforge_graph/frameworks/base.py +75 -0
  42. agentforge_graph/frameworks/detect.py +124 -0
  43. agentforge_graph/frameworks/extractor.py +63 -0
  44. agentforge_graph/frameworks/orm.py +93 -0
  45. agentforge_graph/frameworks/packs/_js_ast.py +56 -0
  46. agentforge_graph/frameworks/packs/_python_ast.py +157 -0
  47. agentforge_graph/frameworks/packs/django/__init__.py +240 -0
  48. agentforge_graph/frameworks/packs/django/models.scm +7 -0
  49. agentforge_graph/frameworks/packs/express/__init__.py +133 -0
  50. agentforge_graph/frameworks/packs/express/routes.scm +8 -0
  51. agentforge_graph/frameworks/packs/fastapi/__init__.py +210 -0
  52. agentforge_graph/frameworks/packs/fastapi/depends.scm +6 -0
  53. agentforge_graph/frameworks/packs/fastapi/routes.scm +10 -0
  54. agentforge_graph/frameworks/packs/flask/__init__.py +143 -0
  55. agentforge_graph/frameworks/packs/flask/routes.scm +11 -0
  56. agentforge_graph/frameworks/packs/nestjs/__init__.py +205 -0
  57. agentforge_graph/frameworks/packs/nestjs/routes.scm +6 -0
  58. agentforge_graph/frameworks/packs/spring/__init__.py +267 -0
  59. agentforge_graph/frameworks/packs/spring/routes.scm +6 -0
  60. agentforge_graph/frameworks/packs/sqlalchemy/__init__.py +250 -0
  61. agentforge_graph/frameworks/packs/sqlalchemy/models.scm +7 -0
  62. agentforge_graph/frameworks/registry.py +44 -0
  63. agentforge_graph/ingest/__init__.py +30 -0
  64. agentforge_graph/ingest/codegraph.py +847 -0
  65. agentforge_graph/ingest/extractor.py +353 -0
  66. agentforge_graph/ingest/incremental/__init__.py +25 -0
  67. agentforge_graph/ingest/incremental/detect.py +118 -0
  68. agentforge_graph/ingest/incremental/dirty.py +61 -0
  69. agentforge_graph/ingest/incremental/indexer.py +218 -0
  70. agentforge_graph/ingest/incremental/meta.py +72 -0
  71. agentforge_graph/ingest/incremental/ports.py +39 -0
  72. agentforge_graph/ingest/pack.py +160 -0
  73. agentforge_graph/ingest/packs/__init__.py +34 -0
  74. agentforge_graph/ingest/packs/cpp/__init__.py +35 -0
  75. agentforge_graph/ingest/packs/cpp/references.scm +15 -0
  76. agentforge_graph/ingest/packs/cpp/structure.scm +49 -0
  77. agentforge_graph/ingest/packs/csharp/__init__.py +35 -0
  78. agentforge_graph/ingest/packs/csharp/references.scm +12 -0
  79. agentforge_graph/ingest/packs/csharp/structure.scm +45 -0
  80. agentforge_graph/ingest/packs/go/__init__.py +38 -0
  81. agentforge_graph/ingest/packs/go/references.scm +12 -0
  82. agentforge_graph/ingest/packs/go/structure.scm +64 -0
  83. agentforge_graph/ingest/packs/java/__init__.py +35 -0
  84. agentforge_graph/ingest/packs/java/references.scm +12 -0
  85. agentforge_graph/ingest/packs/java/structure.scm +38 -0
  86. agentforge_graph/ingest/packs/javascript/__init__.py +34 -0
  87. agentforge_graph/ingest/packs/javascript/references.scm +11 -0
  88. agentforge_graph/ingest/packs/javascript/structure.scm +166 -0
  89. agentforge_graph/ingest/packs/php/__init__.py +35 -0
  90. agentforge_graph/ingest/packs/php/references.scm +15 -0
  91. agentforge_graph/ingest/packs/php/structure.scm +44 -0
  92. agentforge_graph/ingest/packs/python/__init__.py +25 -0
  93. agentforge_graph/ingest/packs/python/references.scm +14 -0
  94. agentforge_graph/ingest/packs/python/structure.scm +57 -0
  95. agentforge_graph/ingest/packs/ruby/__init__.py +37 -0
  96. agentforge_graph/ingest/packs/ruby/references.scm +12 -0
  97. agentforge_graph/ingest/packs/ruby/structure.scm +37 -0
  98. agentforge_graph/ingest/packs/rust/__init__.py +39 -0
  99. agentforge_graph/ingest/packs/rust/references.scm +12 -0
  100. agentforge_graph/ingest/packs/rust/structure.scm +46 -0
  101. agentforge_graph/ingest/packs/typescript/__init__.py +31 -0
  102. agentforge_graph/ingest/packs/typescript/references.scm +11 -0
  103. agentforge_graph/ingest/packs/typescript/structure.scm +99 -0
  104. agentforge_graph/ingest/pipeline.py +134 -0
  105. agentforge_graph/ingest/report.py +84 -0
  106. agentforge_graph/ingest/resolver.py +467 -0
  107. agentforge_graph/ingest/source.py +79 -0
  108. agentforge_graph/knowledge/__init__.py +28 -0
  109. agentforge_graph/knowledge/adr.py +136 -0
  110. agentforge_graph/knowledge/commits.py +152 -0
  111. agentforge_graph/knowledge/ingest.py +312 -0
  112. agentforge_graph/knowledge/mentions.py +71 -0
  113. agentforge_graph/knowledge/report.py +32 -0
  114. agentforge_graph/main.py +21 -0
  115. agentforge_graph/providers.py +36 -0
  116. agentforge_graph/repomap/__init__.py +14 -0
  117. agentforge_graph/repomap/rank.py +161 -0
  118. agentforge_graph/repomap/render.py +55 -0
  119. agentforge_graph/repomap/repomap.py +66 -0
  120. agentforge_graph/retrieve/__init__.py +21 -0
  121. agentforge_graph/retrieve/pack.py +76 -0
  122. agentforge_graph/retrieve/rerank.py +251 -0
  123. agentforge_graph/retrieve/retriever.py +286 -0
  124. agentforge_graph/retrieve/scoring.py +36 -0
  125. agentforge_graph/serve/__init__.py +19 -0
  126. agentforge_graph/serve/engine.py +204 -0
  127. agentforge_graph/serve/http_runner.py +133 -0
  128. agentforge_graph/serve/server.py +110 -0
  129. agentforge_graph/serve/tools.py +307 -0
  130. agentforge_graph/store/__init__.py +32 -0
  131. agentforge_graph/store/_rowmap.py +102 -0
  132. agentforge_graph/store/errors.py +22 -0
  133. agentforge_graph/store/facade.py +89 -0
  134. agentforge_graph/store/kuzu_store.py +380 -0
  135. agentforge_graph/store/lance_store.py +146 -0
  136. agentforge_graph/store/neo4j_store.py +294 -0
  137. agentforge_graph/store/pgvector_store.py +170 -0
  138. agentforge_graph/store/registry.py +45 -0
  139. agentforge_graph/temporal/__init__.py +36 -0
  140. agentforge_graph/temporal/backfill.py +338 -0
  141. agentforge_graph/temporal/events.py +82 -0
  142. agentforge_graph/temporal/index.py +190 -0
  143. agentforge_graph/temporal/mining.py +190 -0
  144. agentforge_graph/temporal/recorder.py +114 -0
  145. agentforge_graph/temporal/store.py +282 -0
  146. agentforge_graph-0.3.2.dist-info/METADATA +291 -0
  147. agentforge_graph-0.3.2.dist-info/RECORD +151 -0
  148. agentforge_graph-0.3.2.dist-info/WHEEL +4 -0
  149. agentforge_graph-0.3.2.dist-info/entry_points.txt +3 -0
  150. agentforge_graph-0.3.2.dist-info/licenses/LICENSE +202 -0
  151. agentforge_graph-0.3.2.dist-info/licenses/NOTICE +14 -0
@@ -0,0 +1,218 @@
1
+ """``IncrementalIndexer`` — apply a ``ChangeSet`` to an existing index.
2
+
3
+ Cost is proportional to the diff and its import-graph neighbourhood, not the
4
+ repo. The sequence (spec §4.3):
5
+
6
+ 1. record the symbols about to disappear (for dirty propagation);
7
+ 2. delete removed files (graph + vectors);
8
+ 3. re-extract + upsert the touched files (scoped ``IngestPipeline``);
9
+ 4. clear resolved edges in the re-resolve *scope* and re-resolve just that
10
+ scope — ``scope = changed ∪ importers(changed)`` out to
11
+ ``resolve_scope_hops`` import-graph hops;
12
+ 5. append the dirtied symbols (changed + 1-hop neighbours) to the ``DirtySet``.
13
+
14
+ Correctness is asserted by the equivalence property test
15
+ (``refresh(diff) == full_reindex``); this module's scope heuristics are the
16
+ performance knob, the property test is the safety net.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ from pathlib import PurePosixPath
22
+
23
+ from agentforge_graph.core import GraphQuery, Node, NodeKind, SymbolID
24
+ from agentforge_graph.frameworks import FrameworkExtractor
25
+ from agentforge_graph.store import Store
26
+
27
+ from ..pack import PackRegistry
28
+ from ..report import IndexReport
29
+ from ..resolver import ImportResolver
30
+ from ..source import RepoSource, read_go_module
31
+ from .detect import ChangeSet
32
+ from .dirty import DirtySet
33
+ from .ports import TemporalRecorder
34
+
35
+ _ALL = 10_000_000
36
+
37
+
38
class IncrementalIndexer:
    """Apply a ``ChangeSet`` to an existing index without re-indexing the repo.

    ``refresh`` deletes removed files, re-extracts the touched ones, re-resolves
    edges inside a bounded import-graph scope, and records the dirtied symbols.
    The optional collaborators (``dirty``, ``frameworks``, ``recorder``) are
    skipped when they are ``None``.
    """

    def __init__(
        self,
        store: Store,
        source: RepoSource,
        registry: PackRegistry,
        repo: str,
        commit: str = "",
        resolve_scope_hops: int = 1,
        dirty: DirtySet | None = None,
        frameworks: FrameworkExtractor | None = None,
        recorder: TemporalRecorder | None = None,
        commit_ts: int = 0,
    ) -> None:
        """Store the collaborators; no I/O happens here.

        ``resolve_scope_hops`` bounds how many importer hops the re-resolve
        scope expands to (values below zero behave like zero).
        """
        self.store = store
        self.source = source
        self.registry = registry
        self.repo = repo
        self.commit = commit
        self.resolve_scope_hops = resolve_scope_hops
        self.dirty = dirty
        self.frameworks = frameworks
        # feat-009: optional evolution-log recorder (None when temporal off).
        self.recorder = recorder
        self.commit_ts = commit_ts

    async def refresh(self, changes: ChangeSet) -> IndexReport:
        """Apply ``changes`` to the stores and return the resulting report.

        An empty change set returns a fresh ``IndexReport`` without touching
        the stores.
        """
        if changes.is_empty():
            return IndexReport()

        # avoid an import cycle (pipeline imports nothing incremental)
        from ..pipeline import IngestPipeline

        removed = changes.removed_paths()
        touched = set(changes.touched_paths())

        # (0) feat-009: snapshot the symbols of every file the diff touches
        # (removed + touched) *before* any mutation — diffing against the
        # post-upsert set yields exactly the opened/closed lifecycle events.
        before_symbols: set[str] = set()
        if self.recorder is not None:
            before_symbols = await self._symbols_in(sorted({*removed, *touched}))

        # (1) symbols that will vanish with the removed files — dirty them now
        dirty_ids: set[str] = await self._symbols_in(removed)

        # (2) delete removed files from both stores
        for path in removed:
            await self.store.graph.delete_file(path)
            await self.store.vectors.delete_where({"path": path})

        # (3) re-extract + upsert the touched files (resolve deferred to (4));
        # active framework packs re-emit their facts into the touched subgraphs.
        report = await IngestPipeline(self.repo, self.commit, frameworks=self.frameworks).run(
            self.source, self.store.graph, self.registry, paths=touched
        )

        # (4) scoped re-resolve: clear the scope's resolved edges, rebuild them
        scope = await self._resolve_scope(changes)
        await self.store.graph.clear_resolved(sorted(scope))
        stats = await ImportResolver(
            self.registry, self.commit, go_module=read_go_module(self.source.root)
        ).resolve(self.store.graph, changed_files=sorted(scope))
        report.resolve = stats
        imports = stats.imports_resolved + stats.imports_external
        report.by_edge_kind["IMPORTS"] = report.by_edge_kind.get("IMPORTS", 0) + imports
        report.by_edge_kind["CALLS"] = report.by_edge_kind.get("CALLS", 0) + stats.refs_resolved
        if stats.inherits_resolved:
            report.by_edge_kind["INHERITS"] = (
                report.by_edge_kind.get("INHERITS", 0) + stats.inherits_resolved
            )
        report.edges += imports + stats.refs_resolved + stats.inherits_resolved

        # feat-011 pass-2: ORM RELATES_TO is globally idempotent (clear all +
        # rebuild from the whole-repo model set), so a scoped refresh converges
        # to the same edges a full re-index would produce.
        if self.frameworks is not None and self.frameworks.active:
            resolved, unresolved = await self.frameworks.resolve(self.store.graph, self.commit)
            if resolved:
                report.relations_resolved = resolved
                report.by_edge_kind["RELATES_TO"] = (
                    report.by_edge_kind.get("RELATES_TO", 0) + resolved
                )
                report.edges += resolved
            report.framework_unresolved += unresolved

        # (5) dirty propagation: touched symbols + 1-hop neighbours of all dirty
        after_symbols = await self._symbols_in(sorted(touched))
        dirty_ids |= after_symbols
        dirty_ids |= await self._neighbours_of(dirty_ids)
        if self.dirty is not None:
            await self.dirty.add(sorted(dirty_ids))

        # (6) feat-009: record lifecycle. opened = now present, weren't before;
        # closed = were present, now gone (deleted files + symbols dropped from a
        # modified file). Written in one transaction at flush.
        if self.recorder is not None:
            self.recorder.open(sorted(after_symbols - before_symbols), self.commit, self.commit_ts)
            self.recorder.close(sorted(before_symbols - after_symbols), self.commit, self.commit_ts)
            # churn/authorship for the touched files → aggregates + denormalised
            # onto node attrs (feat-009 chunk 2).
            await self.recorder.record_churn(
                self.store.graph,
                str(self.source.root),
                sorted(touched),
                self.commit,
                self.commit_ts,
            )
            await self.recorder.flush()
        return report

    # --- helpers ----------------------------------------------------------

    async def _all_nodes(self) -> list[Node]:
        """Return the nodes of an unfiltered graph query capped at ``_ALL``."""
        return (await self.store.graph.query(GraphQuery(limit=_ALL))).nodes

    async def _symbols_in(self, paths: list[str]) -> set[str]:
        """Code-symbol ids (Class/Function/Method) whose file is in ``paths``."""
        if not paths:
            return set()
        want = set(paths)
        kinds = {NodeKind.CLASS, NodeKind.FUNCTION, NodeKind.METHOD}
        return {
            n.id
            for n in await self._all_nodes()
            if n.kind in kinds and SymbolID.parse(n.id).path in want
        }

    async def _neighbours_of(self, ids: set[str]) -> set[str]:
        """Ids of every node one hop away from any id in ``ids``."""
        out: set[str] = set()
        for nid in ids:
            for nb in await self.store.graph.neighbors(nid, None, depth=1):
                out.add(nb.id)
        return out

    async def _resolve_scope(self, changes: ChangeSet) -> set[str]:
        """``changed ∪ importers(changed)`` out to ``resolve_scope_hops`` hops
        in the import graph. Importers are matched by *module key* (not by edge)
        so added, deleted and modified files are handled uniformly — an importer
        of an added file resolves to it now; an importer of a deleted file falls
        back to an external package, exactly as a full re-index would."""
        scope = set(changes.changed_paths())
        # per-file imports as module keys, read from the current graph
        file_imports = await self._file_import_keys()
        frontier = self._module_keys(scope)
        for _ in range(max(self.resolve_scope_hops, 0)):
            importers = {
                path for path, keys in file_imports.items() if keys & frontier and path not in scope
            }
            if not importers:
                break
            scope |= importers
            # only the newly added files can pull in further importers
            frontier = self._module_keys(importers)
        return scope

    async def _file_import_keys(self) -> dict[str, set[str]]:
        """For every FILE node, the set of module keys it imports.

        Keys are produced by the file's language pack so they are comparable to
        ``_module_keys``. Files whose extension has no registered pack are
        omitted; entries without a ``module`` value are ignored.
        """
        out: dict[str, set[str]] = {}
        for n in await self._all_nodes():
            if n.kind is not NodeKind.FILE:
                continue
            path = SymbolID.parse(n.id).path
            pack = self.registry.for_extension(PurePosixPath(path).suffix)
            if pack is None:
                continue
            # Relative dotted imports (``.utils``) can only be turned into an
            # absolute key when the importer's own module key is supplied;
            # without it they collapse to ``utils`` and never match the
            # ``pkg.utils`` key that ``_module_keys`` produces for the target.
            importer_module = pack.module_path(path)
            keys = {
                pack.resolve_import(path, imp.get("module", ""), importer_module=importer_module)
                for imp in n.attrs.get("imports", [])
                if imp.get("module")
            }
            out[path] = keys
        return out

    def _module_keys(self, paths: set[str]) -> set[str]:
        """Module keys of ``paths``; files with no registered pack are skipped."""
        keys: set[str] = set()
        for path in paths:
            pack = self.registry.for_extension(PurePosixPath(path).suffix)
            if pack is not None:
                keys.add(pack.module_path(path))
        return keys
@@ -0,0 +1,72 @@
1
+ """``IndexMeta`` — the persisted ``.ckg/meta.json`` manifest (feat-004).
2
+
3
+ Extends the minimal ``{schema_version, indexed_commit}`` that ``Store.open``
4
+ writes on first open into the full state the next diff needs: the git commit
5
+ the index was built at, a per-language pack fingerprint, and a per-file
6
+ content-hash manifest. Saved atomically (temp + ``os.replace``) and **last**
7
+ in a refresh, so a crash leaves the previous, consistent manifest in place and
8
+ the refresh simply re-runs from the old commit.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import hashlib
14
+ import json
15
+ import os
16
+ from pathlib import Path
17
+
18
+ from pydantic import BaseModel, Field
19
+
20
+ from agentforge_graph.ingest.pack import LanguagePack
21
+ from agentforge_graph.store.facade import STORE_SCHEMA_VERSION
22
+
23
+ _META = "meta.json"
24
+
25
+
26
def pack_fingerprint(pack: LanguagePack) -> str:
    """Return a 16-hex-character content fingerprint of ``pack``.

    The fingerprint covers the structure queries, the reference queries, the
    module style, and the capture-name -> node-kind map, so editing a ``.scm``
    file changes it and forces a full re-index without manual versioning.
    """
    # NOTE(review): the resolver flags on the pack (wildcard_import,
    # relative_bare, namespace_*) are not part of the hashed text — confirm
    # that is intended.
    kind_map = pack.descriptor_rules.kinds
    rule_text = ",".join(f"{capture}={kind_map[capture].value}" for capture in sorted(kind_map))
    hashed_fields = (
        pack.structure_queries,
        pack.reference_queries,
        pack.module_style,
        rule_text,
    )
    digest = hashlib.sha256(" ".join(hashed_fields).encode("utf-8"))
    return digest.hexdigest()[:16]
34
+
35
+
36
class IndexMeta(BaseModel):
    """On-disk index manifest stored as ``meta.json`` under the index root.

    A manifest that carries only ``schema_version`` and ``indexed_commit``
    still loads: the remaining fields fall back to their defaults and are
    repopulated by the next index run.
    """

    schema_version: int = STORE_SCHEMA_VERSION
    indexed_commit: str = ""  # git HEAD at last index ("" if non-git)
    pack_versions: dict[str, str] = Field(default_factory=dict)  # lang_slug -> fingerprint
    files: dict[str, str] = Field(default_factory=dict)  # repo-rel path -> content_hash

    @classmethod
    def load(cls, root: str | Path) -> IndexMeta:
        """Read the manifest under ``root``; return defaults when it is absent."""
        manifest = Path(root) / _META
        if manifest.exists():
            raw = json.loads(manifest.read_text())
            return cls.model_validate(raw)
        return cls()

    def save(self, root: str | Path) -> None:
        """Write the manifest under ``root`` via a temp file and ``os.replace``."""
        target = Path(root) / _META
        target.parent.mkdir(parents=True, exist_ok=True)
        payload = json.dumps(self.model_dump(), indent=2, sort_keys=True)
        scratch = target.with_name(_META + ".tmp")
        scratch.write_text(payload)
        # atomic on POSIX; the manifest is never half-written
        os.replace(scratch, target)

    def is_indexed(self) -> bool:
        """True once a real index exists (files recorded or a commit pinned)."""
        return bool(self.files or self.indexed_commit)

    def packs_changed(self, packs: list[LanguagePack]) -> bool:
        """True when any given pack's fingerprint differs from the recorded one.

        A pack with no recorded fingerprint counts as changed, which forces a
        full rebuild because extraction semantics may differ.
        """
        for slug, fingerprint in self.fingerprints(packs).items():
            if self.pack_versions.get(slug) != fingerprint:
                return True
        return False

    @staticmethod
    def fingerprints(packs: list[LanguagePack]) -> dict[str, str]:
        """Map each pack's ``lang_slug`` to its current fingerprint."""
        table: dict[str, str] = {}
        for pack in packs:
            table[pack.lang_slug] = pack_fingerprint(pack)
        return table
@@ -0,0 +1,39 @@
1
+ """Ports the incremental indexer depends on but does not own (feat-009).
2
+
3
+ The ``IncrementalIndexer`` records symbol lifecycle (opened/closed) as it
4
+ applies a diff, but the deterministic ``ingest`` layer must not import the
5
+ higher ``temporal`` layer (ADR-0001 spirit). So it depends on this structural
6
+ ``Protocol``; the concrete ``temporal.SqliteTemporalRecorder`` satisfies it.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from collections.abc import Iterable
12
+ from typing import TYPE_CHECKING, Protocol, runtime_checkable
13
+
14
+ if TYPE_CHECKING:
15
+ from agentforge_graph.core import GraphStore
16
+
17
+
18
@runtime_checkable
class TemporalRecorder(Protocol):
    """Write port for the evolution log. ``open``/``close`` buffer; ``flush``
    persists in one transaction at end-of-refresh."""

    # Buffer "opened" lifecycle events for ``symbol_ids``. The incremental
    # indexer passes its commit as ``at`` and the commit timestamp as ``ts``.
    def open(self, symbol_ids: Iterable[str], at: str, ts: int) -> None: ...

    # Buffer "closed" lifecycle events for ``symbol_ids``; same argument
    # meaning as ``open``.
    def close(self, symbol_ids: Iterable[str], at: str, ts: int) -> None: ...

    async def record_churn(
        self,
        graph: GraphStore,
        repo_root: str,
        paths: Iterable[str],
        commit: str,
        commit_ts: int,
    ) -> None:
        """Mine churn/authorship for ``paths`` over a window, store the bounded
        aggregates, and denormalise ``introduced/last_changed/churn_*/top_authors``
        onto the matching node ``attrs`` (feat-009 §4.5). No-op off the git path."""

    # Persist everything buffered by ``open``/``close``.
    async def flush(self) -> None: ...
@@ -0,0 +1,160 @@
1
+ """Language packs: everything language-specific about extraction, behind one
2
+ shape so the extractor (extractor.py) stays language-agnostic.
3
+
4
+ A pack pairs a tree-sitter grammar with two ``.scm`` query files and a small
5
+ rule set mapping capture names to node kinds. The capture-name vocabulary is
6
+ shared across packs (``def.class``, ``def.function``, ``def.method``,
7
+ ``name``, ``import``, ``import.module``, ``call``, ``call.callee``) so an
8
+ edge kind means the same thing in every language.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import posixpath
14
+ from typing import Literal
15
+
16
+ from pydantic import BaseModel, Field
17
+
18
+ from agentforge_graph.core import NodeKind
19
+
20
+ _INIT_BASENAMES = ("__init__.py", "__init__.pyi") # a file that *is* its package
21
+
22
+
23
class DescriptorRules(BaseModel):
    """Lookup table from a structure-query capture name (e.g. ``def.class``)
    to the node kind that capture produces. Captures prefixed ``def.`` mark
    definitions that nest descriptors and own a ``CONTAINS`` subtree."""

    kinds: dict[str, NodeKind] = Field(default_factory=dict)

    def kind_for(self, capture: str) -> NodeKind | None:
        """Return the node kind mapped to ``capture``, or ``None`` if unmapped."""
        if capture in self.kinds:
            return self.kinds[capture]
        return None
32
+
33
+
34
+ class LanguagePack(BaseModel):
35
+ """A language's grammar + queries + descriptor rules."""
36
+
37
+ language: str # human name, e.g. "python"
38
+ lang_slug: str # symbol-ID language slug, e.g. "py"
39
+ grammar: str # tree-sitter-language-pack grammar name
40
+ extensions: tuple[str, ...] # file extensions, e.g. (".py",)
41
+ structure_queries: str # .scm: defs/classes/imports
42
+ reference_queries: str # .scm: calls/attribute refs
43
+ descriptor_rules: DescriptorRules = Field(default_factory=DescriptorRules)
44
+ # How imports name modules: "dotted" (Python `a.b.c`), "relative" (TS/JS path
45
+ # specifiers like `./util`), or "go" (a package is a *directory*; import paths
46
+ # are full module paths the resolver suffix-matches to a repo dir). Drives
47
+ # module_path + resolve_import.
48
+ module_style: Literal["dotted", "relative", "go"] = "dotted"
49
+ # When True, an in-repo import that names no symbols (e.g. Ruby
50
+ # `require_relative "./x"`) binds *all* of the target file's top-level defs
51
+ # into the importer's scope — a wildcard import. Off for explicit-name
52
+ # languages (Python/TS/JS) where a bare import is side-effect-only.
53
+ wildcard_import: bool = False
54
+ # When True (with module_style="relative"), a *bare* specifier (no `./`) is
55
+ # still resolved against the importer's directory rather than treated as an
56
+ # external package — Ruby `require_relative "thor/command"` is file-relative
57
+ # regardless of a leading `./`. Off for TS/JS where bare = npm package.
58
+ relative_bare: bool = False
59
+ # Separator for namespace/package FQN imports (PHP `\`, Java/C# `.`). When set,
60
+ # the resolver maps an FQN import (`use App\Foo\Bar`) to the file declaring that
61
+ # class via the file's namespace declaration + symbol name. "" = no FQN model.
62
+ namespace_sep: str = ""
63
+ # When True (C#), an import names a *namespace* (`using App.Geo`), not a class:
64
+ # resolve it to every in-repo file declaring that namespace and bind all their
65
+ # symbols. When False (PHP/Java), an import is a single class FQN.
66
+ namespace_import_prefix: bool = False
67
+ # When True (Rust), the file's "namespace" is derived from its path (the
68
+ # crate-relative module path, `src/a/b.rs` -> `a/b`) rather than a declaration,
69
+ # and a leading `crate::` is stripped from imports. Used with namespace_sep="::".
70
+ namespace_from_path: bool = False
71
+
72
+ def _strip_ext(self, path: str) -> str:
73
+ for ext in self.extensions:
74
+ if path.endswith(ext):
75
+ return path[: -len(ext)]
76
+ return path
77
+
78
def module_path(self, repo_relative_path: str) -> str:
    """Return the module key under which a file is imported.

    * ``go``: the file's directory (``a/b/c.go`` -> ``a/b``), since every
      ``.go`` file in a directory shares one package key.
    * ``relative``: the extension-stripped path (``a/b/c.ts`` -> ``a/b/c``).
    * ``dotted``: path segments joined with dots, dropping a trailing
      ``__init__`` (``a/b/c.py`` -> ``a.b.c``).
    """
    style = self.module_style
    if style == "go":
        return posixpath.dirname(repo_relative_path)

    stem = self._strip_ext(repo_relative_path)
    if style == "relative":
        return stem

    parts = list(filter(None, stem.split("/")))
    if parts[-1:] == ["__init__"]:
        # a package's __init__ file is imported under the package's own name
        parts.pop()
    return ".".join(parts)
93
+
94
def resolve_import(self, importer_path: str, raw_module: str, importer_module: str = "") -> str:
    """Translate an import as written in ``importer_path`` into a module key
    comparable to ``module_path``.

    * ``go``: the import path is returned untouched; the go.mod prefix is not
      known here, so matching against in-repo package directories is left to
      the resolver.
    * ``relative`` (TS/JS): a ``./`` or ``../`` specifier — or any specifier
      when ``relative_bare`` is set — is joined onto the importer's directory
      and normalised; any other specifier is returned extension-stripped.
    * ``dotted`` (Python): an absolute import is returned as-is. A relative
      import (leading dots) is resolved against ``importer_module``: the first
      dot names the importer's package and each further dot climbs one level
      (BUG-004).
    """
    style = self.module_style
    if style == "go":
        return raw_module

    if style == "relative":
        spec = self._strip_ext(raw_module)
        anchored = spec.startswith(("./", "../")) or self.relative_bare
        if not anchored:
            return spec
        importer_dir = posixpath.dirname(importer_path)
        return posixpath.normpath(posixpath.join(importer_dir, spec))

    # dotted
    name = raw_module.lstrip(".")  # what follows the dots: "utils", "pkg.mod", ""
    level = len(raw_module) - len(name)
    if level == 0:
        return raw_module  # absolute dotted import: identity

    package = [seg for seg in importer_module.split(".") if seg]
    # a regular module file lives *in* its package; an __init__ file *is* it
    is_package_file = posixpath.basename(importer_path) in _INIT_BASENAMES
    if package and not is_package_file:
        package.pop()

    ascend = level - 1  # the first dot is the importer's package; extras climb
    if ascend > len(package):
        package = []
    elif ascend:
        package = package[:-ascend]

    if name:
        package = [*package, name]
    return ".".join(package)
133
+
134
+
135
class PackRegistry:
    """Looks up the language pack for a file extension, language name, or
    symbol-ID slug. When two packs claim the same key, the later one in the
    input list wins."""

    def __init__(self, packs: list[LanguagePack]) -> None:
        self._packs = list(packs)
        self._by_ext: dict[str, LanguagePack] = {
            ext: pack for pack in packs for ext in pack.extensions
        }
        self._by_lang: dict[str, LanguagePack] = {pack.language: pack for pack in packs}
        self._by_slug: dict[str, LanguagePack] = {pack.lang_slug: pack for pack in packs}

    @property
    def packs(self) -> list[LanguagePack]:
        """A fresh copy of the registered packs, in registration order."""
        return self._packs[:]

    def for_extension(self, suffix: str) -> LanguagePack | None:
        """Pack registered for the extension ``suffix`` (e.g. ``".py"``), or ``None``."""
        return self._by_ext.get(suffix)

    def for_language(self, name: str) -> LanguagePack | None:
        """Pack whose ``language`` equals ``name``, or ``None``."""
        return self._by_lang.get(name)

    def for_slug(self, slug: str) -> LanguagePack | None:
        """Pack whose ``lang_slug`` equals ``slug``, or ``None``."""
        return self._by_slug.get(slug)
@@ -0,0 +1,34 @@
1
+ """Built-in language packs. v0.1 shipped Python/TypeScript/JavaScript/Go (Tier A);
2
+ the follow-up packs (Ruby, PHP, Java, C#, C++, Rust) are registered here as well."""
3
+
4
+ from __future__ import annotations
5
+
6
+ from agentforge_graph.ingest.pack import PackRegistry
7
+
8
+ from .cpp import CPP_PACK
9
+ from .csharp import CSHARP_PACK
10
+ from .go import GO_PACK
11
+ from .java import JAVA_PACK
12
+ from .javascript import JAVASCRIPT_PACK
13
+ from .php import PHP_PACK
14
+ from .python import PYTHON_PACK
15
+ from .ruby import RUBY_PACK
16
+ from .rust import RUST_PACK
17
+ from .typescript import TYPESCRIPT_PACK
18
+
19
+ BUILTIN_PACKS = [
20
+ PYTHON_PACK,
21
+ TYPESCRIPT_PACK,
22
+ JAVASCRIPT_PACK,
23
+ GO_PACK,
24
+ RUBY_PACK,
25
+ PHP_PACK,
26
+ JAVA_PACK,
27
+ CSHARP_PACK,
28
+ CPP_PACK,
29
+ RUST_PACK,
30
+ ]
31
+
32
+
33
def builtin_registry() -> PackRegistry:
    """Return a new ``PackRegistry`` built from ``BUILTIN_PACKS``."""
    return PackRegistry(BUILTIN_PACKS)
@@ -0,0 +1,35 @@
1
+ """The C++ language pack (Tier B — structure + heuristic refs).
2
+
3
+ Tier B per the language scope: comprehensive symbol extraction (classes, structs,
4
+ enums, free functions, methods) + quoted `#include` resolution, but reference
5
+ resolution is heuristic — C++'s overloading, templates, and `obj.method()` /
6
+ `ns::fn()` member access keep most calls unresolved (ADR-0004). Quoted includes
7
+ (`#include "geo/shape.h"`) are resolved relative to the including file (a bare
8
+ path, like Ruby); `<system>` includes are skipped.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from pathlib import Path
14
+
15
+ from agentforge_graph.core import NodeKind
16
+ from agentforge_graph.ingest.pack import DescriptorRules, LanguagePack
17
+
18
+ _HERE = Path(__file__).parent
19
+
20
+ CPP_PACK = LanguagePack(
21
+ language="cpp",
22
+ lang_slug="cpp",
23
+ grammar="cpp",
24
+ extensions=(".cpp", ".cc", ".cxx", ".hpp", ".hh", ".hxx", ".h"),
25
+ structure_queries=(_HERE / "structure.scm").read_text(encoding="utf-8"),
26
+ reference_queries=(_HERE / "references.scm").read_text(encoding="utf-8"),
27
+ descriptor_rules=DescriptorRules(
28
+ kinds={
29
+ "def.class": NodeKind.CLASS, # class + struct + enum
30
+ "def.function": NodeKind.FUNCTION, # free fn + method (promoted in a class)
31
+ }
32
+ ),
33
+ module_style="relative", # `#include "geo/shape.h"` resolved relative to the file
34
+ relative_bare=True, # includes have no leading `./`
35
+ )
@@ -0,0 +1,15 @@
1
+ ; C++ reference queries (feat-002, pack-cpp; Tier B — heuristic).
2
+ ; Plain call `f(...)`; member/arrow calls (`this->f()`, `obj.f()`, `ptr->f()`)
3
+ ; capture the receiver so `this->f()` binds to a method of the enclosing class;
4
+ ; any other receiver is left unresolved (member access, ADR-0004). `ns::f()`
5
+ ; qualified calls are not captured here.
6
+
7
+ (call_expression  ; plain call `f(...)`: the callee is a bare identifier and no @call.recv is captured
8
+ function: (identifier) @call.callee) @call
9
+
10
+ ; `this->f()` / `obj.f()` / `ptr->f()` — the receiver is the field_expression's
11
+ ; argument (`this` is a named node, captured by `(_)`).
12
+ (call_expression
13
+ function: (field_expression
14
+ argument: (_) @call.recv  ; `(_)` is the named-node wildcard, so any named receiver expression is captured
15
+ field: (field_identifier) @call.callee)) @call
@@ -0,0 +1,49 @@
1
+ ; C++ structure queries (feat-002, pack-cpp; Tier B — structure + heuristic refs).
2
+ ; Namespaces are scopes, not captured as defs (so free functions inside them stay
3
+ ; Function, not Method). Header/impl split: methods are declared in the class body
4
+ ; and may be defined out-of-line as `Type::method`.
5
+
6
+ ; --- definitions ---
7
+ (class_specifier  ; `class Name …` — a `name:` child is required, so unnamed classes do not match
8
+ name: (type_identifier) @name) @def.class
9
+
10
+ (struct_specifier  ; `struct Name …`, tagged the same as a class. NOTE(review): no `body:` constraint, so forward declarations / elaborated uses like `struct Foo* p;` presumably match too — confirm
11
+ name: (type_identifier) @name) @def.class
12
+
13
+ (enum_specifier  ; `enum Name …`, also tagged @def.class (no separate enum kind in this pack)
14
+ name: (type_identifier) @name) @def.class
15
+
16
+ ; free function definition: `double compute(double x) { … }`
17
+ (function_definition
18
+ declarator: (function_declarator
19
+ declarator: (identifier) @name)) @def.function
20
+
21
+ ; out-of-line method definition: `double Circle::area() const { … }`
22
+ (function_definition
23
+ declarator: (function_declarator
24
+ declarator: (qualified_identifier
25
+ name: (identifier) @name))) @def.function  ; @name is only the unqualified part (`area`); the `Circle::` scope is not captured
26
+
27
+ ; in-class method declaration: `double area() const;`
28
+ (field_declaration
29
+ declarator: (function_declarator
30
+ declarator: (field_identifier) @name)) @def.function
31
+
32
+ ; in-class *inline* method definition: `double area() const { … }`. The name is a
33
+ ; field_identifier (unlike a free function's plain identifier), so this is a
34
+ ; distinct pattern; the extractor promotes it to METHOD because its enclosing
35
+ ; scope is the class body. Without this the inline method — and any `this->f()`
36
+ ; calls in its body — had no symbol to attach to (BUG-006 residual).
37
+ (function_definition
38
+ declarator: (function_declarator
39
+ declarator: (field_identifier) @name)) @def.function
40
+
41
+ ; constructor/destructor declaration: `Circle(double r);`. NOTE(review): the declarator must be a plain `identifier`, so `~Circle();` (a `destructor_name` in tree-sitter-cpp) presumably does not match, while free-function prototypes such as `double compute(double x);` presumably do — confirm.
42
+ (declaration
43
+ declarator: (function_declarator
44
+ declarator: (identifier) @name)) @def.function
45
+
46
+ ; --- imports ---
47
+ ; `#include "geo/shape.h"` (quoted -> in-repo, relative). `<system>` is skipped: only an include whose path is a `string_literal` matches this pattern.
48
+ (preproc_include
49
+ (string_literal (string_content) @import.module)) @import
@@ -0,0 +1,35 @@
1
+ """The C# language pack (Tier A — structure + namespace-prefix resolution).
2
+
3
+ C# differs from PHP/Java: `using App.Geo` imports a *namespace* (not a class), so
4
+ it resolves to every in-repo file declaring that namespace (and binds all their
5
+ symbols), rather than to one class FQN. Extracts class/struct/enum/record
6
+ (→Class), interface (→Interface), methods + constructors. Member calls
7
+ (`obj.M()`) stay unresolved (ADR-0004).
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from pathlib import Path
13
+
14
+ from agentforge_graph.core import NodeKind
15
+ from agentforge_graph.ingest.pack import DescriptorRules, LanguagePack
16
+
17
+ _HERE = Path(__file__).parent  # directory containing this module; the .scm query files below are read from it
18
+
19
+ CSHARP_PACK = LanguagePack(
20
+ language="csharp",
21
+ lang_slug="cs",  # note: the slug is `cs` while language/grammar are `csharp`
22
+ grammar="csharp",
23
+ extensions=(".cs",),
24
+ structure_queries=(_HERE / "structure.scm").read_text(encoding="utf-8"),  # file is read eagerly, when this module is imported
25
+ reference_queries=(_HERE / "references.scm").read_text(encoding="utf-8"),  # file is read eagerly, when this module is imported
26
+ descriptor_rules=DescriptorRules(
27
+ kinds={
28
+ "def.class": NodeKind.CLASS, # class + struct + enum + record
29
+ "def.interface": NodeKind.INTERFACE,
30
+ "def.function": NodeKind.FUNCTION, # method + constructor (promoted)
31
+ }
32
+ ),
33
+ namespace_sep=".",  # presumably the separator between namespace segments, as in `App.Geo` — confirm against LanguagePack
34
+ namespace_import_prefix=True, # `using App.Geo` = a namespace, not a class
35
+ )
@@ -0,0 +1,12 @@
1
+ ; C# reference queries (feat-002, pack-csharp).
2
+ ; Plain call `F(...)` and member call `obj.F(...)`. @call.recv captures the
3
+ ; receiver so `this.F()` binds to the enclosing class's method (BUG-006); other
4
+ ; receivers stay unresolved (member access, ADR-0004).
5
+
6
+ (invocation_expression  ; plain call `F(...)`: the callee is a bare identifier and no @call.recv is captured
7
+ function: (identifier) @call.callee) @call
8
+
9
+ (invocation_expression  ; member call `obj.F(...)` / `this.F(...)`
10
+ function: (member_access_expression
11
+ expression: _ @call.recv  ; bare `_` wildcard: the receiver may be any node, named or anonymous
12
+ name: (identifier) @call.callee)) @call