agentforge-graph 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. agentforge_graph/__init__.py +6 -0
  2. agentforge_graph/chunking/__init__.py +12 -0
  3. agentforge_graph/chunking/cast.py +159 -0
  4. agentforge_graph/chunking/chunk.py +19 -0
  5. agentforge_graph/chunking/tokens.py +15 -0
  6. agentforge_graph/cli.py +607 -0
  7. agentforge_graph/config.py +259 -0
  8. agentforge_graph/core/__init__.py +54 -0
  9. agentforge_graph/core/conformance.py +270 -0
  10. agentforge_graph/core/contracts.py +163 -0
  11. agentforge_graph/core/kinds.py +68 -0
  12. agentforge_graph/core/models.py +134 -0
  13. agentforge_graph/core/provenance.py +62 -0
  14. agentforge_graph/core/symbols.py +116 -0
  15. agentforge_graph/embed/__init__.py +28 -0
  16. agentforge_graph/embed/base.py +22 -0
  17. agentforge_graph/embed/bedrock.py +85 -0
  18. agentforge_graph/embed/fake.py +34 -0
  19. agentforge_graph/embed/openai.py +67 -0
  20. agentforge_graph/embed/pipeline.py +184 -0
  21. agentforge_graph/embed/registry.py +66 -0
  22. agentforge_graph/embed/report.py +15 -0
  23. agentforge_graph/enrich/__init__.py +70 -0
  24. agentforge_graph/enrich/anthropic.py +38 -0
  25. agentforge_graph/enrich/anthropic_client.py +109 -0
  26. agentforge_graph/enrich/bedrock.py +24 -0
  27. agentforge_graph/enrich/bedrock_client.py +115 -0
  28. agentforge_graph/enrich/bedrock_summarizer.py +23 -0
  29. agentforge_graph/enrich/claude.py +172 -0
  30. agentforge_graph/enrich/enricher.py +108 -0
  31. agentforge_graph/enrich/governs.py +173 -0
  32. agentforge_graph/enrich/governs_enricher.py +152 -0
  33. agentforge_graph/enrich/heuristics.py +224 -0
  34. agentforge_graph/enrich/judge.py +63 -0
  35. agentforge_graph/enrich/registry.py +133 -0
  36. agentforge_graph/enrich/report.py +60 -0
  37. agentforge_graph/enrich/summarizer.py +62 -0
  38. agentforge_graph/enrich/summary_enricher.py +211 -0
  39. agentforge_graph/enrich/taxonomy.py +38 -0
  40. agentforge_graph/frameworks/__init__.py +29 -0
  41. agentforge_graph/frameworks/base.py +75 -0
  42. agentforge_graph/frameworks/detect.py +124 -0
  43. agentforge_graph/frameworks/extractor.py +63 -0
  44. agentforge_graph/frameworks/orm.py +93 -0
  45. agentforge_graph/frameworks/packs/_js_ast.py +56 -0
  46. agentforge_graph/frameworks/packs/_python_ast.py +157 -0
  47. agentforge_graph/frameworks/packs/django/__init__.py +240 -0
  48. agentforge_graph/frameworks/packs/django/models.scm +7 -0
  49. agentforge_graph/frameworks/packs/express/__init__.py +133 -0
  50. agentforge_graph/frameworks/packs/express/routes.scm +8 -0
  51. agentforge_graph/frameworks/packs/fastapi/__init__.py +210 -0
  52. agentforge_graph/frameworks/packs/fastapi/depends.scm +6 -0
  53. agentforge_graph/frameworks/packs/fastapi/routes.scm +10 -0
  54. agentforge_graph/frameworks/packs/flask/__init__.py +143 -0
  55. agentforge_graph/frameworks/packs/flask/routes.scm +11 -0
  56. agentforge_graph/frameworks/packs/nestjs/__init__.py +205 -0
  57. agentforge_graph/frameworks/packs/nestjs/routes.scm +6 -0
  58. agentforge_graph/frameworks/packs/spring/__init__.py +267 -0
  59. agentforge_graph/frameworks/packs/spring/routes.scm +6 -0
  60. agentforge_graph/frameworks/packs/sqlalchemy/__init__.py +250 -0
  61. agentforge_graph/frameworks/packs/sqlalchemy/models.scm +7 -0
  62. agentforge_graph/frameworks/registry.py +44 -0
  63. agentforge_graph/ingest/__init__.py +30 -0
  64. agentforge_graph/ingest/codegraph.py +847 -0
  65. agentforge_graph/ingest/extractor.py +353 -0
  66. agentforge_graph/ingest/incremental/__init__.py +25 -0
  67. agentforge_graph/ingest/incremental/detect.py +118 -0
  68. agentforge_graph/ingest/incremental/dirty.py +61 -0
  69. agentforge_graph/ingest/incremental/indexer.py +218 -0
  70. agentforge_graph/ingest/incremental/meta.py +72 -0
  71. agentforge_graph/ingest/incremental/ports.py +39 -0
  72. agentforge_graph/ingest/pack.py +160 -0
  73. agentforge_graph/ingest/packs/__init__.py +34 -0
  74. agentforge_graph/ingest/packs/cpp/__init__.py +35 -0
  75. agentforge_graph/ingest/packs/cpp/references.scm +15 -0
  76. agentforge_graph/ingest/packs/cpp/structure.scm +49 -0
  77. agentforge_graph/ingest/packs/csharp/__init__.py +35 -0
  78. agentforge_graph/ingest/packs/csharp/references.scm +12 -0
  79. agentforge_graph/ingest/packs/csharp/structure.scm +45 -0
  80. agentforge_graph/ingest/packs/go/__init__.py +38 -0
  81. agentforge_graph/ingest/packs/go/references.scm +12 -0
  82. agentforge_graph/ingest/packs/go/structure.scm +64 -0
  83. agentforge_graph/ingest/packs/java/__init__.py +35 -0
  84. agentforge_graph/ingest/packs/java/references.scm +12 -0
  85. agentforge_graph/ingest/packs/java/structure.scm +38 -0
  86. agentforge_graph/ingest/packs/javascript/__init__.py +34 -0
  87. agentforge_graph/ingest/packs/javascript/references.scm +11 -0
  88. agentforge_graph/ingest/packs/javascript/structure.scm +166 -0
  89. agentforge_graph/ingest/packs/php/__init__.py +35 -0
  90. agentforge_graph/ingest/packs/php/references.scm +15 -0
  91. agentforge_graph/ingest/packs/php/structure.scm +44 -0
  92. agentforge_graph/ingest/packs/python/__init__.py +25 -0
  93. agentforge_graph/ingest/packs/python/references.scm +14 -0
  94. agentforge_graph/ingest/packs/python/structure.scm +57 -0
  95. agentforge_graph/ingest/packs/ruby/__init__.py +37 -0
  96. agentforge_graph/ingest/packs/ruby/references.scm +12 -0
  97. agentforge_graph/ingest/packs/ruby/structure.scm +37 -0
  98. agentforge_graph/ingest/packs/rust/__init__.py +39 -0
  99. agentforge_graph/ingest/packs/rust/references.scm +12 -0
  100. agentforge_graph/ingest/packs/rust/structure.scm +46 -0
  101. agentforge_graph/ingest/packs/typescript/__init__.py +31 -0
  102. agentforge_graph/ingest/packs/typescript/references.scm +11 -0
  103. agentforge_graph/ingest/packs/typescript/structure.scm +99 -0
  104. agentforge_graph/ingest/pipeline.py +134 -0
  105. agentforge_graph/ingest/report.py +84 -0
  106. agentforge_graph/ingest/resolver.py +467 -0
  107. agentforge_graph/ingest/source.py +79 -0
  108. agentforge_graph/knowledge/__init__.py +28 -0
  109. agentforge_graph/knowledge/adr.py +136 -0
  110. agentforge_graph/knowledge/commits.py +152 -0
  111. agentforge_graph/knowledge/ingest.py +312 -0
  112. agentforge_graph/knowledge/mentions.py +71 -0
  113. agentforge_graph/knowledge/report.py +32 -0
  114. agentforge_graph/main.py +21 -0
  115. agentforge_graph/providers.py +36 -0
  116. agentforge_graph/repomap/__init__.py +14 -0
  117. agentforge_graph/repomap/rank.py +161 -0
  118. agentforge_graph/repomap/render.py +55 -0
  119. agentforge_graph/repomap/repomap.py +66 -0
  120. agentforge_graph/retrieve/__init__.py +21 -0
  121. agentforge_graph/retrieve/pack.py +76 -0
  122. agentforge_graph/retrieve/rerank.py +251 -0
  123. agentforge_graph/retrieve/retriever.py +286 -0
  124. agentforge_graph/retrieve/scoring.py +36 -0
  125. agentforge_graph/serve/__init__.py +19 -0
  126. agentforge_graph/serve/engine.py +204 -0
  127. agentforge_graph/serve/http_runner.py +133 -0
  128. agentforge_graph/serve/server.py +110 -0
  129. agentforge_graph/serve/tools.py +307 -0
  130. agentforge_graph/store/__init__.py +32 -0
  131. agentforge_graph/store/_rowmap.py +102 -0
  132. agentforge_graph/store/errors.py +22 -0
  133. agentforge_graph/store/facade.py +89 -0
  134. agentforge_graph/store/kuzu_store.py +380 -0
  135. agentforge_graph/store/lance_store.py +146 -0
  136. agentforge_graph/store/neo4j_store.py +294 -0
  137. agentforge_graph/store/pgvector_store.py +170 -0
  138. agentforge_graph/store/registry.py +45 -0
  139. agentforge_graph/temporal/__init__.py +36 -0
  140. agentforge_graph/temporal/backfill.py +338 -0
  141. agentforge_graph/temporal/events.py +82 -0
  142. agentforge_graph/temporal/index.py +190 -0
  143. agentforge_graph/temporal/mining.py +190 -0
  144. agentforge_graph/temporal/recorder.py +114 -0
  145. agentforge_graph/temporal/store.py +282 -0
  146. agentforge_graph-0.3.2.dist-info/METADATA +291 -0
  147. agentforge_graph-0.3.2.dist-info/RECORD +151 -0
  148. agentforge_graph-0.3.2.dist-info/WHEEL +4 -0
  149. agentforge_graph-0.3.2.dist-info/entry_points.txt +3 -0
  150. agentforge_graph-0.3.2.dist-info/licenses/LICENSE +202 -0
  151. agentforge_graph-0.3.2.dist-info/licenses/NOTICE +14 -0
@@ -0,0 +1,847 @@
1
+ """``CodeGraph`` — the top-level user facade (spec §4.1).
2
+
3
+ ``index`` builds the embedded store (feat-003), runs the pipeline, and
4
+ returns a handle exposing the ``Store`` and the ``IndexReport``. ``open``
5
+ re-opens an existing index without re-indexing. This is the
6
+ ``CodeGraph.open`` that feat-003 deferred to this module (where ``index`` lives).
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import subprocess
12
+ from pathlib import Path
13
+ from typing import TYPE_CHECKING, Any
14
+
15
+ from agentforge_graph.store import Store
16
+
17
+ from .pack import PackRegistry
18
+ from .packs import BUILTIN_PACKS, builtin_registry
19
+ from .pipeline import IngestPipeline
20
+ from .report import IndexReport, ModelInfo, RouteInfo, ServiceInfo
21
+ from .source import RepoSource
22
+
23
+ if TYPE_CHECKING:
24
+ # embed/retrieve import ingest, so reference their types under TYPE_CHECKING.
25
+ from agentforge_graph.embed import EmbedReport
26
+ from agentforge_graph.enrich import SummaryInfo, TaggedInfo
27
+ from agentforge_graph.knowledge import DecisionInfo
28
+ from agentforge_graph.repomap import RankedSymbol
29
+ from agentforge_graph.retrieve import ContextPack
30
+ from agentforge_graph.retrieve.retriever import Mode
31
+
32
+
33
def _git_commit(repo_path: str | Path) -> str:
    """Return the ``HEAD`` commit hash of the repository at ``repo_path``.

    Shells out to ``git -C <repo_path> rev-parse HEAD``. An empty string is
    returned when git cannot be launched or the command exits non-zero, so a
    falsy result means "no commit available".
    """
    cmd = ["git", "-C", str(repo_path), "rev-parse", "HEAD"]
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, check=True)
    except (subprocess.SubprocessError, OSError):
        # check=True raises CalledProcessError (a SubprocessError) on failure;
        # OSError covers a missing git executable.
        return ""
    return proc.stdout.strip()
44
+
45
+
46
def _commit_time(repo_path: str | Path, commit: str) -> int:
    """Return the committer time (epoch seconds) of ``commit``.

    This is the timestamp stamped on feat-009 events. Note that the git
    format placeholder used here, ``%ct``, is the *committer* date; the
    author date would be ``%at``.

    Args:
        repo_path: Directory passed to ``git -C``.
        commit: Any revision git can resolve (hash, ``HEAD~N``, ...). An
            empty string short-circuits without running git.

    Returns:
        The timestamp as an ``int``, or ``0`` when ``commit`` is empty, git
        cannot be run, the revision does not resolve, or the output is not
        an integer.
    """
    if not commit:
        return 0
    cmd = ["git", "-C", str(repo_path), "show", "-s", "--format=%ct", commit]
    try:
        out = subprocess.run(cmd, capture_output=True, text=True, check=True)
        # Empty output is treated as 0 rather than a parse error.
        return int(out.stdout.strip() or 0)
    except (subprocess.SubprocessError, OSError, ValueError):
        return 0
61
+
62
+
63
def _build_recorder(repo_path: str | Path, config: str | Path | None, root: Path, commit: str):  # type: ignore[no-untyped-def]
    """Return the feat-009 evolution-log recorder, or ``None`` when inactive.

    A recorder is built only when ``commit`` is non-empty (callers pass the
    result of ``_git_commit``, which is empty outside a git repo) and
    ``TemporalConfig.load(config).enabled`` is truthy. The ``temporal``
    package is imported lazily, only on the path that actually builds a
    recorder. ``repo_path`` is accepted but not used here.
    """
    from agentforge_graph.config import TemporalConfig

    # Short-circuits: the config is not loaded at all when there is no commit.
    temporal_active = bool(commit) and TemporalConfig.load(config).enabled
    if not temporal_active:
        return None

    from agentforge_graph.temporal import build_recorder

    return build_recorder(str(root))
74
+
75
+
76
async def _prune_temporal(repo_path: str | Path, config: str | Path | None, root: Path) -> None:
    """Apply retention pruning to the temporal sidecar (feat-009 §4.10).

    The horizon is the commit time of ``HEAD~<retention_commits>``; it is
    handed to ``TemporalStore.prune``. Nothing happens when temporal is
    disabled, when ``<root>/temporal.db`` does not exist, or when the horizon
    cannot be resolved (``_commit_time`` returns 0, e.g. history shorter than
    ``retention_commits``).
    """
    from agentforge_graph.config import TemporalConfig

    temporal_cfg = TemporalConfig.load(config)
    sidecar = root / "temporal.db"
    if not (temporal_cfg.enabled and sidecar.exists()):
        return

    cutoff = _commit_time(repo_path, f"HEAD~{temporal_cfg.retention_commits}")
    if cutoff <= 0:  # unresolved horizon -> nothing to prune
        return

    from agentforge_graph.temporal import TemporalStore

    store = TemporalStore.open(root)
    await store.prune(cutoff)
91
+
92
+
93
def _framework_extractor(
    repo_path: str | Path, config: str | Path | None, registry: PackRegistry
) -> Any:
    """Build a ``FrameworkExtractor`` for the framework packs active in this repo.

    The union of file extensions across ``registry.packs`` is passed to
    ``active_frameworks`` together with the built-in framework registry
    (feat-011); whatever packs it returns are wrapped in the extractor.
    NOTE(review): how an extractor with no packs is treated downstream is not
    visible here.
    """
    from agentforge_graph.frameworks import (
        FrameworkExtractor,
        active_frameworks,
        builtin_framework_registry,
    )

    known_extensions = set()
    for pack in registry.packs:
        known_extensions.update(pack.extensions)

    active = active_frameworks(
        repo_path, config, builtin_framework_registry(), known_extensions
    )
    return FrameworkExtractor(active)
108
+
109
+
110
async def _ingest_knowledge(
    store: Store,
    repo_path: str | Path,
    config: str | Path | None,
    repo: str,
    commit: str,
    registry: PackRegistry,
    report: IndexReport,
) -> None:
    """Run the ADR/knowledge pass (feat-010) and fold its stats into ``report``.

    Callers invoke this after code indexing. Returns immediately when
    ``KnowledgeConfig.load(config).enabled`` is false. Otherwise:

    * ``KnowledgeIngestor`` ingests ADRs/docs and its counters are copied
      onto ``report``;
    * when ``commit_messages`` is set, ``CommitIngestor`` runs and its result
      is stored as ``report.commits_indexed``;
    * a non-zero decision count is added to ``report.by_node_kind["Decision"]``.
    """
    from agentforge_graph.config import KnowledgeConfig
    from agentforge_graph.knowledge import CommitIngestor, KnowledgeIngestor

    knowledge_cfg = KnowledgeConfig.load(config)
    if not knowledge_cfg.enabled:
        return

    code_extensions = {ext for pack in registry.packs for ext in pack.extensions}
    ingestor = KnowledgeIngestor(repo, commit)
    stats = await ingestor.ingest(
        store.graph,
        repo_path,
        knowledge_cfg.adr_globs,
        code_extensions,
        doc_globs=knowledge_cfg.doc_globs,
    )

    report.decisions_indexed = stats.decisions_indexed
    report.governs_resolved = stats.governs_resolved
    report.mentions_unresolved = stats.mentions_unresolved
    report.docs_indexed = stats.docs_indexed
    report.describes_resolved = stats.describes_resolved

    if knowledge_cfg.commit_messages:
        commit_ingestor = CommitIngestor(
            repo, repo_path, commit, limit=knowledge_cfg.commit_messages_limit
        )
        report.commits_indexed = await commit_ingestor.ingest(store.graph)

    new_decisions = stats.decisions_indexed
    if new_decisions:
        kind_counts = report.by_node_kind
        kind_counts["Decision"] = kind_counts.get("Decision", 0) + new_decisions
144
+
145
+
146
def _save_meta(
    root: Path, commit: str, registry: PackRegistry, file_hashes: dict[str, str]
) -> None:
    """Persist the index manifest under ``root`` (feat-004).

    The manifest records the indexed commit, the fingerprints of the active
    packs and the per-file hashes. Callers save it *last*, so a crash earlier
    in a run leaves the previous manifest in place and the next run re-derives
    its diff from that. NOTE(review): atomicity of the write is up to
    ``IndexMeta.save`` and is not visible here.
    """
    from .incremental import IndexMeta

    manifest = IndexMeta(
        indexed_commit=commit,
        pack_versions=IndexMeta.fingerprints(registry.packs),
        files=file_hashes,
    )
    manifest.save(root)
159
+
160
+
161
def _registry_for(languages: str | list[str] | None) -> PackRegistry:
    """Return the pack registry for a ``languages`` selection.

    ``None`` or the string ``"auto"`` selects the full built-in registry. Any
    other string, or a list of strings, keeps only the built-in packs whose
    ``language`` or ``lang_slug`` is among the requested names.
    """
    if languages is None or languages == "auto":
        return builtin_registry()

    if isinstance(languages, str):
        requested = {languages}
    else:
        requested = set(languages)

    selected = [
        pack
        for pack in BUILTIN_PACKS
        if pack.language in requested or pack.lang_slug in requested
    ]
    return PackRegistry(selected)
167
+
168
+
169
def _source_registry(
    repo_path: str | Path,
    config: str | Path | None,
    languages: str | list[str] | None,
    include: list[str] | None = None,
    exclude: list[str] | None = None,
) -> tuple[RepoSource, PackRegistry]:
    """Build the ``RepoSource`` and ``PackRegistry`` for a repo from config.

    An explicit ``languages`` argument wins over ``ingest.languages`` from the
    config. Caller-supplied ``exclude`` patterns are appended to the
    configured ones; ``include`` is passed through unchanged.
    """
    from agentforge_graph.config import IngestConfig

    ingest_cfg = IngestConfig.load(config)

    effective_languages = ingest_cfg.languages if languages is None else languages
    registry = _registry_for(effective_languages)

    exclude_patterns = ingest_cfg.exclude + (exclude or [])
    source = RepoSource(
        repo_path,
        include=include,
        exclude=exclude_patterns,
        max_file_kb=ingest_cfg.max_file_kb,
    )
    return source, registry
187
+
188
+
189
+ class CodeGraph:
190
def __init__(
    self,
    store: Store,
    repo_path: str | Path = ".",
    config: str | Path | None = None,
    languages: str | list[str] | None = None,
    report: IndexReport | None = None,
) -> None:
    """Wrap an already-open ``store`` for the repository at ``repo_path``.

    Only stores its arguments; no I/O happens here. ``report`` is the last
    index report, if any. The embed report starts out unset and is filled in
    by ``embed``.
    """
    # Handles and settings supplied by the caller.
    self._store = store
    self._repo_path = repo_path
    self._config = config
    self._languages = languages
    # Results of the most recent index / embed runs.
    self._report = report
    self._embed_report: EmbedReport | None = None
204
+
205
@classmethod
async def index(
    cls,
    repo_path: str | Path = ".",
    languages: str | list[str] | None = None,
    config: str | Path | None = None,
    include: list[str] | None = None,
    exclude: list[str] | None = None,
    embed: bool = False,
    full: bool = False,
) -> CodeGraph:
    """Index ``repo_path`` and return a ``CodeGraph`` handle over the result.

    The incremental path (feat-004) is taken only when all of these hold:
    ``ingest.incremental`` is set in the config, ``full`` is false, a prior
    index manifest reports itself as indexed, and no pack fingerprint has
    changed. Otherwise the whole source goes through ``IngestPipeline`` and,
    when a temporal recorder is active, all symbols are seeded into the
    evolution log.

    In both cases the knowledge pass, the manifest save and temporal pruning
    follow in that order. ``embed=True`` additionally runs ``embed()`` before
    returning.
    """
    from agentforge_graph.config import IngestConfig, StoreConfig

    from .incremental import ChangeDetector, IndexMeta

    store = await Store.open(repo_path, config)
    source, registry = _source_registry(repo_path, config, languages, include, exclude)
    repo = Path(repo_path).resolve().name
    commit = _git_commit(repo_path)
    root = Path(repo_path) / StoreConfig.load(config).path
    ingest_cfg = IngestConfig.load(config)
    meta = IndexMeta.load(root)

    incremental = (
        ingest_cfg.incremental
        and not full
        and meta.is_indexed()
        and not meta.packs_changed(registry.packs)
    )

    graph = cls(store, repo_path, config, languages)
    frameworks = _framework_extractor(repo_path, config, registry)
    recorder = _build_recorder(repo_path, config, root, commit)  # feat-009 (None if off)
    # Detection runs on both paths: its file hashes feed the manifest below.
    detected = await ChangeDetector(repo_path).detect(source, meta, registry)

    if not incremental:
        pipeline = IngestPipeline(repo=repo, commit=commit, frameworks=frameworks)
        report = await pipeline.run(source, store.graph, registry)
        if recorder is not None:  # full index: open intervals for all symbols
            from agentforge_graph.temporal import seed_symbols

            await seed_symbols(
                store.graph,
                recorder,
                commit,
                _commit_time(repo_path, commit),
                repo_root=str(repo_path),
            )
    else:
        report = await graph._apply_changes(
            source,
            registry,
            repo,
            commit,
            detected.changes,
            ingest_cfg.resolve_scope_hops,
            root,
            frameworks,
            recorder,
            _commit_time(repo_path, commit),
        )

    graph._report = report
    await _ingest_knowledge(store, repo_path, config, repo, commit, registry, report)
    # Manifest is written only after indexing and the knowledge pass succeeded.
    _save_meta(root, commit, registry, detected.file_hashes)
    await _prune_temporal(repo_path, config, root)
    if embed:
        await graph.embed()
    return graph
276
+
277
async def refresh(self) -> IndexReport:
    """Re-index only what changed since the last index (feat-004).

    This is the explicit incremental entry point. It detects changes against
    the stored manifest, applies them through ``_apply_changes``, then runs
    the knowledge pass, saves the new manifest and prunes the temporal log.
    The resulting report is kept on the instance and returned.
    """
    from agentforge_graph.config import IngestConfig, StoreConfig

    from .incremental import ChangeDetector, IndexMeta

    repo_path = self._repo_path
    config = self._config

    source, registry = _source_registry(repo_path, config, self._languages)
    repo = Path(repo_path).resolve().name
    commit = _git_commit(repo_path)
    root = Path(repo_path) / StoreConfig.load(config).path
    ingest_cfg = IngestConfig.load(config)
    meta = IndexMeta.load(root)
    frameworks = _framework_extractor(repo_path, config, registry)
    recorder = _build_recorder(repo_path, config, root, commit)  # feat-009
    detected = await ChangeDetector(repo_path).detect(source, meta, registry)

    report = await self._apply_changes(
        source,
        registry,
        repo,
        commit,
        detected.changes,
        ingest_cfg.resolve_scope_hops,
        root,
        frameworks,
        recorder,
        _commit_time(repo_path, commit),
    )
    self._report = report

    await _ingest_knowledge(self._store, repo_path, config, repo, commit, registry, report)
    _save_meta(root, commit, registry, detected.file_hashes)
    await _prune_temporal(repo_path, config, root)
    return report
312
+
313
async def _apply_changes(
    self,
    source: RepoSource,
    registry: PackRegistry,
    repo: str,
    commit: str,
    changes: object,
    resolve_scope_hops: int,
    root: Path,
    frameworks: Any = None,
    recorder: Any = None,
    commit_ts: int = 0,
) -> IndexReport:
    """Apply a detected ``ChangeSet`` to the store via ``IncrementalIndexer``.

    ``changes`` is typed ``object`` in the signature but must be a
    ``ChangeSet``; this is asserted. The indexer is given a ``DirtySet``
    rooted at ``root`` plus the optional framework extractor, temporal
    recorder and commit timestamp. Returns the indexer's report.
    """
    from .incremental import ChangeSet, DirtySet, IncrementalIndexer

    assert isinstance(changes, ChangeSet)

    dirty_set = DirtySet(root)
    incremental_indexer = IncrementalIndexer(
        self._store,
        source,
        registry,
        repo,
        commit,
        resolve_scope_hops=resolve_scope_hops,
        dirty=dirty_set,
        frameworks=frameworks,
        recorder=recorder,
        commit_ts=commit_ts,
    )
    report = await incremental_indexer.refresh(changes)
    return report
342
+
343
@classmethod
async def open(
    cls,
    repo_path: str | Path = ".",
    config: str | Path | None = None,
    languages: str | list[str] | None = None,
) -> CodeGraph:
    """Open the store for ``repo_path`` and wrap it, without indexing anything."""
    store = await Store.open(repo_path, config)
    return cls(store, repo_path, config, languages)
351
+
352
async def embed(self, embedder: object | None = None, only_dirty: bool = False) -> EmbedReport:
    """Chunk and embed the indexed source, returning the ``EmbedReport``.

    ``embedder`` is used only if it is an ``Embedder`` instance; anything else
    (including ``None``) falls back to one built from ``EmbedConfig``.

    With ``only_dirty`` (feat-004) the run is restricted to the file paths of
    the symbol ids the ``DirtySet`` lists for the ``embeddings`` consumer, and
    those ids are marked clean afterwards.
    """
    from agentforge_graph.chunking import CASTChunker
    from agentforge_graph.config import ChunkingConfig, EmbedConfig, StoreConfig
    from agentforge_graph.core import SymbolID
    from agentforge_graph.embed import Embedder, EmbedPipeline, embedder_from_config

    from .incremental import DirtySet

    chunk_cfg = ChunkingConfig.load(self._config)
    if isinstance(embedder, Embedder):
        active_embedder = embedder
    else:
        active_embedder = embedder_from_config(EmbedConfig.load(self._config))

    source, registry = _source_registry(self._repo_path, self._config, self._languages)
    chunker = CASTChunker(chunk_cfg.max_tokens, chunk_cfg.min_tokens)
    pipeline = EmbedPipeline(chunker, active_embedder, commit=_git_commit(self._repo_path))
    root = Path(self._repo_path) / StoreConfig.load(self._config).path

    dirty: DirtySet | None = None
    dirty_ids: list[str] = []
    only_paths: set[str] | None = None
    if only_dirty:
        dirty = DirtySet(root)
        dirty_ids = await dirty.dirty_for("embeddings")
        only_paths = {SymbolID.parse(symbol_id).path for symbol_id in dirty_ids}

    report = await pipeline.run(
        self._store, source, registry, only_paths=only_paths, doc_root=root
    )
    self._embed_report = report

    if dirty is not None:
        # Clear exactly the ids read above, after the embed run has finished.
        await dirty.mark_clean("embeddings", dirty_ids)
    return report
390
+
391
async def retrieve(
    self,
    query: str | None = None,
    symbol: str | None = None,
    mode: Mode = "context",
    k: int | None = None,
    depth: int | None = None,
    embedder: object | None = None,
    include_llm_facts: bool = True,
    as_of: str | None = None,
) -> ContextPack:
    """Hybrid retrieval (feat-006): vector entry plus graph expansion.

    ``include_llm_facts=False`` excludes llm-derived items (decisions, tags,
    etc.) wholesale (feat-010/012).

    ``as_of=<commit>`` (feat-009) restricts results to the symbol ids the
    temporal index reports alive at that commit. Raises ``TemporalError``
    when ``as_of`` is given but no temporal log exists.
    NOTE(review): ``alive_at`` may also raise for commits beyond the
    retention horizon; that is not visible here.

    ``embedder`` is used only if it is an ``Embedder`` instance; otherwise
    one is built from ``EmbedConfig``.
    """
    from agentforge_graph.config import EmbedConfig, RetrieveConfig
    from agentforge_graph.embed import Embedder, embedder_from_config
    from agentforge_graph.retrieve import Retriever
    from agentforge_graph.retrieve.rerank import reranker_from_config

    alive_ids: set[str] | None = None
    if as_of is not None:
        from agentforge_graph.temporal import TemporalError

        temporal_index = self._temporal_index()
        if temporal_index is None:
            raise TemporalError("as_of requested but no temporal log exists")
        alive_ids = await temporal_index.alive_at(as_of)

    if isinstance(embedder, Embedder):
        active_embedder = embedder
    else:
        active_embedder = embedder_from_config(EmbedConfig.load(self._config))

    retrieve_cfg = RetrieveConfig.load(self._config)
    reranker = reranker_from_config(
        retrieve_cfg.rerank, retrieve_cfg.rerank_weight, retrieve_cfg.rerank_model
    )
    retriever = Retriever(self._store, active_embedder, retrieve_cfg, reranker=reranker)

    pack = await retriever.retrieve(
        query=query,
        symbol=symbol,
        mode=mode,
        k=k,
        depth=depth,
        include_llm_facts=include_llm_facts,
        allow_ids=alive_ids,
    )
    return pack
443
+
444
async def repo_map(
    self,
    budget_tokens: int | None = None,
    focus: list[str] | None = None,
    scope: str | None = None,
) -> str:
    """Render the repo map (feat-007) as text.

    Builds a ``RepoMap`` from the store and ``RepoMapConfig`` and forwards
    ``budget_tokens``, ``focus`` and ``scope`` to its ``render``.
    """
    from agentforge_graph.config import RepoMapConfig
    from agentforge_graph.repomap import RepoMap

    map_cfg = RepoMapConfig.load(self._config)
    repo_map_builder = RepoMap(self._store, map_cfg)
    rendered = await repo_map_builder.render(
        budget_tokens=budget_tokens, focus=focus, scope=scope
    )
    return rendered
456
+
457
async def ranked_symbols(
    self, k: int = 100, focus: list[str] | None = None
) -> list[RankedSymbol]:
    """Return ``RepoMap.ranked_symbols`` for this store, forwarding ``k`` and ``focus``."""
    from agentforge_graph.config import RepoMapConfig
    from agentforge_graph.repomap import RepoMap

    map_cfg = RepoMapConfig.load(self._config)
    ranker = RepoMap(self._store, map_cfg)
    ranked = await ranker.ranked_symbols(k=k, focus=focus)
    return ranked
465
+
466
async def routes(self) -> list[RouteInfo]:
    """List every extracted endpoint (feat-011), sorted by (path, method).

    Each entry carries the HTTP method, path pattern, framework and handler
    read from the route node's attributes (missing ones become ``""``), the
    file path parsed from the node id, and the first element of the node's
    span as the line (``0`` when there is no span).
    """
    from agentforge_graph.core import GraphQuery, NodeKind, SymbolID

    route_query = GraphQuery(kinds=[NodeKind.ROUTE], limit=10_000_000)
    result = await self._store.graph.query(route_query)

    infos = []
    for node in result.nodes:
        attrs = node.attrs
        infos.append(
            RouteInfo(
                method=str(attrs.get("method", "")),
                path=str(attrs.get("path", "")),
                framework=str(attrs.get("framework", "")),
                handler=str(attrs.get("handler", "")),
                file=SymbolID.parse(node.id).path,
                line=node.span[0] if node.span else 0,
            )
        )
    return sorted(infos, key=lambda info: (info.path, info.method))
487
+
488
async def models(self) -> list[ModelInfo]:
    """List every extracted ORM data model (feat-011), sorted by name.

    For each ``DATA_MODEL`` node this collects:

    * ``fields`` - sorted names of ``HAS_FIELD`` neighbours that are
      variables with a non-empty name;
    * ``relations`` - outgoing ``RELATES_TO`` edges as ``{"to", "kind",
      "via"}`` dicts sorted by (to, via), where ``to`` is the target model's
      display name when the target is a known model, else the raw node id;
    * table, framework, class symbol and source location.

    A model's display name is its ``table`` attribute if set, else the node
    name. One neighbour query and one adjacency query are issued per model.
    """
    from agentforge_graph.core import EdgeKind, GraphQuery, NodeKind, SymbolID

    graph = self._store.graph
    result = await graph.query(GraphQuery(kinds=[NodeKind.DATA_MODEL], limit=10_000_000))
    model_nodes = result.nodes

    # Lets relation targets be shown by model name instead of raw id.
    display_name = {}
    for node in model_nodes:
        display_name[node.id] = str(node.attrs.get("table") or node.name)

    collected: list[ModelInfo] = []
    for node in model_nodes:
        field_nodes = await graph.neighbors(node.id, [EdgeKind.HAS_FIELD], depth=1)
        field_names = sorted(
            field.name
            for field in field_nodes
            if field.kind is NodeKind.VARIABLE and field.name
        )

        relation_edges = await graph.adjacent(
            node.id, [EdgeKind.RELATES_TO], direction="out"
        )
        relations = []
        for edge in relation_edges:
            relations.append(
                {
                    "to": display_name.get(edge.dst, edge.dst),
                    "kind": str(edge.attrs.get("kind", "")),
                    "via": str(edge.attrs.get("via", "")),
                }
            )
        relations.sort(key=lambda rel: (rel["to"], rel["via"]))

        collected.append(
            ModelInfo(
                name=str(node.attrs.get("table") or node.name),
                table=str(node.attrs.get("table", "")),
                framework=str(node.attrs.get("framework", "")),
                fields=field_names,
                relations=relations,
                cls=str(node.attrs.get("class", "")),
                file=SymbolID.parse(node.id).path,
                line=node.span[0] if node.span else 0,
            )
        )

    return sorted(collected, key=lambda model: model.name)
530
+
531
async def services(self) -> list[ServiceInfo]:
    """List every DI-provided service (feat-011), sorted by name.

    Each entry has the provider name (the ``provider`` attribute, else the
    node name), the framework, the sorted ids of the symbols it is
    ``INJECTED_INTO`` (outgoing edges), and the source location. One
    adjacency query is issued per service node.
    """
    from agentforge_graph.core import EdgeKind, GraphQuery, NodeKind, SymbolID

    graph = self._store.graph
    result = await graph.query(GraphQuery(kinds=[NodeKind.SERVICE], limit=10_000_000))

    collected: list[ServiceInfo] = []
    for node in result.nodes:
        injection_edges = await graph.adjacent(
            node.id, [EdgeKind.INJECTED_INTO], direction="out"
        )
        consumers = sorted(edge.dst for edge in injection_edges)
        collected.append(
            ServiceInfo(
                name=str(node.attrs.get("provider") or node.name),
                framework=str(node.attrs.get("framework", "")),
                injected_into=consumers,
                file=SymbolID.parse(node.id).path,
                line=node.span[0] if node.span else 0,
            )
        )

    return sorted(collected, key=lambda service: service.name)
556
+
557
def _temporal_index(self) -> Any:
    """Return a ``TemporalIndex`` over the sidecar, or ``None`` if there is none.

    The sidecar is ``<store path>/temporal.db`` under the repo. The
    ``temporal`` package is imported only when that file exists, keeping the
    higher temporal layer lazily loaded (ADR-0001).
    """
    from agentforge_graph.config import StoreConfig, TemporalConfig

    index_root = Path(self._repo_path) / StoreConfig.load(self._config).path
    if (index_root / "temporal.db").exists():
        from agentforge_graph.temporal import TemporalIndex, TemporalStore

        temporal_store = TemporalStore.open(index_root)
        return TemporalIndex(
            temporal_store,
            self._store.graph,
            repo_root=str(self._repo_path),
            retention_commits=TemporalConfig.load(self._config).retention_commits,
        )
    return None
574
+
575
async def history(self, symbol_id: str) -> Any:
    """Return a symbol's evolution record (feat-009) from the temporal index.

    Delegates to ``TemporalIndex.history``. Returns ``None`` when no temporal
    index is available.
    """
    temporal_index = self._temporal_index()
    if temporal_index is None:
        return None
    return await temporal_index.history(symbol_id)
580
+
581
async def changed_since(self, ref: str, scope: str | None = None) -> list[Any]:
    """Return the symbols changed since ``ref`` (feat-009).

    Delegates to ``TemporalIndex.changed_since``, forwarding the optional
    path ``scope``. Returns an empty list when no temporal index is
    available.
    """
    temporal_index = self._temporal_index()
    if temporal_index is None:
        return []
    return await temporal_index.changed_since(ref, scope)
586
+
587
async def backfill(self, history: int) -> Any:
    """Seed the evolution log from git history (feat-009 chunk 4).

    Forwards the repo path, config, ``history`` (number of commits) and the
    language selection to ``run_backfill`` and returns its result.
    NOTE(review): the conditions under which ``run_backfill`` is a no-op are
    defined in that function, not here.
    """
    from agentforge_graph.temporal.backfill import run_backfill

    backfill_report = await run_backfill(
        self._repo_path, self._config, history, languages=self._languages
    )
    return backfill_report
595
+
596
async def temporal_status(self) -> dict[str, Any]:
    """Summarise the temporal sidecar for ``ckg status``.

    Returns a dict with ``enabled`` (from ``TemporalConfig``), ``events``
    (event count), ``has_sidecar`` and ``backfilled_through`` (the stored
    meta value, or ``""``). When ``temporal.db`` does not exist, the counts
    are zero/empty and the temporal store is never opened.
    """
    from agentforge_graph.config import StoreConfig, TemporalConfig

    enabled = TemporalConfig.load(self._config).enabled
    index_root = Path(self._repo_path) / StoreConfig.load(self._config).path
    sidecar = index_root / "temporal.db"
    if not sidecar.exists():
        return {"enabled": enabled, "events": 0, "has_sidecar": False, "backfilled_through": ""}

    from agentforge_graph.temporal import TemporalStore

    temporal_store = TemporalStore.open(index_root)
    event_count = await temporal_store.count_events()
    backfilled = await temporal_store.get_meta("backfilled_through") or ""
    return {
        "enabled": enabled,
        "events": event_count,
        "has_sidecar": True,
        "backfilled_through": backfilled,
    }
616
+
617
async def decisions(
    self, scope: str | None = None, status: str | None = None
) -> list[DecisionInfo]:
    """List architecture decisions (feat-010), optionally filtered.

    ``status`` keeps only decisions whose ``status`` attribute equals it.
    ``scope`` keeps a decision whose own path starts with the prefix, or
    which governs (outgoing ``GOVERNS`` edge) a symbol whose path does.
    A falsy ``scope`` / ``status`` disables that filter.

    The result is sorted on the ``(status, date)`` key with the whole key
    reversed, so both status and date are in descending string order.
    """
    from agentforge_graph.core import EdgeKind, GraphQuery, NodeKind, SymbolID
    from agentforge_graph.knowledge import DecisionInfo

    nodes = (
        await self._store.graph.query(GraphQuery(kinds=[NodeKind.DECISION], limit=10_000_000))
    ).nodes
    out: list[DecisionInfo] = []
    for n in nodes:
        node_status = str(n.attrs.get("status", ""))
        # Apply the cheap attribute filter first so a rejected decision
        # never costs a graph adjacency lookup.
        if status and node_status != status:
            continue
        governs = [
            e.dst for e in await self._store.graph.adjacent(n.id, [EdgeKind.GOVERNS], "out")
        ]
        # Parsed once: used by the scope filter and as the ``path`` fallback.
        own = SymbolID.parse(n.id).path
        if (
            scope
            and not own.startswith(scope)
            and not any(SymbolID.parse(g).path.startswith(scope) for g in governs)
        ):
            continue
        out.append(
            DecisionInfo(
                id=n.id,
                adr_id=str(n.attrs.get("adr_id", "")),
                title=str(n.attrs.get("title", n.name)),
                status=node_status,
                date=str(n.attrs.get("date", "")),
                path=str(n.attrs.get("path", own)),
                governs=governs,
            )
        )
    out.sort(key=lambda d: (d.status, d.date), reverse=True)
    return out
655
+
656
async def enrich(self, judge: object | None = None, budget_usd: float | None = None) -> Any:
    """Run LLM pattern enrichment (feat-012) and return an ``EnrichReport``.

    If the ``patterns`` DirtySet is non-empty only those ids are processed
    (incremental); otherwise every Class/Function symbol in the graph is
    tagged. A supplied ``judge`` is used when it is a ``PatternJudge``;
    anything else falls back to the provider-selected judge built from
    ``EnrichConfig``. ``budget_usd`` overrides the configured budget when
    given. Never runs implicitly — explicit call only.
    """
    from agentforge_graph.config import EnrichConfig, StoreConfig
    from agentforge_graph.core import GraphQuery
    from agentforge_graph.enrich import PatternHeuristics, PatternJudge, PatternTagEnricher
    from agentforge_graph.enrich.heuristics import Recall, class_and_function_ids

    from .incremental import DirtySet

    cfg = EnrichConfig.load(self._config)
    repo_dir = Path(self._repo_path)
    repo = repo_dir.resolve().name
    root = repo_dir / StoreConfig.load(self._config).path

    active_judge: PatternJudge
    if not isinstance(judge, PatternJudge):
        from agentforge_graph.enrich.registry import judge_from_config

        active_judge = judge_from_config(cfg)  # ENH-003: provider-selected
    else:
        active_judge = judge

    dirty = DirtySet(root)
    pending = await dirty.dirty_for("patterns")
    if pending:
        symbol_ids = pending
    else:
        # Nothing dirty: fall back to a full pass over the graph.
        everything = await self._store.graph.query(GraphQuery(limit=10_000_000))
        symbol_ids = class_and_function_ids(everything.nodes)

    recall: Recall
    if cfg.patterns_recall == "broad":
        recall = "broad"
    else:
        recall = "conservative"

    enricher = PatternTagEnricher(
        repo,
        active_judge,
        heuristics=PatternHeuristics(recall=recall),
        confidence_floor=cfg.confidence_floor,
        budget_usd=cfg.budget_usd if budget_usd is None else budget_usd,
        concurrency=cfg.concurrency,
        commit=_git_commit(self._repo_path),
    )
    report = await enricher.enrich(self._store.graph, symbol_ids)
    # Only the ids the enricher actually judged are cleared from the dirty set.
    await dirty.mark_clean("patterns", enricher.last_judged_ids)
    return report
699
+
700
async def infer_governs(
    self, matcher: object | None = None, budget_usd: float | None = None
) -> Any:
    """Run the LLM ``infer_governs`` pass (feat-010); returns a ``GovernsReport``.

    For ADRs whose prose names no code, the decision text is matched
    against repo symbols and ``GOVERNS`` edges are written with ``llm``
    provenance. Only decisions with zero *parsed* GOVERNS are touched.
    A supplied ``matcher`` is used when it is a ``GovernsMatcher``;
    otherwise one is built from ``EnrichConfig`` (provider). The budget
    comes from ``knowledge.infer_budget_usd`` unless ``budget_usd`` is
    given. Explicit call only (``ckg enrich --decisions``).
    """
    from agentforge_graph.config import EnrichConfig, KnowledgeConfig
    from agentforge_graph.enrich import DecisionGovernsInferencer, GovernsMatcher

    enrich_cfg = EnrichConfig.load(self._config)
    knowledge_cfg = KnowledgeConfig.load(self._config)
    repo = Path(self._repo_path).resolve().name

    active_matcher: GovernsMatcher
    if not isinstance(matcher, GovernsMatcher):
        from agentforge_graph.enrich.registry import governs_matcher_from_config

        active_matcher = governs_matcher_from_config(enrich_cfg)
    else:
        active_matcher = matcher

    inferencer = DecisionGovernsInferencer(
        repo,
        active_matcher,
        confidence_floor=enrich_cfg.confidence_floor,
        budget_usd=knowledge_cfg.infer_budget_usd if budget_usd is None else budget_usd,
        commit=_git_commit(self._repo_path),
    )
    report = await inferencer.enrich(self._store.graph)
    return report
730
+
731
async def tagged(self, pattern: str, min_confidence: float = 0.7) -> list[TaggedInfo]:
    """Return symbols tagged with ``pattern`` at or above ``min_confidence`` (feat-012).

    Looks up the pattern's tag node and walks its incoming ``TAGGED``
    edges. An edge is kept when its ``confidence`` attribute (default
    ``0.0``) is at least ``min_confidence`` and its source id parses to a
    non-empty descriptor. Results are ordered by confidence, highest
    first; an unknown pattern yields an empty list.
    """
    from agentforge_graph.core import EdgeKind, SymbolID
    from agentforge_graph.enrich import TaggedInfo, pattern_tag_id

    repo = Path(self._repo_path).resolve().name
    tag_id = pattern_tag_id(repo, pattern)
    tag_node = await self._store.graph.get(tag_id)
    if tag_node is None:
        return []

    edges = await self._store.graph.adjacent(tag_id, [EdgeKind.TAGGED], "in")
    hits: list[TaggedInfo] = []
    for edge in edges:
        confidence = float(edge.attrs.get("confidence", 0.0))
        # Confidence is tested first so low-confidence edges skip the id parse.
        keep = confidence >= min_confidence and SymbolID.parse(edge.src).descriptor
        if not keep:
            continue
        hits.append(
            TaggedInfo(
                symbol_id=edge.src,
                pattern=pattern,
                confidence=confidence,
                rationale=str(edge.attrs.get("rationale", "")),
            )
        )
    return sorted(hits, key=lambda hit: hit.confidence, reverse=True)
754
+
755
async def summarize(
    self, summarizer: object | None = None, budget_usd: float | None = None
) -> Any:
    """Build bottom-up module summaries (feat-012) and return the enricher's report.

    Produces file summaries plus one repo summary, embedded for concept
    search. If ``DirtySet("summaries")`` is non-empty only the files that
    contain the dirty symbols are summarized; otherwise all files are. A
    supplied ``summarizer`` is used when it is a ``Summarizer``; anything
    else falls back to the provider-selected one built from config, and
    the embedder always comes from ``EmbedConfig``. ``budget_usd``
    overrides the configured budget when given. Explicit call only.
    """
    from agentforge_graph.config import EmbedConfig, EnrichConfig, StoreConfig
    from agentforge_graph.core import GraphQuery, NodeKind, SymbolID
    from agentforge_graph.embed import embedder_from_config
    from agentforge_graph.enrich import Summarizer, SummaryEnricher

    from .incremental import DirtySet

    cfg = EnrichConfig.load(self._config)
    repo_dir = Path(self._repo_path)
    repo = repo_dir.resolve().name
    root = repo_dir / StoreConfig.load(self._config).path

    active_summarizer: Summarizer
    if not isinstance(summarizer, Summarizer):
        from agentforge_graph.enrich.registry import summarizer_from_config

        active_summarizer = summarizer_from_config(cfg)  # ENH-003: provider-selected
    else:
        active_summarizer = summarizer

    file_query = GraphQuery(kinds=[NodeKind.FILE], limit=10**9)
    file_nodes = (await self._store.graph.query(file_query)).nodes

    dirty = DirtySet(root)
    dirty_ids = await dirty.dirty_for("summaries")
    if dirty_ids:
        # Dirty entries are symbol ids; map them to the files that contain them.
        dirty_paths = {SymbolID.parse(symbol_id).path for symbol_id in dirty_ids}
        file_ids = [
            node.id for node in file_nodes if SymbolID.parse(node.id).path in dirty_paths
        ]
    else:
        file_ids = [node.id for node in file_nodes]

    enricher = SummaryEnricher(
        repo,
        active_summarizer,
        embedder=embedder_from_config(EmbedConfig.load(self._config)),
        max_words=cfg.summary_max_words,
        levels=cfg.summary_levels,
        budget_usd=cfg.budget_usd if budget_usd is None else budget_usd,
        concurrency=cfg.concurrency,
        commit=_git_commit(self._repo_path),
    )
    report = await enricher.enrich(self._store, file_ids)

    # Clear only the dirty symbols whose file was actually summarized.
    done_paths = {SymbolID.parse(file_id).path for file_id in enricher.last_done_ids}
    cleaned = [
        symbol_id for symbol_id in dirty_ids if SymbolID.parse(symbol_id).path in done_paths
    ]
    await dirty.mark_clean("summaries", cleaned)
    return report
806
+
807
async def summaries(self, level: str | None = None) -> list[SummaryInfo]:
    """Return stored module summaries (feat-012), optionally filtered by level.

    Every ``SUMMARY`` node becomes a ``SummaryInfo`` whose ``target`` is
    the destination of its first outgoing ``SUMMARIZES`` edge (``""`` when
    it has none). With ``level`` given, only nodes whose ``level``
    attribute equals it are kept. The list is sorted by ``(level, path)``.
    """
    from agentforge_graph.core import EdgeKind, GraphQuery, NodeKind
    from agentforge_graph.enrich import SummaryInfo

    summary_query = GraphQuery(kinds=[NodeKind.SUMMARY], limit=10**9)
    summary_nodes = (await self._store.graph.query(summary_query)).nodes

    infos: list[SummaryInfo] = []
    for node in summary_nodes:
        node_level = str(node.attrs.get("level", ""))
        wanted = level is None or node_level == level
        if not wanted:
            continue
        edges = await self._store.graph.adjacent(node.id, [EdgeKind.SUMMARIZES], "out")
        target = edges[0].dst if edges else ""
        infos.append(
            SummaryInfo(
                target=target,
                level=node_level,
                text=str(node.attrs.get("text", "")),
                path=str(node.attrs.get("path", "")),
            )
        )
    return sorted(infos, key=lambda info: (info.level, info.path))
831
+
832
@property
def store(self) -> Store:
    """The underlying store this instance reads from and writes to."""
    return self._store
835
+
836
def stats(self) -> IndexReport:
    """Return the stored index report.

    Raises ``RuntimeError`` when no report has been recorded on this
    instance (``open()`` does not index — ``index()`` does).
    """
    report = self._report
    if report is not None:
        return report
    raise RuntimeError("no index report: open() does not index — use index()")
840
+
841
def embed_stats(self) -> EmbedReport:
    """Return the stored embed report.

    Raises ``RuntimeError`` when no embed report has been recorded on
    this instance, i.e. ``embed()`` has not been called yet.
    """
    report = self._embed_report
    if report is not None:
        return report
    raise RuntimeError("no embed report: call embed() first")
845
+
846
async def close(self) -> None:
    """Close the underlying store."""
    await self._store.close()