agentforge-graph 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. agentforge_graph/__init__.py +6 -0
  2. agentforge_graph/chunking/__init__.py +12 -0
  3. agentforge_graph/chunking/cast.py +159 -0
  4. agentforge_graph/chunking/chunk.py +19 -0
  5. agentforge_graph/chunking/tokens.py +15 -0
  6. agentforge_graph/cli.py +607 -0
  7. agentforge_graph/config.py +259 -0
  8. agentforge_graph/core/__init__.py +54 -0
  9. agentforge_graph/core/conformance.py +270 -0
  10. agentforge_graph/core/contracts.py +163 -0
  11. agentforge_graph/core/kinds.py +68 -0
  12. agentforge_graph/core/models.py +134 -0
  13. agentforge_graph/core/provenance.py +62 -0
  14. agentforge_graph/core/symbols.py +116 -0
  15. agentforge_graph/embed/__init__.py +28 -0
  16. agentforge_graph/embed/base.py +22 -0
  17. agentforge_graph/embed/bedrock.py +85 -0
  18. agentforge_graph/embed/fake.py +34 -0
  19. agentforge_graph/embed/openai.py +67 -0
  20. agentforge_graph/embed/pipeline.py +184 -0
  21. agentforge_graph/embed/registry.py +66 -0
  22. agentforge_graph/embed/report.py +15 -0
  23. agentforge_graph/enrich/__init__.py +70 -0
  24. agentforge_graph/enrich/anthropic.py +38 -0
  25. agentforge_graph/enrich/anthropic_client.py +109 -0
  26. agentforge_graph/enrich/bedrock.py +24 -0
  27. agentforge_graph/enrich/bedrock_client.py +115 -0
  28. agentforge_graph/enrich/bedrock_summarizer.py +23 -0
  29. agentforge_graph/enrich/claude.py +172 -0
  30. agentforge_graph/enrich/enricher.py +108 -0
  31. agentforge_graph/enrich/governs.py +173 -0
  32. agentforge_graph/enrich/governs_enricher.py +152 -0
  33. agentforge_graph/enrich/heuristics.py +224 -0
  34. agentforge_graph/enrich/judge.py +63 -0
  35. agentforge_graph/enrich/registry.py +133 -0
  36. agentforge_graph/enrich/report.py +60 -0
  37. agentforge_graph/enrich/summarizer.py +62 -0
  38. agentforge_graph/enrich/summary_enricher.py +211 -0
  39. agentforge_graph/enrich/taxonomy.py +38 -0
  40. agentforge_graph/frameworks/__init__.py +29 -0
  41. agentforge_graph/frameworks/base.py +75 -0
  42. agentforge_graph/frameworks/detect.py +124 -0
  43. agentforge_graph/frameworks/extractor.py +63 -0
  44. agentforge_graph/frameworks/orm.py +93 -0
  45. agentforge_graph/frameworks/packs/_js_ast.py +56 -0
  46. agentforge_graph/frameworks/packs/_python_ast.py +157 -0
  47. agentforge_graph/frameworks/packs/django/__init__.py +240 -0
  48. agentforge_graph/frameworks/packs/django/models.scm +7 -0
  49. agentforge_graph/frameworks/packs/express/__init__.py +133 -0
  50. agentforge_graph/frameworks/packs/express/routes.scm +8 -0
  51. agentforge_graph/frameworks/packs/fastapi/__init__.py +210 -0
  52. agentforge_graph/frameworks/packs/fastapi/depends.scm +6 -0
  53. agentforge_graph/frameworks/packs/fastapi/routes.scm +10 -0
  54. agentforge_graph/frameworks/packs/flask/__init__.py +143 -0
  55. agentforge_graph/frameworks/packs/flask/routes.scm +11 -0
  56. agentforge_graph/frameworks/packs/nestjs/__init__.py +205 -0
  57. agentforge_graph/frameworks/packs/nestjs/routes.scm +6 -0
  58. agentforge_graph/frameworks/packs/spring/__init__.py +267 -0
  59. agentforge_graph/frameworks/packs/spring/routes.scm +6 -0
  60. agentforge_graph/frameworks/packs/sqlalchemy/__init__.py +250 -0
  61. agentforge_graph/frameworks/packs/sqlalchemy/models.scm +7 -0
  62. agentforge_graph/frameworks/registry.py +44 -0
  63. agentforge_graph/ingest/__init__.py +30 -0
  64. agentforge_graph/ingest/codegraph.py +847 -0
  65. agentforge_graph/ingest/extractor.py +353 -0
  66. agentforge_graph/ingest/incremental/__init__.py +25 -0
  67. agentforge_graph/ingest/incremental/detect.py +118 -0
  68. agentforge_graph/ingest/incremental/dirty.py +61 -0
  69. agentforge_graph/ingest/incremental/indexer.py +218 -0
  70. agentforge_graph/ingest/incremental/meta.py +72 -0
  71. agentforge_graph/ingest/incremental/ports.py +39 -0
  72. agentforge_graph/ingest/pack.py +160 -0
  73. agentforge_graph/ingest/packs/__init__.py +34 -0
  74. agentforge_graph/ingest/packs/cpp/__init__.py +35 -0
  75. agentforge_graph/ingest/packs/cpp/references.scm +15 -0
  76. agentforge_graph/ingest/packs/cpp/structure.scm +49 -0
  77. agentforge_graph/ingest/packs/csharp/__init__.py +35 -0
  78. agentforge_graph/ingest/packs/csharp/references.scm +12 -0
  79. agentforge_graph/ingest/packs/csharp/structure.scm +45 -0
  80. agentforge_graph/ingest/packs/go/__init__.py +38 -0
  81. agentforge_graph/ingest/packs/go/references.scm +12 -0
  82. agentforge_graph/ingest/packs/go/structure.scm +64 -0
  83. agentforge_graph/ingest/packs/java/__init__.py +35 -0
  84. agentforge_graph/ingest/packs/java/references.scm +12 -0
  85. agentforge_graph/ingest/packs/java/structure.scm +38 -0
  86. agentforge_graph/ingest/packs/javascript/__init__.py +34 -0
  87. agentforge_graph/ingest/packs/javascript/references.scm +11 -0
  88. agentforge_graph/ingest/packs/javascript/structure.scm +166 -0
  89. agentforge_graph/ingest/packs/php/__init__.py +35 -0
  90. agentforge_graph/ingest/packs/php/references.scm +15 -0
  91. agentforge_graph/ingest/packs/php/structure.scm +44 -0
  92. agentforge_graph/ingest/packs/python/__init__.py +25 -0
  93. agentforge_graph/ingest/packs/python/references.scm +14 -0
  94. agentforge_graph/ingest/packs/python/structure.scm +57 -0
  95. agentforge_graph/ingest/packs/ruby/__init__.py +37 -0
  96. agentforge_graph/ingest/packs/ruby/references.scm +12 -0
  97. agentforge_graph/ingest/packs/ruby/structure.scm +37 -0
  98. agentforge_graph/ingest/packs/rust/__init__.py +39 -0
  99. agentforge_graph/ingest/packs/rust/references.scm +12 -0
  100. agentforge_graph/ingest/packs/rust/structure.scm +46 -0
  101. agentforge_graph/ingest/packs/typescript/__init__.py +31 -0
  102. agentforge_graph/ingest/packs/typescript/references.scm +11 -0
  103. agentforge_graph/ingest/packs/typescript/structure.scm +99 -0
  104. agentforge_graph/ingest/pipeline.py +134 -0
  105. agentforge_graph/ingest/report.py +84 -0
  106. agentforge_graph/ingest/resolver.py +467 -0
  107. agentforge_graph/ingest/source.py +79 -0
  108. agentforge_graph/knowledge/__init__.py +28 -0
  109. agentforge_graph/knowledge/adr.py +136 -0
  110. agentforge_graph/knowledge/commits.py +152 -0
  111. agentforge_graph/knowledge/ingest.py +312 -0
  112. agentforge_graph/knowledge/mentions.py +71 -0
  113. agentforge_graph/knowledge/report.py +32 -0
  114. agentforge_graph/main.py +21 -0
  115. agentforge_graph/providers.py +36 -0
  116. agentforge_graph/repomap/__init__.py +14 -0
  117. agentforge_graph/repomap/rank.py +161 -0
  118. agentforge_graph/repomap/render.py +55 -0
  119. agentforge_graph/repomap/repomap.py +66 -0
  120. agentforge_graph/retrieve/__init__.py +21 -0
  121. agentforge_graph/retrieve/pack.py +76 -0
  122. agentforge_graph/retrieve/rerank.py +251 -0
  123. agentforge_graph/retrieve/retriever.py +286 -0
  124. agentforge_graph/retrieve/scoring.py +36 -0
  125. agentforge_graph/serve/__init__.py +19 -0
  126. agentforge_graph/serve/engine.py +204 -0
  127. agentforge_graph/serve/http_runner.py +133 -0
  128. agentforge_graph/serve/server.py +110 -0
  129. agentforge_graph/serve/tools.py +307 -0
  130. agentforge_graph/store/__init__.py +32 -0
  131. agentforge_graph/store/_rowmap.py +102 -0
  132. agentforge_graph/store/errors.py +22 -0
  133. agentforge_graph/store/facade.py +89 -0
  134. agentforge_graph/store/kuzu_store.py +380 -0
  135. agentforge_graph/store/lance_store.py +146 -0
  136. agentforge_graph/store/neo4j_store.py +294 -0
  137. agentforge_graph/store/pgvector_store.py +170 -0
  138. agentforge_graph/store/registry.py +45 -0
  139. agentforge_graph/temporal/__init__.py +36 -0
  140. agentforge_graph/temporal/backfill.py +338 -0
  141. agentforge_graph/temporal/events.py +82 -0
  142. agentforge_graph/temporal/index.py +190 -0
  143. agentforge_graph/temporal/mining.py +190 -0
  144. agentforge_graph/temporal/recorder.py +114 -0
  145. agentforge_graph/temporal/store.py +282 -0
  146. agentforge_graph-0.3.2.dist-info/METADATA +291 -0
  147. agentforge_graph-0.3.2.dist-info/RECORD +151 -0
  148. agentforge_graph-0.3.2.dist-info/WHEEL +4 -0
  149. agentforge_graph-0.3.2.dist-info/entry_points.txt +3 -0
  150. agentforge_graph-0.3.2.dist-info/licenses/LICENSE +202 -0
  151. agentforge_graph-0.3.2.dist-info/licenses/NOTICE +14 -0
@@ -0,0 +1,338 @@
1
+ """History backfill (feat-009 chunk 4, ``ckg index --history N``).
2
+
3
+ Seeds the evolution log for code that predates temporal adoption by *replaying*
4
+ the last ``N`` commits oldest→newest through the **existing** incremental
5
+ pipeline against a **throwaway** graph store, feeding the real sidecar recorder
6
+ at each step. The HEAD index and the embeddings are never touched — backfill
7
+ writes lifecycle events only (design §4.6).
8
+
9
+ - File content at each historical commit is read from git (``git ls-tree`` +
10
+ ``git show <commit>:<path>``) via :class:`GitBlobSource` — **no checkout
11
+ churn**, the working tree is left alone.
12
+ - The per-step diff is ``git diff --name-status -M <parent> <commit>``.
13
+ - Churn/authorship mining is **skipped** during replay (it is a HEAD-time
14
+ signal, mined by chunk 2; replaying it would clobber HEAD aggregates with
15
+ stale values). Only ``OPENED``/``CLOSED`` are recorded.
16
+ - **Resumable**: the oldest covered commit is stored as ``backfilled_through``;
17
+ a re-run whose requested range is already covered is a no-op. Events are
18
+ idempotent (unique per symbol/commit/event), so a partial run re-runs safely.
19
+
20
+ The accuracy this buys: a symbol's earliest ``OPENED`` event becomes its true
21
+ introduction commit (within the backfilled horizon), so ``history().introduced``
22
+ is no longer window-bounded for pre-existing code.
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import hashlib
28
+ import subprocess
29
+ import tempfile
30
+ from collections.abc import Iterable, Iterator
31
+ from pathlib import Path, PurePosixPath
32
+
33
+ from pydantic import BaseModel
34
+
35
+ from agentforge_graph.core import SourceFile
36
+ from agentforge_graph.ingest.source import RepoSource
37
+ from agentforge_graph.store import Store
38
+
39
+ from .recorder import build_recorder, seed_symbols
40
+ from .store import TemporalStore
41
+
42
+ _FULL = -1 # `--history full` sentinel
43
+
44
+
45
class BackfillReport(BaseModel):
    """Outcome of one ``run_backfill`` call.

    ``ran`` is ``False`` when the backfill was skipped; ``reason`` then says
    why. The event counters are the sidecar's event totals sampled before and
    after the replay.
    """

    ran: bool
    commits: int = 0
    events_before: int = 0
    events_after: int = 0
    backfilled_through: str = ""
    reason: str = ""

    @property
    def events_added(self) -> int:
        """Net number of events the run added; never negative."""
        delta = self.events_after - self.events_before
        return delta if delta > 0 else 0
56
+
57
+
58
+ def parse_history(value: str | int | None) -> int:
59
+ """Normalise the ``--history`` argument: ``"full"`` → ``_FULL``; an int-ish
60
+ → that many commits; ``None``/0 → 0 (no backfill)."""
61
+ if value is None:
62
+ return 0
63
+ if isinstance(value, int):
64
+ return value
65
+ if value.strip().lower() == "full":
66
+ return _FULL
67
+ try:
68
+ return max(int(value), 0)
69
+ except ValueError:
70
+ return 0
71
+
72
+
73
+ # --- a git-blob source: file content at a specific commit -----------------
74
+
75
+
76
class GitBlobSource(RepoSource):
    """A ``RepoSource`` that yields the indexable files of a specific *commit's*
    tree (read from git), not the working tree.

    ``restrict`` limits the read to a known path set (an incremental step only
    needs its touched files), so per-step cost is bounded. File content is
    obtained with ``git ls-tree`` + ``git show <commit>:<path>``; the working
    tree is never read.
    """

    def __init__(
        self,
        root: str | Path,
        commit: str,
        *,
        exclude: list[str],
        include: list[str] | None = None,
        max_file_kb: int = 512,
        restrict: set[str] | None = None,
    ) -> None:
        super().__init__(root, include=include, exclude=exclude, max_file_kb=max_file_kb)
        self.commit = commit
        self._restrict = restrict

    def iter_files(self, registry: object) -> Iterator[SourceFile]:
        """Yield a ``SourceFile`` for each indexable blob in ``self.commit``.

        A path is skipped when it is outside ``restrict``, excluded / not
        included by the base-class filters, has no language pack for its
        extension, or cannot be read from git. Blobs larger than
        ``max_file_kb`` are skipped and noted in ``self.skipped``, which is
        reset at the start of every iteration.
        """
        self.skipped = []
        size_limit = self.max_file_kb * 1024
        for rel in self._tree_paths():
            if self._restrict is not None and rel not in self._restrict:
                continue
            if self._is_excluded(rel) or not self._is_included(rel):
                continue
            pack = registry.for_extension(PurePosixPath(rel).suffix)  # type: ignore[attr-defined]
            if pack is None:
                continue
            raw = self._blob(rel)
            if raw is None:
                continue
            if len(raw) > size_limit:
                self.skipped.append(f"{rel} (> {self.max_file_kb}KB)")
                continue
            yield SourceFile(
                path=rel,
                # Undecodable bytes are replaced rather than failing the file.
                text=raw.decode("utf-8", errors="replace"),
                language=pack.lang_slug,
                # The hash is taken over the raw bytes, not the decoded text.
                content_hash=hashlib.sha256(raw).hexdigest(),
            )

    def _tree_paths(self) -> list[str]:
        """Every blob path in the commit's tree; ``[]`` if git fails.

        ``-z`` makes git emit NUL-terminated, *unquoted* paths. Without it,
        paths containing non-ASCII or special characters are C-quoted
        (``core.quotePath``) and the quoted form cannot be fed back to
        ``git show <commit>:<path>``, so such files would silently vanish.
        """
        try:
            out = subprocess.run(
                ["git", "-C", str(self.root), "ls-tree", "-r", "-z", "--name-only", self.commit],
                capture_output=True,
                text=True,
                check=True,
            )
        except (subprocess.SubprocessError, OSError):
            return []
        return [p for p in out.stdout.split("\0") if p]

    def _blob(self, rel: str) -> bytes | None:
        """Raw bytes of ``rel`` at ``self.commit``, or ``None`` if git fails."""
        try:
            out = subprocess.run(
                ["git", "-C", str(self.root), "show", f"{self.commit}:{rel}"],
                capture_output=True,
                check=True,
            )
        except (subprocess.SubprocessError, OSError):
            return None
        return out.stdout
141
+
142
+
143
+ # --- a recorder that records lifecycle but skips churn --------------------
144
+
145
+
146
class _LifecycleOnly:
    """Recorder proxy used during replay.

    ``open``, ``close`` and ``flush`` are delegated to the wrapped recorder;
    ``record_churn`` is swallowed so a replay never clobbers HEAD churn
    aggregates.
    """

    def __init__(self, inner: object) -> None:
        self._inner = inner

    def open(self, symbol_ids: Iterable[str], at: str, ts: int) -> None:
        """Delegate an open call unchanged to the wrapped recorder."""
        target = self._inner
        target.open(symbol_ids, at, ts)  # type: ignore[attr-defined]

    def close(self, symbol_ids: Iterable[str], at: str, ts: int) -> None:
        """Delegate a close call unchanged to the wrapped recorder."""
        target = self._inner
        target.close(symbol_ids, at, ts)  # type: ignore[attr-defined]

    async def record_churn(self, *args: object, **kwargs: object) -> None:
        """Accept any arguments and do nothing."""
        return None

    async def flush(self) -> None:
        """Await the wrapped recorder's ``flush``."""
        target = self._inner
        await target.flush()  # type: ignore[attr-defined]
164
+
165
+
166
+ # --- git helpers ----------------------------------------------------------
167
+
168
+
169
+ def _git(root: str | Path, *args: str) -> str | None:
170
+ try:
171
+ out = subprocess.run(
172
+ ["git", "-C", str(root), *args], capture_output=True, text=True, check=True
173
+ )
174
+ except (subprocess.SubprocessError, OSError):
175
+ return None
176
+ return out.stdout
177
+
178
+
179
def _commit_list(root: str | Path, history: int) -> list[str]:
    """Commits to replay, oldest→newest, as printed by ``git rev-list --reverse``.

    ``history == _FULL`` applies no limit (walks to the root); otherwise the
    last ``history + 1`` commits are requested: one baseline plus ``history``
    diff steps. Returns ``[]`` when git fails or prints nothing.
    """
    limit = [] if history == _FULL else ["-n", str(history + 1)]
    out = _git(root, "rev-list", "--reverse", *limit, "HEAD")
    if not out:
        return []
    return [sha for sha in out.splitlines() if sha]
188
+
189
+
190
def _commit_ts(root: str | Path, commit: str) -> int:
    """Epoch-seconds timestamp of ``commit`` (git's ``%ct``), or 0 when git
    fails or its output is not an integer."""
    out = _git(root, "show", "-s", "--format=%ct", commit)
    if not out:
        return 0
    try:
        return int(out.strip())
    except ValueError:
        return 0
196
+
197
+
198
+ def _is_ancestor(root: str | Path, a: str, b: str) -> bool:
199
+ """True if commit ``a`` is an ancestor of (or equal to) ``b``."""
200
+ try:
201
+ return (
202
+ subprocess.run(
203
+ ["git", "-C", str(root), "merge-base", "--is-ancestor", a, b],
204
+ capture_output=True,
205
+ ).returncode
206
+ == 0
207
+ )
208
+ except (subprocess.SubprocessError, OSError):
209
+ return False
210
+
211
+
212
def _changeset(root: str | Path, parent: str, commit: str, registry: object) -> object:
    """A feat-004 ``ChangeSet`` from ``git diff --name-status -M -z`` between two
    commits, restricted to indexable files.

    ``-z`` is used so paths arrive NUL-separated and *unquoted*. In the default
    tab/newline format git C-quotes paths with non-ASCII or special characters,
    which corrupts the suffix check and the path itself. In ``-z`` mode each
    record is ``<status> NUL <path> NUL``, and rename/copy records carry two
    paths (``<status> NUL <old> NUL <new> NUL``).

    Status handling: ``A`` → added, ``M`` and ``T`` (type change) → modified,
    ``D`` → deleted, ``R`` → renamed (kept when either side is indexable).
    A ``C`` (copy) record, should git emit one, is treated as an add of the
    new path. Other statuses are ignored. A git failure yields an empty
    change set.
    """
    from agentforge_graph.ingest.incremental import ChangeSet

    out = _git(root, "diff", "--name-status", "-M", "-z", parent, commit)
    added: list[str] = []
    modified: list[str] = []
    deleted: list[str] = []
    renamed: list[tuple[str, str]] = []

    def indexable(p: str) -> bool:
        return registry.for_extension(PurePosixPath(p).suffix) is not None  # type: ignore[attr-defined]

    fields = iter((out or "").split("\0"))
    for code in fields:
        if not code:  # trailing empty field after the final NUL
            continue
        status = code[0]
        if status in "RC":
            # Two-path record; always consume both to keep the stream aligned.
            old, new = next(fields, ""), next(fields, "")
            if not new:
                continue
            if status == "R":
                if indexable(old) or indexable(new):
                    renamed.append((old, new))
            elif indexable(new):
                added.append(new)
            continue
        path = next(fields, "")
        if not path or not indexable(path):
            continue
        if status == "A":
            added.append(path)
        elif status in "MT":
            modified.append(path)
        elif status == "D":
            deleted.append(path)
    return ChangeSet(
        added=sorted(added),
        modified=sorted(modified),
        deleted=sorted(deleted),
        renamed=renamed,
    )
247
+
248
+
249
async def _open_temp_store(tmp: Path) -> Store:
    """Open a throwaway embedded store (kuzu graph + lancedb vectors) under
    ``tmp``, regardless of the backend the real config selects — backfill
    replays locally and discards it."""
    from agentforge_graph.config import StoreConfig
    from agentforge_graph.store.registry import graph_driver, vector_driver

    graph_backend = await graph_driver("kuzu").open(tmp / "graph.kuzu")
    vector_backend = await vector_driver("lancedb").open(tmp / "vectors.lance")
    return Store(graph_backend, vector_backend, StoreConfig())
258
+
259
+
260
async def run_backfill(
    repo_path: str | Path,
    config: str | Path | None,
    history: int,
    *,
    languages: str | list[str] | None = None,
) -> BackfillReport:
    """Replay ``history`` commits into the evolution log.

    The oldest selected commit is ingested in full into a temporary store and
    its symbols are seeded as opened. Every later commit is then applied as an
    incremental refresh restricted to the files its diff touched, with churn
    recording suppressed. On success the oldest commit is stored under the
    ``backfilled_through`` meta key.

    Returns a report with ``ran=False`` and a ``reason`` when the run is
    skipped: ``history == 0``, temporal disabled, fewer than two commits
    available, or the requested range already covered.
    """
    from agentforge_graph.config import IngestConfig, StoreConfig, TemporalConfig
    from agentforge_graph.ingest.codegraph import _registry_for
    from agentforge_graph.ingest.incremental import IncrementalIndexer
    from agentforge_graph.ingest.pipeline import IngestPipeline

    if history == 0:
        return BackfillReport(ran=False, reason="history=0 (nothing to backfill)")
    if not TemporalConfig.load(config).enabled:
        return BackfillReport(ran=False, reason="temporal disabled")

    commits = _commit_list(repo_path, history)
    if len(commits) < 2:  # need a baseline + ≥1 step
        return BackfillReport(ran=False, reason="not a git repo or too few commits")

    root = Path(repo_path) / StoreConfig.load(config).path
    tstore = TemporalStore.open(root)
    baseline = commits[0]
    cursor = await tstore.get_meta("backfilled_through")
    # A stored cursor at or before the requested baseline means this range
    # has already been replayed.
    if cursor and _is_ancestor(repo_path, cursor, baseline):
        return BackfillReport(ran=False, reason="already backfilled", backfilled_through=cursor)

    ingest = IngestConfig.load(config)
    registry = _registry_for(ingest.languages if languages is None else languages)
    repo = Path(repo_path).resolve().name
    exclude = ingest.exclude
    max_kb = ingest.max_file_kb

    recorder = build_recorder(str(root))
    lifecycle = _LifecycleOnly(recorder)
    events_before = await tstore.count_events()

    with tempfile.TemporaryDirectory() as tmpdir:
        store = await _open_temp_store(Path(tmpdir))
        try:
            baseline_src = GitBlobSource(repo_path, baseline, exclude=exclude, max_file_kb=max_kb)
            await IngestPipeline(repo=repo, commit=baseline).run(
                baseline_src, store.graph, registry
            )
            # OPENED for everything alive at the baseline; repo_root="" → no churn
            await seed_symbols(store.graph, recorder, baseline, _commit_ts(repo_path, baseline))

            for prev, cur in zip(commits[:-1], commits[1:]):
                changes = _changeset(repo_path, prev, cur, registry)
                if changes.is_empty():  # type: ignore[attr-defined]
                    continue
                touched = set(changes.touched_paths())  # type: ignore[attr-defined]
                step_src = GitBlobSource(
                    repo_path, cur, exclude=exclude, max_file_kb=max_kb, restrict=touched
                )
                indexer = IncrementalIndexer(
                    store,
                    step_src,
                    registry,
                    repo,
                    commit=cur,
                    dirty=None,
                    recorder=lifecycle,
                    commit_ts=_commit_ts(repo_path, cur),
                )
                await indexer.refresh(changes)  # type: ignore[arg-type]
        finally:
            await store.close()

    await tstore.set_meta("backfilled_through", baseline)
    events_after = await tstore.count_events()
    return BackfillReport(
        ran=True,
        commits=len(commits),
        events_before=events_before,
        events_after=events_after,
        backfilled_through=baseline,
    )
@@ -0,0 +1,82 @@
1
+ """Value types for the temporal evolution log (feat-009).
2
+
3
+ An ``Event`` is one lifecycle record for a symbol (or, later, an edge): it was
4
+ ``opened`` (first observed / re-introduced) or ``closed`` (removed) at a commit,
5
+ or ``succeeds`` another symbol (rename lineage). These are *commit-validity*
6
+ facts — when something was true in the repo — not ingestion-time facts (the
7
+ design's bi-temporal-lite scope; see design-009 §3).
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from enum import StrEnum
13
+
14
+ from pydantic import BaseModel, ConfigDict
15
+
16
+
17
+ class EventKind(StrEnum):
18
+ OPENED = "opened" # symbol first observed / re-introduced at `commit`
19
+ CLOSED = "closed" # symbol removed at `commit`
20
+ SUCCEEDS = "succeeds" # `symbol_id` is the successor of `ref` (rename lineage)
21
+
22
+
23
+ class Entity(StrEnum):
24
+ NODE = "node"
25
+ EDGE = "edge"
26
+
27
+
28
class Event(BaseModel):
    """One append-only lifecycle record in the evolution log.

    Instances are frozen (immutable after construction).
    """

    model_config = ConfigDict(frozen=True)

    symbol_id: str
    event: EventKind
    commit: str
    # commit author time (epoch seconds); 0 if unknown / non-git
    ts: int = 0
    entity: Entity = Entity.NODE
    # SUCCEEDS: the prior symbol id this one supersedes
    ref: str | None = None
39
+
40
+
41
+ # --- read-side value types (chunk 3 read APIs) ----------------------------
42
+
43
+
44
class Author(BaseModel):
    """An author together with the number of commits they made to a symbol's
    span (within the mined window). Frozen after construction."""

    model_config = ConfigDict(frozen=True)

    name: str
    commits: int
52
+
53
+
54
class Change(BaseModel):
    """One symbol that changed since a reference commit — the unit returned by
    ``changed_since``. Frozen after construction."""

    model_config = ConfigDict(frozen=True)

    symbol_id: str
    path: str
    # "opened" | "closed" | "modified"
    kind: str
    commit: str
    ts: int
65
+
66
+
67
class SymbolHistory(BaseModel):
    """A symbol's evolution at a glance: when it was introduced / last changed,
    its churn windows, its authors, and the raw lifecycle events. Read from the
    sidecar (+ the current graph for the live span). Frozen after construction.
    """

    model_config = ConfigDict(frozen=True)

    symbol_id: str
    # commit sha (prefer the OPENED event; else mined)
    introduced: str = ""
    introduced_ts: int = 0
    last_changed: str = ""
    last_changed_ts: int = 0
    churn_30d: int = 0
    churn_90d: int = 0
    authors: list[Author] = []
    events: list[Event] = []
@@ -0,0 +1,190 @@
1
+ """``TemporalIndex`` — the read side of the evolution log (feat-009 chunks 3+5).
2
+
3
+ Answers the questions an agent asks after a regression — *when was this
4
+ introduced, who owns it, how much does it churn, what changed since <ref>, what
5
+ did this look like as_of <commit>* — from the sidecar (``TemporalStore``) plus
6
+ the current graph. Pure reads; no mutation, no embedding.
7
+
8
+ `introduced` prefers the chunk-1 ``OPENED`` event (the exact birth commit when
9
+ the symbol was added during the temporal era) and falls back to the mined
10
+ aggregate's window-bounded estimate otherwise (design §4.5 known limitation).
11
+
12
+ ``alive_at(C)`` reconstructs the set of symbols valid at commit ``C`` by
13
+ replaying the log: a symbol is alive iff the *last* lifecycle event at or before
14
+ ``C`` is ``OPENED`` (design §4.7). This tolerates the spurious ``OPENED`` the
15
+ full-index seed stamps at HEAD — that event is *after* any historical ``C``, so
16
+ it never leaks into an as_of reconstruction.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import subprocess
22
+ from collections import defaultdict
23
+ from fnmatch import fnmatch
24
+
25
+ from agentforge_graph.core import GraphStore, SymbolID
26
+
27
+ from .events import Author, Change, Event, EventKind, SymbolHistory
28
+ from .store import TemporalStore
29
+
30
+
31
class TemporalError(Exception):
    """Raised when a temporal query cannot be answered honestly (e.g. an
    ``as_of`` commit older than the retention horizon), instead of returning
    a silently wrong answer."""
34
+
35
+
36
class TemporalIndex:
    """Read-only queries over the evolution log.

    Combines the sidecar (``TemporalStore``: lifecycle events + mined
    aggregates) with git lookups run against ``repo_root``. All timestamps are
    epoch seconds as printed by git's ``%ct`` format (the committer date).
    """

    def __init__(
        self,
        store: TemporalStore,
        graph: GraphStore,
        repo_root: str = "",
        retention_commits: int = 0,
    ) -> None:
        self._store = store
        self._graph = graph
        self._root = repo_root
        self._retention = retention_commits

    async def history(self, symbol_id: str) -> SymbolHistory:
        """Summarise one symbol: introduction, last change, churn, authors and
        its raw events. Fields with no data keep their empty defaults."""
        events = await self._store.events_for(symbol_id)
        agg = await self._store.aggregate_for(symbol_id)

        # introduced: earliest OPENED event (exact) wins over the mined estimate.
        opened = [e for e in events if e.event is EventKind.OPENED]
        if opened:
            first = min(opened, key=lambda e: e.ts)
            introduced, introduced_ts = first.commit, first.ts
        elif agg is not None:
            introduced, introduced_ts = agg.introduced_sha, agg.introduced_ts
        else:
            introduced, introduced_ts = "", 0

        # last_changed: the most recent of any event or the mined last_changed.
        # `>=` lets a later entry win a timestamp tie, and the aggregate win a
        # tie against an event.
        last, last_ts = "", 0
        for e in events:
            if e.ts >= last_ts:
                last, last_ts = e.commit, e.ts
        if agg is not None and agg.last_changed_ts >= last_ts:
            last, last_ts = agg.last_changed_sha, agg.last_changed_ts

        authors = [Author(name=n, commits=c) for n, c in (agg.top_authors if agg else [])]
        return SymbolHistory(
            symbol_id=symbol_id,
            introduced=introduced,
            introduced_ts=introduced_ts,
            last_changed=last,
            last_changed_ts=last_ts,
            churn_30d=agg.churn_30d if agg else 0,
            churn_90d=agg.churn_90d if agg else 0,
            authors=authors,
            events=events,
        )

    async def authors(self, symbol_id: str) -> list[Author]:
        """The mined top authors of a symbol; ``[]`` when nothing was mined."""
        agg = await self._store.aggregate_for(symbol_id)
        return [Author(name=n, commits=c) for n, c in (agg.top_authors if agg else [])]

    async def churn(self, symbol_id: str, window_days: int = 90) -> int:
        """Mined churn count for a symbol; 0 when nothing was mined.

        Only two windows are stored: ``window_days <= 30`` reads the 30-day
        figure and anything larger reads the 90-day figure.
        """
        agg = await self._store.aggregate_for(symbol_id)
        if agg is None:
            return 0
        return agg.churn_30d if window_days <= 30 else agg.churn_90d

    async def changed_since(self, ref: str, scope: str | None = None) -> list[Change]:
        """Symbols with recorded activity after ``ref`` (a commit-ish), newest
        first. Lifecycle events and mined modifications both count; ``scope``
        keeps only paths matching the glob or prefix.

        Raises ``ValueError`` when ``ref`` cannot be resolved to a timestamp.
        """
        since_ts = self._resolve_ts(ref)
        changes: dict[str, Change] = {}
        # lifecycle events after the ref — the precise kind; a later event for
        # the same symbol replaces an earlier one
        for e in await self._store.all_events():
            if e.ts > since_ts:
                changes[e.symbol_id] = Change(
                    symbol_id=e.symbol_id,
                    path=SymbolID.parse(e.symbol_id).path,
                    kind=e.event.value,
                    commit=e.commit,
                    ts=e.ts,
                )
        # mined modifications after the ref (don't overwrite a precise lifecycle)
        for agg in await self._store.all_aggregates():
            if agg.last_changed_ts > since_ts and agg.symbol_id not in changes:
                changes[agg.symbol_id] = Change(
                    symbol_id=agg.symbol_id,
                    path=SymbolID.parse(agg.symbol_id).path,
                    kind="modified",
                    commit=agg.last_changed_sha,
                    ts=agg.last_changed_ts,
                )
        out = [c for c in changes.values() if _in_scope(c.path, scope)]
        out.sort(key=lambda c: (-c.ts, c.symbol_id))
        return out

    async def alive_at(self, commit: str) -> set[str]:
        """The set of symbol ids valid at ``commit``, reconstructed by replaying
        the log (design §4.7).

        A symbol is alive iff its last ``OPENED``/``CLOSED`` event at or before
        ``commit`` is ``OPENED``. ``SUCCEEDS`` records are rename lineage, not
        lifecycle, so they are ignored here; otherwise a trailing ``SUCCEEDS``
        would make a live symbol look dead.

        Raises ``TemporalError`` when ``commit`` is older than the retention
        horizon (its closed events may have been pruned, so the answer would
        be silently wrong), and ``ValueError`` when ``commit`` cannot be
        resolved.
        """
        ts = self._resolve_ts(commit)
        horizon = self._horizon_ts()
        if horizon and ts < horizon:
            raise TemporalError(
                f"{commit} is beyond the retention horizon ({self._retention} commits)"
            )
        lifecycle = (EventKind.OPENED, EventKind.CLOSED)
        last_kind: dict[str, EventKind] = {}
        for e in await self._store.all_events():  # ordered by (ts, rowid)
            if e.ts <= ts and e.event in lifecycle:
                last_kind[e.symbol_id] = e.event
        return {sid for sid, kind in last_kind.items() if kind is EventKind.OPENED}

    # --- internals --------------------------------------------------------

    def _horizon_ts(self) -> int:
        """Timestamp of ``HEAD~retention_commits`` (the oldest commit still in
        retention), or 0 when retention is unbounded / history is shorter."""
        if self._retention <= 0:
            return 0
        sha = self._git("rev-parse", f"HEAD~{self._retention}")
        return self._commit_ts(sha.strip()) if sha else 0

    def _commit_ts(self, ref: str) -> int:
        """``%ct`` of ``ref`` as an int; 0 when git fails or prints a non-integer."""
        out = self._git("show", "-s", "--format=%ct", ref)
        try:
            return int(out.strip()) if out else 0
        except ValueError:
            return 0

    def _git(self, *args: str) -> str | None:
        """Stdout of ``git -C <repo_root> <args...>``, or ``None`` on failure."""
        try:
            return subprocess.run(
                ["git", "-C", self._root, *args],
                capture_output=True,
                text=True,
                check=True,
            ).stdout
        except (subprocess.SubprocessError, OSError):
            return None

    def _resolve_ts(self, ref: str) -> int:
        """Timestamp (epoch s, git ``%ct``) of ``ref``.

        An all-digit ``ref`` is taken as a raw epoch value and returned as-is,
        so the API is testable without a working tree. Note that this also
        applies to an all-digit abbreviated SHA.

        Raises ``ValueError`` when ``ref`` cannot be resolved. A ``ref``
        starting with ``-`` is rejected up front: git would parse it as a
        command-line option (e.g. ``--output=<file>``), not as a revision.
        """
        if ref.isdigit():
            return int(ref)
        if ref.startswith("-"):
            raise ValueError(f"cannot resolve ref {ref!r} to a commit time")
        try:
            out = subprocess.run(
                ["git", "-C", self._root, "show", "-s", "--format=%ct", ref],
                capture_output=True,
                text=True,
                check=True,
            )
            return int(out.stdout.strip())
        except (subprocess.SubprocessError, OSError, ValueError) as exc:
            raise ValueError(f"cannot resolve ref {ref!r} to a commit time") from exc
185
+
186
+
187
+ def _in_scope(path: str, scope: str | None) -> bool:
188
+ if not scope:
189
+ return True
190
+ return path.startswith(scope) or fnmatch(path, scope)