agentforge-graph 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. agentforge_graph/__init__.py +6 -0
  2. agentforge_graph/chunking/__init__.py +12 -0
  3. agentforge_graph/chunking/cast.py +159 -0
  4. agentforge_graph/chunking/chunk.py +19 -0
  5. agentforge_graph/chunking/tokens.py +15 -0
  6. agentforge_graph/cli.py +607 -0
  7. agentforge_graph/config.py +259 -0
  8. agentforge_graph/core/__init__.py +54 -0
  9. agentforge_graph/core/conformance.py +270 -0
  10. agentforge_graph/core/contracts.py +163 -0
  11. agentforge_graph/core/kinds.py +68 -0
  12. agentforge_graph/core/models.py +134 -0
  13. agentforge_graph/core/provenance.py +62 -0
  14. agentforge_graph/core/symbols.py +116 -0
  15. agentforge_graph/embed/__init__.py +28 -0
  16. agentforge_graph/embed/base.py +22 -0
  17. agentforge_graph/embed/bedrock.py +85 -0
  18. agentforge_graph/embed/fake.py +34 -0
  19. agentforge_graph/embed/openai.py +67 -0
  20. agentforge_graph/embed/pipeline.py +184 -0
  21. agentforge_graph/embed/registry.py +66 -0
  22. agentforge_graph/embed/report.py +15 -0
  23. agentforge_graph/enrich/__init__.py +70 -0
  24. agentforge_graph/enrich/anthropic.py +38 -0
  25. agentforge_graph/enrich/anthropic_client.py +109 -0
  26. agentforge_graph/enrich/bedrock.py +24 -0
  27. agentforge_graph/enrich/bedrock_client.py +115 -0
  28. agentforge_graph/enrich/bedrock_summarizer.py +23 -0
  29. agentforge_graph/enrich/claude.py +172 -0
  30. agentforge_graph/enrich/enricher.py +108 -0
  31. agentforge_graph/enrich/governs.py +173 -0
  32. agentforge_graph/enrich/governs_enricher.py +152 -0
  33. agentforge_graph/enrich/heuristics.py +224 -0
  34. agentforge_graph/enrich/judge.py +63 -0
  35. agentforge_graph/enrich/registry.py +133 -0
  36. agentforge_graph/enrich/report.py +60 -0
  37. agentforge_graph/enrich/summarizer.py +62 -0
  38. agentforge_graph/enrich/summary_enricher.py +211 -0
  39. agentforge_graph/enrich/taxonomy.py +38 -0
  40. agentforge_graph/frameworks/__init__.py +29 -0
  41. agentforge_graph/frameworks/base.py +75 -0
  42. agentforge_graph/frameworks/detect.py +124 -0
  43. agentforge_graph/frameworks/extractor.py +63 -0
  44. agentforge_graph/frameworks/orm.py +93 -0
  45. agentforge_graph/frameworks/packs/_js_ast.py +56 -0
  46. agentforge_graph/frameworks/packs/_python_ast.py +157 -0
  47. agentforge_graph/frameworks/packs/django/__init__.py +240 -0
  48. agentforge_graph/frameworks/packs/django/models.scm +7 -0
  49. agentforge_graph/frameworks/packs/express/__init__.py +133 -0
  50. agentforge_graph/frameworks/packs/express/routes.scm +8 -0
  51. agentforge_graph/frameworks/packs/fastapi/__init__.py +210 -0
  52. agentforge_graph/frameworks/packs/fastapi/depends.scm +6 -0
  53. agentforge_graph/frameworks/packs/fastapi/routes.scm +10 -0
  54. agentforge_graph/frameworks/packs/flask/__init__.py +143 -0
  55. agentforge_graph/frameworks/packs/flask/routes.scm +11 -0
  56. agentforge_graph/frameworks/packs/nestjs/__init__.py +205 -0
  57. agentforge_graph/frameworks/packs/nestjs/routes.scm +6 -0
  58. agentforge_graph/frameworks/packs/spring/__init__.py +267 -0
  59. agentforge_graph/frameworks/packs/spring/routes.scm +6 -0
  60. agentforge_graph/frameworks/packs/sqlalchemy/__init__.py +250 -0
  61. agentforge_graph/frameworks/packs/sqlalchemy/models.scm +7 -0
  62. agentforge_graph/frameworks/registry.py +44 -0
  63. agentforge_graph/ingest/__init__.py +30 -0
  64. agentforge_graph/ingest/codegraph.py +847 -0
  65. agentforge_graph/ingest/extractor.py +353 -0
  66. agentforge_graph/ingest/incremental/__init__.py +25 -0
  67. agentforge_graph/ingest/incremental/detect.py +118 -0
  68. agentforge_graph/ingest/incremental/dirty.py +61 -0
  69. agentforge_graph/ingest/incremental/indexer.py +218 -0
  70. agentforge_graph/ingest/incremental/meta.py +72 -0
  71. agentforge_graph/ingest/incremental/ports.py +39 -0
  72. agentforge_graph/ingest/pack.py +160 -0
  73. agentforge_graph/ingest/packs/__init__.py +34 -0
  74. agentforge_graph/ingest/packs/cpp/__init__.py +35 -0
  75. agentforge_graph/ingest/packs/cpp/references.scm +15 -0
  76. agentforge_graph/ingest/packs/cpp/structure.scm +49 -0
  77. agentforge_graph/ingest/packs/csharp/__init__.py +35 -0
  78. agentforge_graph/ingest/packs/csharp/references.scm +12 -0
  79. agentforge_graph/ingest/packs/csharp/structure.scm +45 -0
  80. agentforge_graph/ingest/packs/go/__init__.py +38 -0
  81. agentforge_graph/ingest/packs/go/references.scm +12 -0
  82. agentforge_graph/ingest/packs/go/structure.scm +64 -0
  83. agentforge_graph/ingest/packs/java/__init__.py +35 -0
  84. agentforge_graph/ingest/packs/java/references.scm +12 -0
  85. agentforge_graph/ingest/packs/java/structure.scm +38 -0
  86. agentforge_graph/ingest/packs/javascript/__init__.py +34 -0
  87. agentforge_graph/ingest/packs/javascript/references.scm +11 -0
  88. agentforge_graph/ingest/packs/javascript/structure.scm +166 -0
  89. agentforge_graph/ingest/packs/php/__init__.py +35 -0
  90. agentforge_graph/ingest/packs/php/references.scm +15 -0
  91. agentforge_graph/ingest/packs/php/structure.scm +44 -0
  92. agentforge_graph/ingest/packs/python/__init__.py +25 -0
  93. agentforge_graph/ingest/packs/python/references.scm +14 -0
  94. agentforge_graph/ingest/packs/python/structure.scm +57 -0
  95. agentforge_graph/ingest/packs/ruby/__init__.py +37 -0
  96. agentforge_graph/ingest/packs/ruby/references.scm +12 -0
  97. agentforge_graph/ingest/packs/ruby/structure.scm +37 -0
  98. agentforge_graph/ingest/packs/rust/__init__.py +39 -0
  99. agentforge_graph/ingest/packs/rust/references.scm +12 -0
  100. agentforge_graph/ingest/packs/rust/structure.scm +46 -0
  101. agentforge_graph/ingest/packs/typescript/__init__.py +31 -0
  102. agentforge_graph/ingest/packs/typescript/references.scm +11 -0
  103. agentforge_graph/ingest/packs/typescript/structure.scm +99 -0
  104. agentforge_graph/ingest/pipeline.py +134 -0
  105. agentforge_graph/ingest/report.py +84 -0
  106. agentforge_graph/ingest/resolver.py +467 -0
  107. agentforge_graph/ingest/source.py +79 -0
  108. agentforge_graph/knowledge/__init__.py +28 -0
  109. agentforge_graph/knowledge/adr.py +136 -0
  110. agentforge_graph/knowledge/commits.py +152 -0
  111. agentforge_graph/knowledge/ingest.py +312 -0
  112. agentforge_graph/knowledge/mentions.py +71 -0
  113. agentforge_graph/knowledge/report.py +32 -0
  114. agentforge_graph/main.py +21 -0
  115. agentforge_graph/providers.py +36 -0
  116. agentforge_graph/repomap/__init__.py +14 -0
  117. agentforge_graph/repomap/rank.py +161 -0
  118. agentforge_graph/repomap/render.py +55 -0
  119. agentforge_graph/repomap/repomap.py +66 -0
  120. agentforge_graph/retrieve/__init__.py +21 -0
  121. agentforge_graph/retrieve/pack.py +76 -0
  122. agentforge_graph/retrieve/rerank.py +251 -0
  123. agentforge_graph/retrieve/retriever.py +286 -0
  124. agentforge_graph/retrieve/scoring.py +36 -0
  125. agentforge_graph/serve/__init__.py +19 -0
  126. agentforge_graph/serve/engine.py +204 -0
  127. agentforge_graph/serve/http_runner.py +133 -0
  128. agentforge_graph/serve/server.py +110 -0
  129. agentforge_graph/serve/tools.py +307 -0
  130. agentforge_graph/store/__init__.py +32 -0
  131. agentforge_graph/store/_rowmap.py +102 -0
  132. agentforge_graph/store/errors.py +22 -0
  133. agentforge_graph/store/facade.py +89 -0
  134. agentforge_graph/store/kuzu_store.py +380 -0
  135. agentforge_graph/store/lance_store.py +146 -0
  136. agentforge_graph/store/neo4j_store.py +294 -0
  137. agentforge_graph/store/pgvector_store.py +170 -0
  138. agentforge_graph/store/registry.py +45 -0
  139. agentforge_graph/temporal/__init__.py +36 -0
  140. agentforge_graph/temporal/backfill.py +338 -0
  141. agentforge_graph/temporal/events.py +82 -0
  142. agentforge_graph/temporal/index.py +190 -0
  143. agentforge_graph/temporal/mining.py +190 -0
  144. agentforge_graph/temporal/recorder.py +114 -0
  145. agentforge_graph/temporal/store.py +282 -0
  146. agentforge_graph-0.3.2.dist-info/METADATA +291 -0
  147. agentforge_graph-0.3.2.dist-info/RECORD +151 -0
  148. agentforge_graph-0.3.2.dist-info/WHEEL +4 -0
  149. agentforge_graph-0.3.2.dist-info/entry_points.txt +3 -0
  150. agentforge_graph-0.3.2.dist-info/licenses/LICENSE +202 -0
  151. agentforge_graph-0.3.2.dist-info/licenses/NOTICE +14 -0
@@ -0,0 +1,190 @@
1
+ """Churn / authorship mining (feat-009 chunk 2).
2
+
3
+ A symbol's *churn* and *authorship* are ranking + ownership signals
4
+ (design-009 §4.5): how much it has moved lately and who has been editing it.
5
+ We mine them from ``git log`` over a bounded window and attribute each diff
6
+ hunk to the symbol(s) whose **current span** overlaps the hunk's new-line
7
+ range. Attribution is approximate *by design* — historical line numbers drift
8
+ from current spans — which is fine for a ranking signal (it is never asserted
9
+ as provenance).
10
+
11
+ One batched ``git log -U0`` call per refresh covers all touched paths; the
12
+ window (default 90d, derived from the commit's author time so results are
13
+ deterministic in tests) bounds cost. Output is a small, bounded
14
+ ``SymbolAggregate`` per symbol — never a per-commit fact (design §4.10).
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import re
20
+ import subprocess
21
+ from collections import defaultdict
22
+ from dataclasses import dataclass, field
23
+ from datetime import UTC, datetime
24
+
25
# Seconds per day; window cutoffs are computed in epoch seconds.
_DAY = 24 * 60 * 60
# Unified-diff hunk header: group 1 is the new-side start line, group 2 the
# new-side line count (absent when the count is omitted from the header).
_HUNK = re.compile(r"^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@")
# Marker that prefixes every commit header line (unambiguous vs. patch text).
_SOH = "\x01"
28
+
29
+
30
@dataclass(frozen=True)
class SymbolAggregate:
    """Compact churn and authorship summary for a single symbol (design §4.4 aggregates)."""

    symbol_id: str
    churn_30d: int
    churn_90d: int
    top_authors: list[tuple[str, int]]  # (name, commits) pairs, ≤3, most commits first
    introduced_sha: str
    introduced_ts: int
    last_changed_sha: str
    last_changed_ts: int

    def attrs(self) -> dict[str, object]:
        """Return the flattened mapping stored on a node's ``attrs`` (design §4.0),
        which lets ``ckg_symbol`` expose the rollup without a join."""
        authors = []
        for name, commits in self.top_authors:
            authors.append({"name": name, "commits": commits})
        return dict(
            churn_30d=self.churn_30d,
            churn_90d=self.churn_90d,
            top_authors=authors,
            introduced=self.introduced_sha,
            introduced_ts=self.introduced_ts,
            last_changed=self.last_changed_sha,
            last_changed_ts=self.last_changed_ts,
        )
55
+
56
+
57
@dataclass
class _Acc:
    """Mutable running totals for one symbol, filled in while the log is walked."""

    churn_30d: int = 0
    churn_90d: int = 0
    # author name -> set of commit shas by that author
    authors: dict[str, set[str]] = field(default_factory=lambda: defaultdict(set))
    # (ts, sha) of the oldest commit seen; the initial ts is a "larger than anything" sentinel
    first: tuple[int, str] = (2**62, "")
    # (ts, sha) of the newest commit seen; the initial ts is a "smaller than anything" sentinel
    last: tuple[int, str] = (-1, "")
66
+
67
+
68
+ class ChurnMiner:
69
+ """Mines churn/authorship for a set of files over a commit-time window."""
70
+
71
+ def __init__(self, repo_root: str, *, now_ts: int, window_days: int = 90) -> None:
72
+ self._root = repo_root
73
+ self._now = now_ts
74
+ self._window = window_days
75
+
76
+ def mine(
77
+ self, spans_by_path: dict[str, list[tuple[str, tuple[int, int]]]]
78
+ ) -> list[SymbolAggregate]:
79
+ """Attribute windowed churn/authorship to the symbols in ``spans_by_path``
80
+ (``path -> [(symbol_id, (start_line, end_line)), …]``)."""
81
+ paths = [p for p, syms in spans_by_path.items() if syms]
82
+ if not paths or self._now <= 0:
83
+ return []
84
+ log = self._git_log(paths)
85
+ if log is None:
86
+ return []
87
+ acc: dict[str, _Acc] = defaultdict(_Acc)
88
+ cut30 = self._now - 30 * _DAY
89
+ cut90 = self._now - 90 * _DAY
90
+ for sha, ts, author, path, new_start, count, delta in self._hunks(log):
91
+ for sid in self._overlapping(spans_by_path.get(path, []), new_start, count):
92
+ a = acc[sid]
93
+ a.churn_90d += delta if ts >= cut90 else 0
94
+ a.churn_30d += delta if ts >= cut30 else 0
95
+ a.authors[author].add(sha)
96
+ if ts < a.first[0]:
97
+ a.first = (ts, sha)
98
+ if ts > a.last[0]:
99
+ a.last = (ts, sha)
100
+ return [self._aggregate(sid, a) for sid, a in sorted(acc.items())]
101
+
102
+ # --- internals --------------------------------------------------------
103
+
104
+ def _git_log(self, paths: list[str]) -> str | None:
105
+ since = datetime.fromtimestamp(max(self._now - self._window * _DAY, 0), tz=UTC).strftime(
106
+ "%Y-%m-%d"
107
+ )
108
+ try:
109
+ out = subprocess.run(
110
+ [
111
+ "git",
112
+ "-C",
113
+ self._root,
114
+ "log",
115
+ "--no-renames",
116
+ "--no-color",
117
+ "-U0",
118
+ f"--since={since}",
119
+ f"--format={_SOH}%H%x09%ct%x09%an",
120
+ "--",
121
+ *paths,
122
+ ],
123
+ capture_output=True,
124
+ text=True,
125
+ check=True,
126
+ )
127
+ except (subprocess.SubprocessError, OSError):
128
+ return None
129
+ return out.stdout
130
+
131
+ def _hunks(self, log: str): # type: ignore[no-untyped-def]
132
+ """Yield ``(sha, ts, author, path, new_start, line_count, churn_delta)`` per
133
+ hunk. ``churn_delta`` = added + deleted lines for the hunk."""
134
+ sha = author = path = ""
135
+ ts = 0
136
+ for line in log.splitlines():
137
+ if line.startswith(_SOH):
138
+ parts = line[1:].split("\t")
139
+ if len(parts) == 3:
140
+ sha, ts_s, author = parts
141
+ ts = int(ts_s) if ts_s.isdigit() else 0
142
+ path = ""
143
+ continue
144
+ if line.startswith("diff --git "):
145
+ path = ""
146
+ continue
147
+ if line.startswith("+++ b/"):
148
+ path = line[6:]
149
+ continue
150
+ if line.startswith("@@"):
151
+ m = _HUNK.match(line)
152
+ if not m or not path:
153
+ continue
154
+ new_start = int(m.group(1))
155
+ added = int(m.group(2)) if m.group(2) is not None else 1
156
+ deleted = self._removed(line)
157
+ yield sha, ts, author, path, new_start, added, added + deleted
158
+
159
+ @staticmethod
160
+ def _removed(hunk: str) -> int:
161
+ m = re.match(r"^@@ -\d+(?:,(\d+))? \+", hunk)
162
+ if not m:
163
+ return 0
164
+ return int(m.group(1)) if m.group(1) is not None else 1
165
+
166
+ @staticmethod
167
+ def _overlapping(
168
+ syms: list[tuple[str, tuple[int, int]]], new_start: int, count: int
169
+ ) -> list[str]:
170
+ lo = new_start
171
+ hi = new_start + max(count, 1) - 1
172
+ return [sid for sid, (s, e) in syms if not (e < lo or s > hi)]
173
+
174
+ def _aggregate(self, sid: str, a: _Acc) -> SymbolAggregate:
175
+ top = sorted(
176
+ ((name, len(shas)) for name, shas in a.authors.items()),
177
+ key=lambda t: (-t[1], t[0]),
178
+ )[:3]
179
+ intro_ts, intro_sha = a.first if a.first[1] else (0, "")
180
+ last_ts, last_sha = a.last if a.last[1] else (0, "")
181
+ return SymbolAggregate(
182
+ symbol_id=sid,
183
+ churn_30d=a.churn_30d,
184
+ churn_90d=a.churn_90d,
185
+ top_authors=top,
186
+ introduced_sha=intro_sha,
187
+ introduced_ts=intro_ts,
188
+ last_changed_sha=last_sha,
189
+ last_changed_ts=last_ts,
190
+ )
@@ -0,0 +1,114 @@
1
+ """``SqliteTemporalRecorder`` — the write port the indexer drives (feat-009).
2
+
3
+ The ``IncrementalIndexer`` calls ``open``/``close`` as it applies a diff (it is
4
+ the only writer that sees both the old and new state of a file); the recorder
5
+ buffers those into ``Event``s and writes them in a single transaction on
6
+ ``flush()`` at end-of-refresh — mirroring how ``IndexMeta`` is saved last, so a
7
+ crash leaves a consistent log. ``open``/``close`` are sync (buffering); only
8
+ ``flush`` touches SQLite.
9
+
10
+ Structurally satisfies ``ingest.incremental.ports.TemporalRecorder`` so the
11
+ deterministic ``ingest`` layer depends on a Protocol, not on ``temporal``.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from collections.abc import Iterable
17
+
18
+ from agentforge_graph.core import GraphQuery, GraphStore, NodeKind, SymbolID
19
+
20
+ from .events import Entity, Event, EventKind
21
+ from .mining import ChurnMiner
22
+ from .store import TemporalStore
23
+
24
# Passed as the ``limit`` of the graph queries below, which are meant to
# return every code symbol in one call.
_ALL = 10_000_000
# Node kinds treated as code symbols for lifecycle events and churn attribution.
_SYMBOL_KINDS = (NodeKind.CLASS, NodeKind.FUNCTION, NodeKind.METHOD)
26
+
27
+
28
class SqliteTemporalRecorder:
    """Buffers node lifecycle events and writes them to a ``TemporalStore``.

    ``open`` / ``close`` only append to an in-memory buffer; ``flush`` hands
    the buffered events to the store in one call.
    """

    def __init__(self, store: TemporalStore) -> None:
        self._store = store
        self._buf: list[Event] = []

    def open(self, symbol_ids: Iterable[str], at: str, ts: int) -> None:
        """Buffer an OPENED node event at commit ``at`` / time ``ts`` for each id."""
        self._buf.extend(
            Event(symbol_id=sid, event=EventKind.OPENED, commit=at, ts=ts, entity=Entity.NODE)
            for sid in symbol_ids
        )

    def close(self, symbol_ids: Iterable[str], at: str, ts: int) -> None:
        """Buffer a CLOSED node event at commit ``at`` / time ``ts`` for each id."""
        self._buf.extend(
            Event(symbol_id=sid, event=EventKind.CLOSED, commit=at, ts=ts, entity=Entity.NODE)
            for sid in symbol_ids
        )

    async def record_churn(
        self,
        graph: GraphStore,
        repo_root: str,
        paths: Iterable[str],
        commit: str,
        commit_ts: int,
    ) -> None:
        """Mine churn/authorship for ``paths``, persist aggregates, and
        denormalise them onto the matching node ``attrs`` (design §4.5).
        Cheap on a small diff; a no-op when nothing maps or the commit time is
        unknown (non-git).

        ``commit`` is accepted for the caller's convenience but is not used
        here; the mining window is anchored on ``commit_ts``.
        """
        if commit_ts <= 0:
            return
        spans = await self._spans_by_path(graph, set(paths))
        if not spans:
            return
        aggs = ChurnMiner(repo_root, now_ts=commit_ts).mine(spans)
        if not aggs:
            return
        await self._store.upsert_aggregates(aggs)
        for agg in aggs:
            await graph.set_attrs(agg.symbol_id, agg.attrs())

    @staticmethod
    async def _spans_by_path(
        graph: GraphStore, paths: set[str]
    ) -> dict[str, list[tuple[str, tuple[int, int]]]]:
        """``path -> [(symbol_id, span), …]`` for the code symbols in ``paths``
        that carry a span (the attribution targets)."""
        out: dict[str, list[tuple[str, tuple[int, int]]]] = {}
        nodes = (await graph.query(GraphQuery(kinds=list(_SYMBOL_KINDS), limit=_ALL))).nodes
        for n in nodes:
            if n.span is None:
                continue
            path = SymbolID.parse(n.id).path
            if path in paths:
                out.setdefault(path, []).append((n.id, n.span))
        return out

    async def flush(self) -> None:
        """Write every buffered event to the store and empty the buffer.

        If the write fails, the events are put back at the front of the buffer
        (ahead of anything buffered meanwhile) and the error is re-raised, so a
        later ``flush`` can retry them. Retrying is safe because the store
        ignores events it has already recorded.
        """
        if not self._buf:
            return
        # Swap first so events buffered while the write is awaited are not lost.
        events, self._buf = self._buf, []
        try:
            await self._store.record(events)
        except BaseException:  # includes cancellation; always re-raised
            self._buf[:0] = events
            raise
90
+
91
+
92
def build_recorder(root: str) -> SqliteTemporalRecorder:
    """Return a recorder backed by the sidecar opened under ``root`` (the ``.ckg`` dir)."""
    sidecar = TemporalStore.open(root)
    return SqliteTemporalRecorder(sidecar)
95
+
96
+
97
async def seed_symbols(
    graph: GraphStore,
    recorder: SqliteTemporalRecorder,
    commit: str,
    ts: int,
    repo_root: str = "",
) -> None:
    """Record an OPENED event for every code symbol currently in the graph.

    Meant to run after a full index so that 'introduced' is anchored at the
    index commit. When ``repo_root`` is given, churn/authorship is then mined
    for every file that owns one of those symbols, so a fresh index already
    carries the ranking signal. Idempotent: re-indexing the same commit
    re-opens the same events, which the store dedupes.
    """
    everything = GraphQuery(kinds=list(_SYMBOL_KINDS), limit=_ALL)
    result = await graph.query(everything)
    symbols = result.nodes

    recorder.open([node.id for node in symbols], commit, ts)
    await recorder.flush()

    if not repo_root:
        return
    touched = set()
    for node in symbols:
        touched.add(SymbolID.parse(node.id).path)
    await recorder.record_churn(graph, repo_root, touched, commit, ts)
@@ -0,0 +1,282 @@
1
+ """``TemporalStore`` — the append-only evolution log (feat-009).
2
+
3
+ A stdlib-``sqlite3`` sidecar at ``.ckg/temporal.db``, deliberately *separate*
4
+ from the graph/vector stores: it keeps the current-graph hot path and both
5
+ store adapters untouched (design-009 §4.2), is trivially prunable, and is absent
6
+ for non-git / temporal-off repos. Writes are append-only and idempotent per
7
+ ``(symbol_id, commit, event, ref)`` so a crashed-then-retried refresh stays
8
+ consistent.
9
+
10
+ Chunk 1 implements the ``events`` table (node lifecycle); the ``aggregates``
11
+ table (churn/authorship, chunk 2) is created here but populated later.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import asyncio
17
+ import json
18
+ import sqlite3
19
+ from pathlib import Path
20
+ from typing import Any
21
+
22
+ from .events import Entity, Event, EventKind
23
+ from .mining import SymbolAggregate
24
+
25
# File name of the SQLite sidecar, joined onto the root passed to ``TemporalStore.open``.
_DB = "temporal.db"

# DDL executed in order by ``TemporalStore.open``. Every statement uses
# IF NOT EXISTS, so running it against an existing database changes nothing.
_SCHEMA = [
    # events: one row per lifecycle fact about a symbol.
    """CREATE TABLE IF NOT EXISTS events (
symbol_id TEXT NOT NULL,
entity TEXT NOT NULL,
event TEXT NOT NULL,
commit_sha TEXT NOT NULL,
ts INTEGER NOT NULL,
ref TEXT
)""",
    # idempotency: the same lifecycle fact recorded twice is a no-op. COALESCE so
    # a NULL ref participates in uniqueness (SQLite treats NULLs as distinct).
    """CREATE UNIQUE INDEX IF NOT EXISTS events_unique
ON events(symbol_id, commit_sha, event, COALESCE(ref, ''))""",
    "CREATE INDEX IF NOT EXISTS events_by_symbol ON events(symbol_id)",
    "CREATE INDEX IF NOT EXISTS events_by_ts ON events(ts)",
    # aggregates: periodic, bounded — populated in chunk 2 (churn/authorship).
    # One row per symbol; top_authors holds a JSON-encoded list.
    """CREATE TABLE IF NOT EXISTS aggregates (
symbol_id TEXT PRIMARY KEY,
churn_30d INTEGER,
churn_90d INTEGER,
top_authors TEXT,
introduced_sha TEXT,
introduced_ts INTEGER,
last_changed_sha TEXT,
last_changed_ts INTEGER
)""",
    # meta: small key/value side-table (chunk 4 backfill cursor, etc.).
    "CREATE TABLE IF NOT EXISTS meta (key TEXT PRIMARY KEY, value TEXT)",
]
56
+
57
+
58
class TemporalStore:
    """SQLite-backed evolution log kept beside the graph.

    One instance is opened per index/refresh. No connection is held on the
    instance: every operation opens and closes its own, because the blocking
    work runs through ``asyncio.to_thread`` and SQLite connections are not
    shareable across threads.
    """

    # Shared SELECT for the aggregate readers; column order matches _row_to_aggregate.
    _AGG_SELECT = (
        "SELECT symbol_id, churn_30d, churn_90d, top_authors,"
        " introduced_sha, introduced_ts, last_changed_sha, last_changed_ts"
        " FROM aggregates"
    )
    # Shared SELECT prefix for the event readers; column order matches _row_to_event.
    _EVENT_SELECT = "SELECT symbol_id, entity, event, commit_sha, ts, ref FROM events "

    def __init__(self, path: Path) -> None:
        self._path = path

    @property
    def path(self) -> Path:
        """Location of the SQLite file."""
        return self._path

    @classmethod
    def open(cls, root: str | Path) -> TemporalStore:
        """Make sure the sidecar exists under ``root`` (the ``.ckg`` dir), apply
        the schema, and return a store bound to it."""
        db_path = Path(root) / _DB
        db_path.parent.mkdir(parents=True, exist_ok=True)
        conn = sqlite3.connect(str(db_path))
        try:
            for statement in _SCHEMA:
                conn.execute(statement)
            conn.commit()
        finally:
            conn.close()
        return cls(db_path)

    def _with_conn(self, work: Any) -> Any:
        """Call ``work(conn)`` on a fresh connection that is always closed afterwards."""
        conn = sqlite3.connect(str(self._path))
        try:
            return work(conn)
        finally:
            conn.close()

    @staticmethod
    def _changed(cur: Any) -> int:
        """Cursor rowcount, with ``None`` mapped to 0."""
        return 0 if cur.rowcount is None else cur.rowcount

    # --- events -----------------------------------------------------------

    async def record(self, events: list[Event]) -> int:
        """Append ``events`` and return how many were new (duplicates are ignored)."""
        if events:
            return await asyncio.to_thread(self._record_sync, events)
        return 0

    def _record_sync(self, events: list[Event]) -> int:
        def work(conn: Any) -> int:
            rows = [
                (e.symbol_id, e.entity.value, e.event.value, e.commit, e.ts, e.ref)
                for e in events
            ]
            cur = conn.executemany(
                "INSERT OR IGNORE INTO events"
                "(symbol_id, entity, event, commit_sha, ts, ref) VALUES (?, ?, ?, ?, ?, ?)",
                rows,
            )
            conn.commit()
            return self._changed(cur)

        return self._with_conn(work)

    async def events_for(self, symbol_id: str) -> list[Event]:
        """Events of a single symbol, oldest first."""
        return await asyncio.to_thread(self._events_for_sync, symbol_id)

    def _events_for_sync(self, symbol_id: str) -> list[Event]:
        sql = self._EVENT_SELECT + "WHERE symbol_id = ? ORDER BY ts, rowid"
        fetched = self._with_conn(lambda conn: conn.execute(sql, (symbol_id,)).fetchall())
        return list(map(_row_to_event, fetched))

    async def all_events(self) -> list[Event]:
        """The whole log, oldest first (test/inspection helper)."""
        return await asyncio.to_thread(self._all_events_sync)

    def _all_events_sync(self) -> list[Event]:
        sql = self._EVENT_SELECT + "ORDER BY ts, rowid"
        fetched = self._with_conn(lambda conn: conn.execute(sql).fetchall())
        return list(map(_row_to_event, fetched))

    # --- meta key/value (chunk 4 resume cursor) ---------------------------

    async def get_meta(self, key: str) -> str | None:
        """Value stored under ``key``, or ``None`` when the key is absent."""
        return await asyncio.to_thread(self._get_meta_sync, key)

    def _get_meta_sync(self, key: str) -> str | None:
        hit = self._with_conn(
            lambda conn: conn.execute("SELECT value FROM meta WHERE key = ?", (key,)).fetchone()
        )
        if hit:
            return hit[0]
        return None

    async def set_meta(self, key: str, value: str) -> None:
        """Store ``value`` under ``key``, replacing any previous value."""
        await asyncio.to_thread(self._set_meta_sync, key, value)

    def _set_meta_sync(self, key: str, value: str) -> None:
        def work(conn: Any) -> None:
            conn.execute("INSERT OR REPLACE INTO meta(key, value) VALUES (?, ?)", (key, value))
            conn.commit()

        self._with_conn(work)

    async def count_events(self) -> int:
        """Number of rows in the event log (for ``ckg status``)."""
        return await asyncio.to_thread(self._count_events_sync)

    def _count_events_sync(self) -> int:
        return self._with_conn(
            lambda conn: int(conn.execute("SELECT COUNT(*) FROM events").fetchone()[0])
        )

    # --- aggregates (churn / authorship, chunk 2) -------------------------

    async def upsert_aggregates(self, aggs: list[SymbolAggregate]) -> int:
        """Insert or replace the given rollups and return the rows written."""
        if aggs:
            return await asyncio.to_thread(self._upsert_aggregates_sync, aggs)
        return 0

    def _upsert_aggregates_sync(self, aggs: list[SymbolAggregate]) -> int:
        def as_row(a: SymbolAggregate) -> tuple[Any, ...]:
            # top_authors is stored as a JSON list of {"name", "commits"} objects.
            authors_json = json.dumps([{"name": n, "commits": c} for n, c in a.top_authors])
            return (
                a.symbol_id,
                a.churn_30d,
                a.churn_90d,
                authors_json,
                a.introduced_sha,
                a.introduced_ts,
                a.last_changed_sha,
                a.last_changed_ts,
            )

        def work(conn: Any) -> int:
            cur = conn.executemany(
                "INSERT OR REPLACE INTO aggregates"
                "(symbol_id, churn_30d, churn_90d, top_authors,"
                " introduced_sha, introduced_ts, last_changed_sha, last_changed_ts)"
                " VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
                [as_row(a) for a in aggs],
            )
            conn.commit()
            return self._changed(cur)

        return self._with_conn(work)

    async def all_aggregates(self) -> list[SymbolAggregate]:
        """All stored rollups (for ``changed_since`` scans)."""
        return await asyncio.to_thread(self._all_aggregates_sync)

    def _all_aggregates_sync(self) -> list[SymbolAggregate]:
        fetched = self._with_conn(lambda conn: conn.execute(self._AGG_SELECT).fetchall())
        return list(map(_row_to_aggregate, fetched))

    async def aggregate_for(self, symbol_id: str) -> SymbolAggregate | None:
        """Rollup stored for ``symbol_id``, or ``None`` when there is none."""
        return await asyncio.to_thread(self._aggregate_for_sync, symbol_id)

    def _aggregate_for_sync(self, symbol_id: str) -> SymbolAggregate | None:
        sql = self._AGG_SELECT + " WHERE symbol_id = ?"
        hit = self._with_conn(lambda conn: conn.execute(sql, (symbol_id,)).fetchone())
        if hit:
            return _row_to_aggregate(hit)
        return None

    # --- retention --------------------------------------------------------

    async def prune(self, before_ts: int) -> int:
        """Remove CLOSED events with ``ts`` below ``before_ts`` (retention horizon)
        and return how many rows were deleted. OPENED events stay, since they
        anchor 'introduced'; full retention math lands in chunk 5."""
        return await asyncio.to_thread(self._prune_sync, before_ts)

    def _prune_sync(self, before_ts: int) -> int:
        def work(conn: Any) -> int:
            cur = conn.execute(
                "DELETE FROM events WHERE event = ? AND ts < ?",
                (EventKind.CLOSED.value, before_ts),
            )
            conn.commit()
            return self._changed(cur)

        return self._with_conn(work)
258
+
259
+
260
def _row_to_event(r: tuple[Any, ...]) -> Event:
    """Rebuild an ``Event`` from a row laid out as
    ``(symbol_id, entity, event, commit_sha, ts, ref)``."""
    symbol_id = r[0]
    entity = Entity(r[1])
    kind = EventKind(r[2])
    return Event(symbol_id=symbol_id, entity=entity, event=kind, commit=r[3], ts=r[4], ref=r[5])
269
+
270
+
271
def _row_to_aggregate(r: tuple[Any, ...]) -> SymbolAggregate:
    """Rebuild a ``SymbolAggregate`` from a row laid out as
    ``(symbol_id, churn_30d, churn_90d, top_authors, introduced_sha,
    introduced_ts, last_changed_sha, last_changed_ts)``.

    Every column except ``symbol_id`` is nullable in the schema, so NULLs are
    mapped to the field's empty value (0, ``""`` or ``[]``) to keep the
    result's fields at their declared types.
    """
    # top_authors is stored as a JSON list of {"name", "commits"} objects.
    authors = [(a["name"], a["commits"]) for a in json.loads(r[3] or "[]")]
    return SymbolAggregate(
        symbol_id=r[0],
        churn_30d=r[1] or 0,
        churn_90d=r[2] or 0,
        top_authors=authors,
        introduced_sha=r[4] or "",
        introduced_ts=r[5] or 0,
        last_changed_sha=r[6] or "",
        last_changed_ts=r[7] or 0,
    )