agentforge-graph 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentforge_graph/__init__.py +6 -0
- agentforge_graph/chunking/__init__.py +12 -0
- agentforge_graph/chunking/cast.py +159 -0
- agentforge_graph/chunking/chunk.py +19 -0
- agentforge_graph/chunking/tokens.py +15 -0
- agentforge_graph/cli.py +607 -0
- agentforge_graph/config.py +259 -0
- agentforge_graph/core/__init__.py +54 -0
- agentforge_graph/core/conformance.py +270 -0
- agentforge_graph/core/contracts.py +163 -0
- agentforge_graph/core/kinds.py +68 -0
- agentforge_graph/core/models.py +134 -0
- agentforge_graph/core/provenance.py +62 -0
- agentforge_graph/core/symbols.py +116 -0
- agentforge_graph/embed/__init__.py +28 -0
- agentforge_graph/embed/base.py +22 -0
- agentforge_graph/embed/bedrock.py +85 -0
- agentforge_graph/embed/fake.py +34 -0
- agentforge_graph/embed/openai.py +67 -0
- agentforge_graph/embed/pipeline.py +184 -0
- agentforge_graph/embed/registry.py +66 -0
- agentforge_graph/embed/report.py +15 -0
- agentforge_graph/enrich/__init__.py +70 -0
- agentforge_graph/enrich/anthropic.py +38 -0
- agentforge_graph/enrich/anthropic_client.py +109 -0
- agentforge_graph/enrich/bedrock.py +24 -0
- agentforge_graph/enrich/bedrock_client.py +115 -0
- agentforge_graph/enrich/bedrock_summarizer.py +23 -0
- agentforge_graph/enrich/claude.py +172 -0
- agentforge_graph/enrich/enricher.py +108 -0
- agentforge_graph/enrich/governs.py +173 -0
- agentforge_graph/enrich/governs_enricher.py +152 -0
- agentforge_graph/enrich/heuristics.py +224 -0
- agentforge_graph/enrich/judge.py +63 -0
- agentforge_graph/enrich/registry.py +133 -0
- agentforge_graph/enrich/report.py +60 -0
- agentforge_graph/enrich/summarizer.py +62 -0
- agentforge_graph/enrich/summary_enricher.py +211 -0
- agentforge_graph/enrich/taxonomy.py +38 -0
- agentforge_graph/frameworks/__init__.py +29 -0
- agentforge_graph/frameworks/base.py +75 -0
- agentforge_graph/frameworks/detect.py +124 -0
- agentforge_graph/frameworks/extractor.py +63 -0
- agentforge_graph/frameworks/orm.py +93 -0
- agentforge_graph/frameworks/packs/_js_ast.py +56 -0
- agentforge_graph/frameworks/packs/_python_ast.py +157 -0
- agentforge_graph/frameworks/packs/django/__init__.py +240 -0
- agentforge_graph/frameworks/packs/django/models.scm +7 -0
- agentforge_graph/frameworks/packs/express/__init__.py +133 -0
- agentforge_graph/frameworks/packs/express/routes.scm +8 -0
- agentforge_graph/frameworks/packs/fastapi/__init__.py +210 -0
- agentforge_graph/frameworks/packs/fastapi/depends.scm +6 -0
- agentforge_graph/frameworks/packs/fastapi/routes.scm +10 -0
- agentforge_graph/frameworks/packs/flask/__init__.py +143 -0
- agentforge_graph/frameworks/packs/flask/routes.scm +11 -0
- agentforge_graph/frameworks/packs/nestjs/__init__.py +205 -0
- agentforge_graph/frameworks/packs/nestjs/routes.scm +6 -0
- agentforge_graph/frameworks/packs/spring/__init__.py +267 -0
- agentforge_graph/frameworks/packs/spring/routes.scm +6 -0
- agentforge_graph/frameworks/packs/sqlalchemy/__init__.py +250 -0
- agentforge_graph/frameworks/packs/sqlalchemy/models.scm +7 -0
- agentforge_graph/frameworks/registry.py +44 -0
- agentforge_graph/ingest/__init__.py +30 -0
- agentforge_graph/ingest/codegraph.py +847 -0
- agentforge_graph/ingest/extractor.py +353 -0
- agentforge_graph/ingest/incremental/__init__.py +25 -0
- agentforge_graph/ingest/incremental/detect.py +118 -0
- agentforge_graph/ingest/incremental/dirty.py +61 -0
- agentforge_graph/ingest/incremental/indexer.py +218 -0
- agentforge_graph/ingest/incremental/meta.py +72 -0
- agentforge_graph/ingest/incremental/ports.py +39 -0
- agentforge_graph/ingest/pack.py +160 -0
- agentforge_graph/ingest/packs/__init__.py +34 -0
- agentforge_graph/ingest/packs/cpp/__init__.py +35 -0
- agentforge_graph/ingest/packs/cpp/references.scm +15 -0
- agentforge_graph/ingest/packs/cpp/structure.scm +49 -0
- agentforge_graph/ingest/packs/csharp/__init__.py +35 -0
- agentforge_graph/ingest/packs/csharp/references.scm +12 -0
- agentforge_graph/ingest/packs/csharp/structure.scm +45 -0
- agentforge_graph/ingest/packs/go/__init__.py +38 -0
- agentforge_graph/ingest/packs/go/references.scm +12 -0
- agentforge_graph/ingest/packs/go/structure.scm +64 -0
- agentforge_graph/ingest/packs/java/__init__.py +35 -0
- agentforge_graph/ingest/packs/java/references.scm +12 -0
- agentforge_graph/ingest/packs/java/structure.scm +38 -0
- agentforge_graph/ingest/packs/javascript/__init__.py +34 -0
- agentforge_graph/ingest/packs/javascript/references.scm +11 -0
- agentforge_graph/ingest/packs/javascript/structure.scm +166 -0
- agentforge_graph/ingest/packs/php/__init__.py +35 -0
- agentforge_graph/ingest/packs/php/references.scm +15 -0
- agentforge_graph/ingest/packs/php/structure.scm +44 -0
- agentforge_graph/ingest/packs/python/__init__.py +25 -0
- agentforge_graph/ingest/packs/python/references.scm +14 -0
- agentforge_graph/ingest/packs/python/structure.scm +57 -0
- agentforge_graph/ingest/packs/ruby/__init__.py +37 -0
- agentforge_graph/ingest/packs/ruby/references.scm +12 -0
- agentforge_graph/ingest/packs/ruby/structure.scm +37 -0
- agentforge_graph/ingest/packs/rust/__init__.py +39 -0
- agentforge_graph/ingest/packs/rust/references.scm +12 -0
- agentforge_graph/ingest/packs/rust/structure.scm +46 -0
- agentforge_graph/ingest/packs/typescript/__init__.py +31 -0
- agentforge_graph/ingest/packs/typescript/references.scm +11 -0
- agentforge_graph/ingest/packs/typescript/structure.scm +99 -0
- agentforge_graph/ingest/pipeline.py +134 -0
- agentforge_graph/ingest/report.py +84 -0
- agentforge_graph/ingest/resolver.py +467 -0
- agentforge_graph/ingest/source.py +79 -0
- agentforge_graph/knowledge/__init__.py +28 -0
- agentforge_graph/knowledge/adr.py +136 -0
- agentforge_graph/knowledge/commits.py +152 -0
- agentforge_graph/knowledge/ingest.py +312 -0
- agentforge_graph/knowledge/mentions.py +71 -0
- agentforge_graph/knowledge/report.py +32 -0
- agentforge_graph/main.py +21 -0
- agentforge_graph/providers.py +36 -0
- agentforge_graph/repomap/__init__.py +14 -0
- agentforge_graph/repomap/rank.py +161 -0
- agentforge_graph/repomap/render.py +55 -0
- agentforge_graph/repomap/repomap.py +66 -0
- agentforge_graph/retrieve/__init__.py +21 -0
- agentforge_graph/retrieve/pack.py +76 -0
- agentforge_graph/retrieve/rerank.py +251 -0
- agentforge_graph/retrieve/retriever.py +286 -0
- agentforge_graph/retrieve/scoring.py +36 -0
- agentforge_graph/serve/__init__.py +19 -0
- agentforge_graph/serve/engine.py +204 -0
- agentforge_graph/serve/http_runner.py +133 -0
- agentforge_graph/serve/server.py +110 -0
- agentforge_graph/serve/tools.py +307 -0
- agentforge_graph/store/__init__.py +32 -0
- agentforge_graph/store/_rowmap.py +102 -0
- agentforge_graph/store/errors.py +22 -0
- agentforge_graph/store/facade.py +89 -0
- agentforge_graph/store/kuzu_store.py +380 -0
- agentforge_graph/store/lance_store.py +146 -0
- agentforge_graph/store/neo4j_store.py +294 -0
- agentforge_graph/store/pgvector_store.py +170 -0
- agentforge_graph/store/registry.py +45 -0
- agentforge_graph/temporal/__init__.py +36 -0
- agentforge_graph/temporal/backfill.py +338 -0
- agentforge_graph/temporal/events.py +82 -0
- agentforge_graph/temporal/index.py +190 -0
- agentforge_graph/temporal/mining.py +190 -0
- agentforge_graph/temporal/recorder.py +114 -0
- agentforge_graph/temporal/store.py +282 -0
- agentforge_graph-0.3.2.dist-info/METADATA +291 -0
- agentforge_graph-0.3.2.dist-info/RECORD +151 -0
- agentforge_graph-0.3.2.dist-info/WHEEL +4 -0
- agentforge_graph-0.3.2.dist-info/entry_points.txt +3 -0
- agentforge_graph-0.3.2.dist-info/licenses/LICENSE +202 -0
- agentforge_graph-0.3.2.dist-info/licenses/NOTICE +14 -0
|
@@ -0,0 +1,847 @@
|
|
|
1
|
+
"""``CodeGraph`` — the top-level user facade (spec §4.1).
|
|
2
|
+
|
|
3
|
+
``index`` builds the embedded store (feat-003), runs the pipeline, and
|
|
4
|
+
returns a handle exposing the ``Store`` and the ``IndexReport``. ``open``
|
|
5
|
+
re-opens an existing index without re-indexing. This is the
|
|
6
|
+
``CodeGraph.open`` feat-003 deferred here (where ``index`` lives).
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import subprocess
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import TYPE_CHECKING, Any
|
|
14
|
+
|
|
15
|
+
from agentforge_graph.store import Store
|
|
16
|
+
|
|
17
|
+
from .pack import PackRegistry
|
|
18
|
+
from .packs import BUILTIN_PACKS, builtin_registry
|
|
19
|
+
from .pipeline import IngestPipeline
|
|
20
|
+
from .report import IndexReport, ModelInfo, RouteInfo, ServiceInfo
|
|
21
|
+
from .source import RepoSource
|
|
22
|
+
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
# embed/retrieve import ingest, so reference their types under TYPE_CHECKING.
|
|
25
|
+
from agentforge_graph.embed import EmbedReport
|
|
26
|
+
from agentforge_graph.enrich import SummaryInfo, TaggedInfo
|
|
27
|
+
from agentforge_graph.knowledge import DecisionInfo
|
|
28
|
+
from agentforge_graph.repomap import RankedSymbol
|
|
29
|
+
from agentforge_graph.retrieve import ContextPack
|
|
30
|
+
from agentforge_graph.retrieve.retriever import Mode
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _git_commit(repo_path: str | Path) -> str:
    """Return the commit hash ``HEAD`` resolves to in ``repo_path``.

    Runs ``git -C <repo_path> rev-parse HEAD`` and returns its stripped
    stdout. Returns an empty string when git exits non-zero or cannot be
    started.
    """
    cmd = ["git", "-C", str(repo_path), "rev-parse", "HEAD"]
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, check=True)
    except (subprocess.SubprocessError, OSError):
        # Best effort: callers treat "" as "no commit available".
        return ""
    return proc.stdout.strip()
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _commit_time(repo_path: str | Path, commit: str) -> int:
    """Epoch-seconds timestamp of ``commit`` — the value stamped on feat-009
    events.

    Reads ``git show -s --format=%ct`` (``%ct`` is git's committer-date UNIX
    timestamp placeholder). Returns 0 when ``commit`` is empty, when git fails
    or cannot be started, or when the output is not an integer.
    """
    if commit:
        cmd = ["git", "-C", str(repo_path), "show", "-s", "--format=%ct", commit]
        try:
            proc = subprocess.run(cmd, capture_output=True, text=True, check=True)
            # Empty output falls back to 0 rather than raising.
            return int(proc.stdout.strip() or 0)
        except (subprocess.SubprocessError, OSError, ValueError):
            return 0
    return 0
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _build_recorder(repo_path: str | Path, config: str | Path | None, root: Path, commit: str):  # type: ignore[no-untyped-def]
    """Return the feat-009 evolution-log recorder, or ``None`` when disabled.

    ``None`` is returned when ``commit`` is empty (no git commit available) or
    when ``TemporalConfig.load(config).enabled`` is false. The ``temporal``
    package is imported only once both checks pass, so it is never loaded
    while the feature is off. ``repo_path`` is accepted but not used here.
    """
    from agentforge_graph.config import TemporalConfig

    # Check the commit first so the config is not loaded without one.
    if not commit:
        return None
    if not TemporalConfig.load(config).enabled:
        return None

    from agentforge_graph.temporal import build_recorder

    return build_recorder(str(root))
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
async def _prune_temporal(repo_path: str | Path, config: str | Path | None, root: Path) -> None:
    """Retention pruning (feat-009 §4.10), run at the end of an index/refresh.

    The horizon is the timestamp of ``HEAD~<retention_commits>``; it is passed
    to ``TemporalStore.prune``. Nothing happens when temporal is disabled, when
    ``<root>/temporal.db`` does not exist, or when the horizon cannot be
    resolved (history shorter than ``retention_commits``).
    """
    from agentforge_graph.config import TemporalConfig

    temporal_cfg = TemporalConfig.load(config)
    if not (temporal_cfg.enabled and (root / "temporal.db").exists()):
        return

    horizon = _commit_time(repo_path, f"HEAD~{temporal_cfg.retention_commits}")
    if horizon > 0:  # a non-positive horizon means there is nothing to prune
        from agentforge_graph.temporal import TemporalStore

        await TemporalStore.open(root).prune(horizon)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _framework_extractor(
    repo_path: str | Path, config: str | Path | None, registry: PackRegistry
) -> Any:
    """Build a ``FrameworkExtractor`` over the framework packs active for this
    repo (feat-011).

    The file extensions of every language pack in ``registry`` are handed to
    ``active_frameworks`` together with the built-in framework registry. When
    nothing is active the extractor wraps an empty selection.
    """
    from agentforge_graph.frameworks import (
        FrameworkExtractor,
        active_frameworks,
        builtin_framework_registry,
    )

    # Union of the extensions handled by the selected language packs.
    extensions = set()
    for pack in registry.packs:
        extensions.update(pack.extensions)

    return FrameworkExtractor(
        active_frameworks(repo_path, config, builtin_framework_registry(), extensions)
    )
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
async def _ingest_knowledge(
    store: Store,
    repo_path: str | Path,
    config: str | Path | None,
    repo: str,
    commit: str,
    registry: PackRegistry,
    report: IndexReport,
) -> None:
    """Run the ADR/knowledge pass (feat-010) and fold its counters into
    ``report``.

    Invoked after code indexing so mention linking sees current code. Returns
    immediately when ``knowledge.enabled`` is false. Commit messages are only
    ingested when ``commit_messages`` is set in the knowledge config.
    """
    from agentforge_graph.config import KnowledgeConfig
    from agentforge_graph.knowledge import CommitIngestor, KnowledgeIngestor

    knowledge_cfg = KnowledgeConfig.load(config)
    if not knowledge_cfg.enabled:
        return

    extensions = set()
    for pack in registry.packs:
        extensions.update(pack.extensions)

    ingestor = KnowledgeIngestor(repo, commit)
    stats = await ingestor.ingest(
        store.graph,
        repo_path,
        knowledge_cfg.adr_globs,
        extensions,
        doc_globs=knowledge_cfg.doc_globs,
    )

    # Copy the ingest counters onto the report under the same names.
    for counter in (
        "decisions_indexed",
        "governs_resolved",
        "mentions_unresolved",
        "docs_indexed",
        "describes_resolved",
    ):
        setattr(report, counter, getattr(stats, counter))

    if knowledge_cfg.commit_messages:
        commit_ingestor = CommitIngestor(
            repo, repo_path, commit, limit=knowledge_cfg.commit_messages_limit
        )
        report.commits_indexed = await commit_ingestor.ingest(store.graph)

    if stats.decisions_indexed:
        # Decisions are added on top of whatever the code pass already counted.
        already_counted = report.by_node_kind.get("Decision", 0)
        report.by_node_kind["Decision"] = already_counted + stats.decisions_indexed
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _save_meta(
    root: Path, commit: str, registry: PackRegistry, file_hashes: dict[str, str]
) -> None:
    """Write the index manifest for this run under ``root`` (feat-004).

    The manifest records the indexed commit, the fingerprints of the packs in
    ``registry`` and the per-file hashes. It is written only after the indexing
    and knowledge passes, so a crash earlier leaves the previous manifest in
    place and the next run re-derives the diff from it.
    NOTE(review): the original note says the save is atomic; that depends on
    ``IndexMeta.save``, which is not visible here.
    """
    from .incremental import IndexMeta

    manifest = IndexMeta(
        indexed_commit=commit,
        pack_versions=IndexMeta.fingerprints(registry.packs),
        files=file_hashes,
    )
    manifest.save(root)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _registry_for(languages: str | list[str] | None) -> PackRegistry:
    """Select the language packs to index with.

    ``None`` or the string ``"auto"`` yields the full built-in registry.
    Otherwise ``languages`` (one name or a list of names) is matched against
    each built-in pack's ``language`` and ``lang_slug``.
    """
    if languages is not None and languages != "auto":
        if isinstance(languages, str):
            wanted = {languages}
        else:
            wanted = set(languages)
        selected = []
        for pack in BUILTIN_PACKS:
            if pack.language in wanted or pack.lang_slug in wanted:
                selected.append(pack)
        return PackRegistry(selected)
    return builtin_registry()
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _source_registry(
    repo_path: str | Path,
    config: str | Path | None,
    languages: str | list[str] | None,
    include: list[str] | None = None,
    exclude: list[str] | None = None,
) -> tuple[RepoSource, PackRegistry]:
    """Build the ``RepoSource`` and ``PackRegistry`` pair for ``repo_path``.

    An explicit ``languages`` argument wins over ``ingest.languages`` from the
    config. Caller-supplied ``exclude`` patterns are appended to the configured
    ones; ``include`` and ``max_file_kb`` are passed through unchanged.
    """
    from agentforge_graph.config import IngestConfig

    ingest_cfg = IngestConfig.load(config)
    chosen_languages = ingest_cfg.languages if languages is None else languages
    registry = _registry_for(chosen_languages)

    extra_excludes = exclude or []
    source = RepoSource(
        repo_path,
        include=include,
        exclude=ingest_cfg.exclude + extra_excludes,
        max_file_kb=ingest_cfg.max_file_kb,
    )
    return source, registry
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
class CodeGraph:
|
|
190
|
+
def __init__(
|
|
191
|
+
self,
|
|
192
|
+
store: Store,
|
|
193
|
+
repo_path: str | Path = ".",
|
|
194
|
+
config: str | Path | None = None,
|
|
195
|
+
languages: str | list[str] | None = None,
|
|
196
|
+
report: IndexReport | None = None,
|
|
197
|
+
) -> None:
|
|
198
|
+
self._store = store
|
|
199
|
+
self._repo_path = repo_path
|
|
200
|
+
self._config = config
|
|
201
|
+
self._languages = languages
|
|
202
|
+
self._report = report
|
|
203
|
+
self._embed_report: EmbedReport | None = None
|
|
204
|
+
|
|
205
|
+
@classmethod
async def index(
    cls,
    repo_path: str | Path = ".",
    languages: str | list[str] | None = None,
    config: str | Path | None = None,
    include: list[str] | None = None,
    exclude: list[str] | None = None,
    embed: bool = False,
    full: bool = False,
) -> CodeGraph:
    """Index ``repo_path`` and return a ``CodeGraph`` handle.

    Incremental by default once a prior index exists (feat-004): only the
    detected changes are applied. A clean rebuild through ``IngestPipeline``
    is used instead when ``full`` is set, ``ingest.incremental`` is false, no
    previous index is recorded, or the pack fingerprints changed.

    After either path the knowledge pass runs, the manifest is saved,
    temporal retention is pruned, and — when ``embed`` is true — embeddings
    are built.
    """
    from agentforge_graph.config import IngestConfig, StoreConfig

    from .incremental import ChangeDetector, IndexMeta

    store = await Store.open(repo_path, config)
    source, registry = _source_registry(repo_path, config, languages, include, exclude)
    repo = Path(repo_path).resolve().name
    commit = _git_commit(repo_path)
    root = Path(repo_path) / StoreConfig.load(config).path
    ingest_cfg = IngestConfig.load(config)
    meta = IndexMeta.load(root)

    # The manifest is only consulted when incremental mode is allowed at all.
    incremental_ok = False
    if ingest_cfg.incremental and not full:
        incremental_ok = meta.is_indexed() and not meta.packs_changed(registry.packs)

    cg = cls(store, repo_path, config, languages)
    frameworks = _framework_extractor(repo_path, config, registry)
    recorder = _build_recorder(repo_path, config, root, commit)  # feat-009 (None if off)
    # Detection runs on both paths: its file hashes feed the manifest below.
    result = await ChangeDetector(repo_path).detect(source, meta, registry)

    if not incremental_ok:
        pipeline = IngestPipeline(repo=repo, commit=commit, frameworks=frameworks)
        report = await pipeline.run(source, store.graph, registry)
        if recorder is not None:  # full index: open intervals for all symbols
            from agentforge_graph.temporal import seed_symbols

            await seed_symbols(
                store.graph,
                recorder,
                commit,
                _commit_time(repo_path, commit),
                repo_root=str(repo_path),
            )
    else:
        report = await cg._apply_changes(
            source=source,
            registry=registry,
            repo=repo,
            commit=commit,
            changes=result.changes,
            resolve_scope_hops=ingest_cfg.resolve_scope_hops,
            root=root,
            frameworks=frameworks,
            recorder=recorder,
            commit_ts=_commit_time(repo_path, commit),
        )

    cg._report = report
    await _ingest_knowledge(store, repo_path, config, repo, commit, registry, report)
    _save_meta(root, commit, registry, result.file_hashes)
    await _prune_temporal(repo_path, config, root)
    if embed:
        await cg.embed()
    return cg
|
|
276
|
+
|
|
277
|
+
async def refresh(self) -> IndexReport:
    """Re-index only what changed since the last index (feat-004).

    This is the explicit incremental entry point; ``index()`` goes through
    the same ``_apply_changes`` path when it runs incrementally. The knowledge
    pass, manifest save and temporal pruning follow, and the resulting report
    is stored on the instance and returned.
    """
    from agentforge_graph.config import IngestConfig, StoreConfig

    from .incremental import ChangeDetector, IndexMeta

    repo_path = self._repo_path
    config = self._config

    source, registry = _source_registry(repo_path, config, self._languages)
    repo = Path(repo_path).resolve().name
    commit = _git_commit(repo_path)
    root = Path(repo_path) / StoreConfig.load(config).path
    ingest_cfg = IngestConfig.load(config)
    meta = IndexMeta.load(root)
    frameworks = _framework_extractor(repo_path, config, registry)
    recorder = _build_recorder(repo_path, config, root, commit)  # feat-009
    result = await ChangeDetector(repo_path).detect(source, meta, registry)

    report = await self._apply_changes(
        source=source,
        registry=registry,
        repo=repo,
        commit=commit,
        changes=result.changes,
        resolve_scope_hops=ingest_cfg.resolve_scope_hops,
        root=root,
        frameworks=frameworks,
        recorder=recorder,
        commit_ts=_commit_time(repo_path, commit),
    )
    self._report = report

    await _ingest_knowledge(self._store, repo_path, config, repo, commit, registry, report)
    _save_meta(root, commit, registry, result.file_hashes)
    await _prune_temporal(repo_path, config, root)
    return report
|
|
312
|
+
|
|
313
|
+
async def _apply_changes(
    self,
    source: RepoSource,
    registry: PackRegistry,
    repo: str,
    commit: str,
    changes: object,
    resolve_scope_hops: int,
    root: Path,
    frameworks: Any = None,
    recorder: Any = None,
    commit_ts: int = 0,
) -> IndexReport:
    """Apply a detected change set to the store via ``IncrementalIndexer``.

    ``changes`` must be a ``ChangeSet``; it is typed ``object`` only so the
    incremental module can be imported inside the method. A ``DirtySet``
    rooted at ``root`` is handed to the indexer. Returns the indexer's report.
    """
    from .incremental import ChangeSet, DirtySet, IncrementalIndexer

    assert isinstance(changes, ChangeSet)
    dirty_set = DirtySet(root)
    incremental = IncrementalIndexer(
        self._store,
        source,
        registry,
        repo,
        commit,
        resolve_scope_hops=resolve_scope_hops,
        dirty=dirty_set,
        frameworks=frameworks,
        recorder=recorder,
        commit_ts=commit_ts,
    )
    report = await incremental.refresh(changes)
    return report
|
|
342
|
+
|
|
343
|
+
@classmethod
async def open(
    cls,
    repo_path: str | Path = ".",
    config: str | Path | None = None,
    languages: str | list[str] | None = None,
) -> CodeGraph:
    """Open the store for ``repo_path`` and wrap it without running any
    indexing. The returned handle has no index report."""
    store = await Store.open(repo_path, config)
    return cls(store, repo_path, config, languages)
|
|
351
|
+
|
|
352
|
+
async def embed(self, embedder: object | None = None, only_dirty: bool = False) -> EmbedReport:
    """Chunk and embed the indexed sources, returning the ``EmbedReport``.

    ``embedder`` is used only when it is an ``Embedder`` instance; anything
    else (including ``None``) falls back to the embedder built from
    ``EmbedConfig``. With ``only_dirty`` (feat-004) the run is limited to the
    paths of the ids the ``DirtySet`` reports for the ``embeddings`` consumer,
    and those ids are marked clean afterwards.
    """
    from agentforge_graph.chunking import CASTChunker
    from agentforge_graph.config import ChunkingConfig, EmbedConfig, StoreConfig
    from agentforge_graph.core import SymbolID
    from agentforge_graph.embed import Embedder, EmbedPipeline, embedder_from_config

    from .incremental import DirtySet

    chunk_cfg = ChunkingConfig.load(self._config)
    if isinstance(embedder, Embedder):
        active_embedder = embedder
    else:
        active_embedder = embedder_from_config(EmbedConfig.load(self._config))

    source, registry = _source_registry(self._repo_path, self._config, self._languages)
    chunker = CASTChunker(chunk_cfg.max_tokens, chunk_cfg.min_tokens)
    pipeline = EmbedPipeline(chunker, active_embedder, commit=_git_commit(self._repo_path))
    root = Path(self._repo_path) / StoreConfig.load(self._config).path

    dirty: DirtySet | None = None
    only_paths: set[str] | None = None
    dirty_ids: list[str] = []
    if only_dirty:
        dirty = DirtySet(root)
        dirty_ids = await dirty.dirty_for("embeddings")
        # Dirty entries are symbol ids; the pipeline filters by file path.
        only_paths = set()
        for symbol_id in dirty_ids:
            only_paths.add(SymbolID.parse(symbol_id).path)

    self._embed_report = await pipeline.run(
        self._store, source, registry, only_paths=only_paths, doc_root=root
    )
    if dirty is not None:
        await dirty.mark_clean("embeddings", dirty_ids)
    return self._embed_report
|
|
390
|
+
|
|
391
|
+
async def retrieve(
    self,
    query: str | None = None,
    symbol: str | None = None,
    mode: Mode = "context",
    k: int | None = None,
    depth: int | None = None,
    embedder: object | None = None,
    include_llm_facts: bool = True,
    as_of: str | None = None,
) -> ContextPack:
    """Hybrid retrieval (feat-006): vector entry plus graph expansion.

    ``include_llm_facts=False`` excludes llm-derived items (decisions, tags,
    etc.) wholesale (feat-010/012). ``as_of=<commit>`` (feat-009) restricts
    results to the ids the temporal index reports alive at that commit.

    Raises ``TemporalError`` when ``as_of`` is given but no temporal log
    exists. NOTE(review): the original note also mentions an error beyond the
    retention horizon; that would come from ``alive_at``, not visible here.
    """
    from agentforge_graph.config import EmbedConfig, RetrieveConfig
    from agentforge_graph.embed import Embedder, embedder_from_config
    from agentforge_graph.retrieve import Retriever
    from agentforge_graph.retrieve.rerank import reranker_from_config

    allow_ids: set[str] | None = None
    if as_of is not None:
        from agentforge_graph.temporal import TemporalError

        temporal_index = self._temporal_index()
        if temporal_index is None:
            raise TemporalError("as_of requested but no temporal log exists")
        allow_ids = await temporal_index.alive_at(as_of)

    if isinstance(embedder, Embedder):
        active_embedder = embedder
    else:
        active_embedder = embedder_from_config(EmbedConfig.load(self._config))

    retrieve_cfg = RetrieveConfig.load(self._config)
    reranker = reranker_from_config(
        retrieve_cfg.rerank, retrieve_cfg.rerank_weight, retrieve_cfg.rerank_model
    )
    retriever = Retriever(self._store, active_embedder, retrieve_cfg, reranker=reranker)
    pack = await retriever.retrieve(
        query=query,
        symbol=symbol,
        mode=mode,
        k=k,
        depth=depth,
        include_llm_facts=include_llm_facts,
        allow_ids=allow_ids,
    )
    return pack
|
|
443
|
+
|
|
444
|
+
async def repo_map(
    self,
    budget_tokens: int | None = None,
    focus: list[str] | None = None,
    scope: str | None = None,
) -> str:
    """Render the budget-aware, centrality-ranked repo map (feat-007).

    All three arguments are forwarded unchanged to ``RepoMap.render``.
    """
    from agentforge_graph.config import RepoMapConfig
    from agentforge_graph.repomap import RepoMap

    repo_map_cfg = RepoMapConfig.load(self._config)
    rendered = await RepoMap(self._store, repo_map_cfg).render(
        budget_tokens=budget_tokens, focus=focus, scope=scope
    )
    return rendered
|
|
456
|
+
|
|
457
|
+
async def ranked_symbols(
    self, k: int = 100, focus: list[str] | None = None
) -> list[RankedSymbol]:
    """Return the ranked symbols produced by ``RepoMap.ranked_symbols`` for
    the given ``k`` and optional ``focus``."""
    from agentforge_graph.config import RepoMapConfig
    from agentforge_graph.repomap import RepoMap

    repo_map_cfg = RepoMapConfig.load(self._config)
    ranked = await RepoMap(self._store, repo_map_cfg).ranked_symbols(k=k, focus=focus)
    return ranked
|
|
465
|
+
|
|
466
|
+
async def routes(self) -> list[RouteInfo]:
    """List every extracted endpoint (feat-011).

    Each ``ROUTE`` node becomes a ``RouteInfo`` carrying method, path pattern,
    framework, handler symbol, source file and line (0 when the node has no
    span). The list is sorted by ``(path, method)``.
    """
    from agentforge_graph.core import GraphQuery, NodeKind, SymbolID

    route_query = GraphQuery(kinds=[NodeKind.ROUTE], limit=10_000_000)
    result = await self._store.graph.query(route_query)

    found = []
    for node in result.nodes:
        found.append(
            RouteInfo(
                method=str(node.attrs.get("method", "")),
                path=str(node.attrs.get("path", "")),
                framework=str(node.attrs.get("framework", "")),
                handler=str(node.attrs.get("handler", "")),
                file=SymbolID.parse(node.id).path,
                line=node.span[0] if node.span else 0,
            )
        )
    found.sort(key=lambda route: (route.path, route.method))
    return found
|
|
487
|
+
|
|
488
|
+
async def models(self) -> list[ModelInfo]:
    """List every extracted ORM data model (feat-011), sorted by name.

    For each ``DATA_MODEL`` node the result carries its table, framework,
    underlying class symbol and source location, plus:

    * ``fields`` — sorted names of ``HAS_FIELD`` neighbours (depth 1) that are
      ``VARIABLE`` nodes with a non-empty name;
    * ``relations`` — outgoing ``RELATES_TO`` edges (FK / relationship) as
      ``{"to", "kind", "via"}`` dicts sorted by ``(to, via)``. ``to`` is the
      target model's display name, or the raw edge destination when the
      target is not a known data model.
    """
    from agentforge_graph.core import EdgeKind, GraphQuery, NodeKind, SymbolID

    model_query = GraphQuery(kinds=[NodeKind.DATA_MODEL], limit=10_000_000)
    nodes = (await self._store.graph.query(model_query)).nodes

    # Display name per model id: the table name when present, else the node name.
    display_name = {}
    for node in nodes:
        display_name[node.id] = str(node.attrs.get("table") or node.name)

    collected: list[ModelInfo] = []
    for node in nodes:
        members = await self._store.graph.neighbors(node.id, [EdgeKind.HAS_FIELD], depth=1)
        field_names = [
            member.name
            for member in members
            if member.kind is NodeKind.VARIABLE and member.name
        ]
        field_names.sort()

        rel_edges = await self._store.graph.adjacent(
            node.id, [EdgeKind.RELATES_TO], direction="out"
        )
        relations = [
            {
                "to": display_name.get(edge.dst, edge.dst),
                "kind": str(edge.attrs.get("kind", "")),
                "via": str(edge.attrs.get("via", "")),
            }
            for edge in rel_edges
        ]
        relations.sort(key=lambda rel: (rel["to"], rel["via"]))

        info = ModelInfo(
            name=str(node.attrs.get("table") or node.name),
            table=str(node.attrs.get("table", "")),
            framework=str(node.attrs.get("framework", "")),
            fields=field_names,
            relations=relations,
            cls=str(node.attrs.get("class", "")),
            file=SymbolID.parse(node.id).path,
            line=node.span[0] if node.span else 0,
        )
        collected.append(info)

    collected.sort(key=lambda model: model.name)
    return collected
|
|
530
|
+
|
|
531
|
+
async def services(self) -> list[ServiceInfo]:
    """List every DI-provided service (feat-011), sorted by name.

    Each ``SERVICE`` node yields its provider name (falling back to the node
    name), framework, the sorted destinations of its outgoing
    ``INJECTED_INTO`` edges, and its source file and line (0 without a span).
    """
    from agentforge_graph.core import EdgeKind, GraphQuery, NodeKind, SymbolID

    service_query = GraphQuery(kinds=[NodeKind.SERVICE], limit=10_000_000)
    nodes = (await self._store.graph.query(service_query)).nodes

    collected: list[ServiceInfo] = []
    for node in nodes:
        injection_edges = await self._store.graph.adjacent(
            node.id, [EdgeKind.INJECTED_INTO], direction="out"
        )
        info = ServiceInfo(
            name=str(node.attrs.get("provider") or node.name),
            framework=str(node.attrs.get("framework", "")),
            injected_into=sorted(edge.dst for edge in injection_edges),
            file=SymbolID.parse(node.id).path,
            line=node.span[0] if node.span else 0,
        )
        collected.append(info)

    collected.sort(key=lambda service: service.name)
    return collected
|
|
556
|
+
|
|
557
|
+
def _temporal_index(self) -> Any:
    """Build a ``TemporalIndex`` over the temporal sidecar, if one exists.

    Returns ``None`` when ``temporal.db`` is missing from the store directory
    (temporal never enabled / no git). The temporal package is imported only
    after the sidecar is known to exist, so the higher temporal layer stays
    lazy (ADR-0001).
    """
    from agentforge_graph.config import StoreConfig, TemporalConfig

    store_dir = Path(self._repo_path) / StoreConfig.load(self._config).path
    sidecar = store_dir / "temporal.db"
    if sidecar.exists():
        from agentforge_graph.temporal import TemporalIndex, TemporalStore

        return TemporalIndex(
            TemporalStore.open(store_dir),
            self._store.graph,
            repo_root=str(self._repo_path),
            retention_commits=TemporalConfig.load(self._config).retention_commits,
        )
    return None
|
|
574
|
+
|
|
575
|
+
async def history(self, symbol_id: str) -> Any:
    """Evolution record for one symbol (feat-009): introduced / last-changed
    / churn / authors / lifecycle events.

    Delegates to the temporal index; returns ``None`` when no temporal index
    is available.
    """
    index = self._temporal_index()
    if index is None:
        return None
    return await index.history(symbol_id)
|
|
580
|
+
|
|
581
|
+
async def changed_since(self, ref: str, scope: str | None = None) -> list[Any]:
    """Symbols changed since ``ref`` (feat-009), newest first, optionally
    restricted to a path glob/prefix ``scope``.

    Delegates to the temporal index; returns an empty list when no temporal
    index is available.
    """
    index = self._temporal_index()
    if index is None:
        return []
    return await index.changed_since(ref, scope)
|
|
586
|
+
|
|
587
|
+
async def backfill(self, history: int) -> Any:
    """Seed the evolution log from git history (feat-009 chunk 4).

    Replays ``history`` commits into the temporal sidecar by delegating to
    ``run_backfill`` with this instance's repo path, config and languages.
    Returns a ``BackfillReport``; a no-op (``ran=False``) when temporal is
    off, the range is already covered, or it isn't a git repo.
    """
    from agentforge_graph.temporal.backfill import run_backfill

    report = await run_backfill(
        self._repo_path,
        self._config,
        history,
        languages=self._languages,
    )
    return report
|
|
595
|
+
|
|
596
|
+
async def temporal_status(self) -> dict[str, Any]:
    """Summarize the temporal sidecar for ``ckg status``.

    Keys: ``enabled`` (the config flag), ``events`` (event count held by the
    log), ``has_sidecar`` (whether ``temporal.db`` exists) and
    ``backfilled_through`` (the stored meta value, ``""`` when unset). With no
    sidecar on disk the counts stay zero/empty and the temporal package is
    never imported.
    """
    from agentforge_graph.config import StoreConfig, TemporalConfig

    status: dict[str, Any] = {
        "enabled": TemporalConfig.load(self._config).enabled,
        "events": 0,
        "has_sidecar": False,
        "backfilled_through": "",
    }
    store_dir = Path(self._repo_path) / StoreConfig.load(self._config).path
    if not (store_dir / "temporal.db").exists():
        return status

    from agentforge_graph.temporal import TemporalStore

    # NOTE(review): the opened store is not closed here — confirm that
    # TemporalStore needs no explicit close.
    sidecar = TemporalStore.open(store_dir)
    status["events"] = await sidecar.count_events()
    status["has_sidecar"] = True
    status["backfilled_through"] = await sidecar.get_meta("backfilled_through") or ""
    return status
|
|
616
|
+
|
|
617
|
+
async def decisions(
    self, scope: str | None = None, status: str | None = None
) -> list[DecisionInfo]:
    """Architecture decisions (feat-010), optionally filtered.

    ``status`` keeps only decisions whose ADR status equals it. ``scope``
    keeps a decision whose own path starts with the prefix, or which governs
    at least one symbol whose path does. The result is sorted on the
    ``(status, date)`` pair in descending order — the reversal applies to
    both keys, not just the date.
    """
    from agentforge_graph.core import EdgeKind, GraphQuery, NodeKind, SymbolID
    from agentforge_graph.knowledge import DecisionInfo

    graph = self._store.graph
    nodes = (await graph.query(GraphQuery(kinds=[NodeKind.DECISION], limit=10_000_000))).nodes
    out: list[DecisionInfo] = []
    for n in nodes:
        node_status = str(n.attrs.get("status", ""))
        # Cheap attribute filter first: a decision rejected by ``status``
        # never needs its GOVERNS edges fetched.
        if status and node_status != status:
            continue
        governs = [e.dst for e in await graph.adjacent(n.id, [EdgeKind.GOVERNS], "out")]
        own_path = SymbolID.parse(n.id).path
        if (
            scope
            and not own_path.startswith(scope)
            and not any(SymbolID.parse(g).path.startswith(scope) for g in governs)
        ):
            continue
        out.append(
            DecisionInfo(
                id=n.id,
                adr_id=str(n.attrs.get("adr_id", "")),
                title=str(n.attrs.get("title", n.name)),
                status=node_status,
                date=str(n.attrs.get("date", "")),
                # Prefer the recorded path; fall back to the path in the id.
                path=str(n.attrs.get("path", own_path)),
                governs=governs,
            )
        )
    out.sort(key=lambda d: (d.status, d.date), reverse=True)
    return out
|
|
655
|
+
|
|
656
|
+
async def enrich(self, judge: object | None = None, budget_usd: float | None = None) -> Any:
    """Run LLM pattern enrichment (feat-012) and return an ``EnrichReport``.

    Targets are the ids in the ``patterns`` DirtySet when it is non-empty
    (incremental run); otherwise every Class/Function symbol in the graph.
    ``judge`` is used as-is when it is a ``PatternJudge``; anything else is
    replaced by the judge built from ``EnrichConfig``. ``budget_usd``
    overrides the configured budget when given. After the run, the ids the
    enricher reports as judged are marked clean. Never runs implicitly —
    explicit call only.
    """
    from agentforge_graph.config import EnrichConfig, StoreConfig
    from agentforge_graph.core import GraphQuery
    from agentforge_graph.enrich import PatternHeuristics, PatternJudge, PatternTagEnricher
    from agentforge_graph.enrich.heuristics import Recall, class_and_function_ids

    from .incremental import DirtySet

    settings = EnrichConfig.load(self._config)
    repo_name = Path(self._repo_path).resolve().name
    store_dir = Path(self._repo_path) / StoreConfig.load(self._config).path

    the_judge: PatternJudge
    if not isinstance(judge, PatternJudge):
        from agentforge_graph.enrich.registry import judge_from_config

        the_judge = judge_from_config(settings)  # ENH-003: provider-selected
    else:
        the_judge = judge

    pending = DirtySet(store_dir)
    targets = await pending.dirty_for("patterns")
    if not targets:
        # Nothing dirty: fall back to a full pass over the graph.
        everything = (await self._store.graph.query(GraphQuery(limit=10_000_000))).nodes
        targets = class_and_function_ids(everything)

    # Any configured value other than "broad" is treated as "conservative".
    recall: Recall = "broad" if settings.patterns_recall == "broad" else "conservative"
    tagger = PatternTagEnricher(
        repo_name,
        the_judge,
        heuristics=PatternHeuristics(recall=recall),
        confidence_floor=settings.confidence_floor,
        budget_usd=settings.budget_usd if budget_usd is None else budget_usd,
        concurrency=settings.concurrency,
        commit=_git_commit(self._repo_path),
    )
    report = await tagger.enrich(self._store.graph, targets)
    await pending.mark_clean("patterns", tagger.last_judged_ids)
    return report
|
|
699
|
+
|
|
700
|
+
async def infer_governs(
    self, matcher: object | None = None, budget_usd: float | None = None
) -> Any:
    """Run the LLM ``infer_governs`` pass (feat-010); returns a ``GovernsReport``.

    For ADRs whose prose names no code, the decision text is matched against
    repo symbols and ``GOVERNS`` edges are written with ``llm`` provenance;
    only decisions with zero *parsed* GOVERNS are touched. ``matcher`` is
    used as-is when it is a ``GovernsMatcher``; anything else is replaced by
    the matcher built from ``EnrichConfig`` (provider). ``budget_usd``
    overrides ``knowledge.infer_budget_usd`` when given. Explicit call only
    (``ckg enrich --decisions``).
    """
    from agentforge_graph.config import EnrichConfig, KnowledgeConfig
    from agentforge_graph.enrich import DecisionGovernsInferencer, GovernsMatcher

    enrich_cfg = EnrichConfig.load(self._config)
    knowledge_cfg = KnowledgeConfig.load(self._config)
    repo_name = Path(self._repo_path).resolve().name

    the_matcher: GovernsMatcher
    if not isinstance(matcher, GovernsMatcher):
        from agentforge_graph.enrich.registry import governs_matcher_from_config

        the_matcher = governs_matcher_from_config(enrich_cfg)
    else:
        the_matcher = matcher

    runner = DecisionGovernsInferencer(
        repo_name,
        the_matcher,
        confidence_floor=enrich_cfg.confidence_floor,
        budget_usd=knowledge_cfg.infer_budget_usd if budget_usd is None else budget_usd,
        commit=_git_commit(self._repo_path),
    )
    report = await runner.enrich(self._store.graph)
    return report
|
|
730
|
+
|
|
731
|
+
async def tagged(self, pattern: str, min_confidence: float = 0.7) -> list[TaggedInfo]:
    """Symbols tagged with ``pattern`` at or above ``min_confidence`` (feat-012).

    Returns an empty list when the pattern's tag node does not exist. Only
    ``TAGGED`` edges whose source id has a non-empty descriptor are kept; the
    result is ordered by confidence, highest first.
    """
    from agentforge_graph.core import EdgeKind, SymbolID
    from agentforge_graph.enrich import TaggedInfo, pattern_tag_id

    graph = self._store.graph
    repo_name = Path(self._repo_path).resolve().name
    tag_id = pattern_tag_id(repo_name, pattern)
    if await graph.get(tag_id) is None:
        return []

    hits: list[TaggedInfo] = []
    for edge in await graph.adjacent(tag_id, [EdgeKind.TAGGED], "in"):
        confidence = float(edge.attrs.get("confidence", 0.0))
        if not (confidence >= min_confidence and SymbolID.parse(edge.src).descriptor):
            continue
        hits.append(
            TaggedInfo(
                symbol_id=edge.src,
                pattern=pattern,
                confidence=confidence,
                rationale=str(edge.attrs.get("rationale", "")),
            )
        )
    hits.sort(key=lambda hit: hit.confidence, reverse=True)
    return hits
|
|
754
|
+
|
|
755
|
+
async def summarize(
    self, summarizer: object | None = None, budget_usd: float | None = None
) -> Any:
    """Bottom-up module summaries (feat-012): file summaries + one repo
    summary, embedded for concept search.

    When ``DirtySet("summaries")`` is non-empty only the files containing
    the dirty symbols are summarized; otherwise every file node is.
    ``summarizer`` is used as-is when it is a ``Summarizer``; anything else
    is replaced by the one built from ``EnrichConfig``. The embedder is
    always built from ``EmbedConfig``. ``budget_usd`` overrides the
    configured budget when given. Afterwards, dirty ids whose file the
    enricher reports as done are marked clean. Explicit call only.
    """
    from agentforge_graph.config import EmbedConfig, EnrichConfig, StoreConfig
    from agentforge_graph.core import GraphQuery, NodeKind, SymbolID
    from agentforge_graph.embed import embedder_from_config
    from agentforge_graph.enrich import Summarizer, SummaryEnricher

    from .incremental import DirtySet

    settings = EnrichConfig.load(self._config)
    repo_name = Path(self._repo_path).resolve().name
    store_dir = Path(self._repo_path) / StoreConfig.load(self._config).path

    the_summarizer: Summarizer
    if not isinstance(summarizer, Summarizer):
        from agentforge_graph.enrich.registry import summarizer_from_config

        the_summarizer = summarizer_from_config(settings)  # ENH-003: provider-selected
    else:
        the_summarizer = summarizer

    file_nodes = (
        await self._store.graph.query(GraphQuery(kinds=[NodeKind.FILE], limit=10**9))
    ).nodes
    pending = DirtySet(store_dir)
    dirty_ids = await pending.dirty_for("summaries")
    file_ids = [node.id for node in file_nodes]
    if dirty_ids:
        # Dirty entries are symbol ids; narrow to the files that contain them.
        dirty_paths = {SymbolID.parse(sid).path for sid in dirty_ids}
        file_ids = [fid for fid in file_ids if SymbolID.parse(fid).path in dirty_paths]

    worker = SummaryEnricher(
        repo_name,
        the_summarizer,
        embedder=embedder_from_config(EmbedConfig.load(self._config)),
        max_words=settings.summary_max_words,
        levels=settings.summary_levels,
        budget_usd=settings.budget_usd if budget_usd is None else budget_usd,
        concurrency=settings.concurrency,
        commit=_git_commit(self._repo_path),
    )
    report = await worker.enrich(self._store, file_ids)

    # Only clear dirty symbols whose file was actually summarized.
    finished_paths = {SymbolID.parse(fid).path for fid in worker.last_done_ids}
    cleaned = [sid for sid in dirty_ids if SymbolID.parse(sid).path in finished_paths]
    await pending.mark_clean("summaries", cleaned)
    return report
|
|
806
|
+
|
|
807
|
+
async def summaries(self, level: str | None = None) -> list[SummaryInfo]:
    """Stored module summaries (feat-012), optionally filtered by level.

    Each entry's ``target`` is the destination of the summary node's first
    outgoing ``SUMMARIZES`` edge, or ``""`` when it has none. The result is
    ordered by ``(level, path)``.
    """
    from agentforge_graph.core import EdgeKind, GraphQuery, NodeKind
    from agentforge_graph.enrich import SummaryInfo

    graph = self._store.graph
    found = await graph.query(GraphQuery(kinds=[NodeKind.SUMMARY], limit=10**9))
    infos: list[SummaryInfo] = []
    for node in found.nodes:
        node_level = str(node.attrs.get("level", ""))
        if level is None or node_level == level:
            edges = await graph.adjacent(node.id, [EdgeKind.SUMMARIZES], "out")
            infos.append(
                SummaryInfo(
                    target=edges[0].dst if edges else "",
                    level=node_level,
                    text=str(node.attrs.get("text", "")),
                    path=str(node.attrs.get("path", "")),
                )
            )
    return sorted(infos, key=lambda info: (info.level, info.path))
|
|
831
|
+
|
|
832
|
+
@property
def store(self) -> Store:
    """The ``Store`` held by this instance (read-only accessor)."""
    backing = self._store
    return backing
|
|
835
|
+
|
|
836
|
+
def stats(self) -> IndexReport:
    """Return the held index report.

    Raises ``RuntimeError`` when no report is held (``open()`` does not
    index — ``index()`` produces one).
    """
    report = self._report
    if report is not None:
        return report
    raise RuntimeError("no index report: open() does not index — use index()")
|
|
840
|
+
|
|
841
|
+
def embed_stats(self) -> EmbedReport:
    """Return the held embed report.

    Raises ``RuntimeError`` when no report is held (``embed()`` has not been
    called).
    """
    report = self._embed_report
    if report is not None:
        return report
    raise RuntimeError("no embed report: call embed() first")
|
|
845
|
+
|
|
846
|
+
async def close(self) -> None:
    """Close the underlying store."""
    backing = self._store
    await backing.close()
|