agentforge-graph 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentforge_graph/__init__.py +6 -0
- agentforge_graph/chunking/__init__.py +12 -0
- agentforge_graph/chunking/cast.py +159 -0
- agentforge_graph/chunking/chunk.py +19 -0
- agentforge_graph/chunking/tokens.py +15 -0
- agentforge_graph/cli.py +607 -0
- agentforge_graph/config.py +259 -0
- agentforge_graph/core/__init__.py +54 -0
- agentforge_graph/core/conformance.py +270 -0
- agentforge_graph/core/contracts.py +163 -0
- agentforge_graph/core/kinds.py +68 -0
- agentforge_graph/core/models.py +134 -0
- agentforge_graph/core/provenance.py +62 -0
- agentforge_graph/core/symbols.py +116 -0
- agentforge_graph/embed/__init__.py +28 -0
- agentforge_graph/embed/base.py +22 -0
- agentforge_graph/embed/bedrock.py +85 -0
- agentforge_graph/embed/fake.py +34 -0
- agentforge_graph/embed/openai.py +67 -0
- agentforge_graph/embed/pipeline.py +184 -0
- agentforge_graph/embed/registry.py +66 -0
- agentforge_graph/embed/report.py +15 -0
- agentforge_graph/enrich/__init__.py +70 -0
- agentforge_graph/enrich/anthropic.py +38 -0
- agentforge_graph/enrich/anthropic_client.py +109 -0
- agentforge_graph/enrich/bedrock.py +24 -0
- agentforge_graph/enrich/bedrock_client.py +115 -0
- agentforge_graph/enrich/bedrock_summarizer.py +23 -0
- agentforge_graph/enrich/claude.py +172 -0
- agentforge_graph/enrich/enricher.py +108 -0
- agentforge_graph/enrich/governs.py +173 -0
- agentforge_graph/enrich/governs_enricher.py +152 -0
- agentforge_graph/enrich/heuristics.py +224 -0
- agentforge_graph/enrich/judge.py +63 -0
- agentforge_graph/enrich/registry.py +133 -0
- agentforge_graph/enrich/report.py +60 -0
- agentforge_graph/enrich/summarizer.py +62 -0
- agentforge_graph/enrich/summary_enricher.py +211 -0
- agentforge_graph/enrich/taxonomy.py +38 -0
- agentforge_graph/frameworks/__init__.py +29 -0
- agentforge_graph/frameworks/base.py +75 -0
- agentforge_graph/frameworks/detect.py +124 -0
- agentforge_graph/frameworks/extractor.py +63 -0
- agentforge_graph/frameworks/orm.py +93 -0
- agentforge_graph/frameworks/packs/_js_ast.py +56 -0
- agentforge_graph/frameworks/packs/_python_ast.py +157 -0
- agentforge_graph/frameworks/packs/django/__init__.py +240 -0
- agentforge_graph/frameworks/packs/django/models.scm +7 -0
- agentforge_graph/frameworks/packs/express/__init__.py +133 -0
- agentforge_graph/frameworks/packs/express/routes.scm +8 -0
- agentforge_graph/frameworks/packs/fastapi/__init__.py +210 -0
- agentforge_graph/frameworks/packs/fastapi/depends.scm +6 -0
- agentforge_graph/frameworks/packs/fastapi/routes.scm +10 -0
- agentforge_graph/frameworks/packs/flask/__init__.py +143 -0
- agentforge_graph/frameworks/packs/flask/routes.scm +11 -0
- agentforge_graph/frameworks/packs/nestjs/__init__.py +205 -0
- agentforge_graph/frameworks/packs/nestjs/routes.scm +6 -0
- agentforge_graph/frameworks/packs/spring/__init__.py +267 -0
- agentforge_graph/frameworks/packs/spring/routes.scm +6 -0
- agentforge_graph/frameworks/packs/sqlalchemy/__init__.py +250 -0
- agentforge_graph/frameworks/packs/sqlalchemy/models.scm +7 -0
- agentforge_graph/frameworks/registry.py +44 -0
- agentforge_graph/ingest/__init__.py +30 -0
- agentforge_graph/ingest/codegraph.py +847 -0
- agentforge_graph/ingest/extractor.py +353 -0
- agentforge_graph/ingest/incremental/__init__.py +25 -0
- agentforge_graph/ingest/incremental/detect.py +118 -0
- agentforge_graph/ingest/incremental/dirty.py +61 -0
- agentforge_graph/ingest/incremental/indexer.py +218 -0
- agentforge_graph/ingest/incremental/meta.py +72 -0
- agentforge_graph/ingest/incremental/ports.py +39 -0
- agentforge_graph/ingest/pack.py +160 -0
- agentforge_graph/ingest/packs/__init__.py +34 -0
- agentforge_graph/ingest/packs/cpp/__init__.py +35 -0
- agentforge_graph/ingest/packs/cpp/references.scm +15 -0
- agentforge_graph/ingest/packs/cpp/structure.scm +49 -0
- agentforge_graph/ingest/packs/csharp/__init__.py +35 -0
- agentforge_graph/ingest/packs/csharp/references.scm +12 -0
- agentforge_graph/ingest/packs/csharp/structure.scm +45 -0
- agentforge_graph/ingest/packs/go/__init__.py +38 -0
- agentforge_graph/ingest/packs/go/references.scm +12 -0
- agentforge_graph/ingest/packs/go/structure.scm +64 -0
- agentforge_graph/ingest/packs/java/__init__.py +35 -0
- agentforge_graph/ingest/packs/java/references.scm +12 -0
- agentforge_graph/ingest/packs/java/structure.scm +38 -0
- agentforge_graph/ingest/packs/javascript/__init__.py +34 -0
- agentforge_graph/ingest/packs/javascript/references.scm +11 -0
- agentforge_graph/ingest/packs/javascript/structure.scm +166 -0
- agentforge_graph/ingest/packs/php/__init__.py +35 -0
- agentforge_graph/ingest/packs/php/references.scm +15 -0
- agentforge_graph/ingest/packs/php/structure.scm +44 -0
- agentforge_graph/ingest/packs/python/__init__.py +25 -0
- agentforge_graph/ingest/packs/python/references.scm +14 -0
- agentforge_graph/ingest/packs/python/structure.scm +57 -0
- agentforge_graph/ingest/packs/ruby/__init__.py +37 -0
- agentforge_graph/ingest/packs/ruby/references.scm +12 -0
- agentforge_graph/ingest/packs/ruby/structure.scm +37 -0
- agentforge_graph/ingest/packs/rust/__init__.py +39 -0
- agentforge_graph/ingest/packs/rust/references.scm +12 -0
- agentforge_graph/ingest/packs/rust/structure.scm +46 -0
- agentforge_graph/ingest/packs/typescript/__init__.py +31 -0
- agentforge_graph/ingest/packs/typescript/references.scm +11 -0
- agentforge_graph/ingest/packs/typescript/structure.scm +99 -0
- agentforge_graph/ingest/pipeline.py +134 -0
- agentforge_graph/ingest/report.py +84 -0
- agentforge_graph/ingest/resolver.py +467 -0
- agentforge_graph/ingest/source.py +79 -0
- agentforge_graph/knowledge/__init__.py +28 -0
- agentforge_graph/knowledge/adr.py +136 -0
- agentforge_graph/knowledge/commits.py +152 -0
- agentforge_graph/knowledge/ingest.py +312 -0
- agentforge_graph/knowledge/mentions.py +71 -0
- agentforge_graph/knowledge/report.py +32 -0
- agentforge_graph/main.py +21 -0
- agentforge_graph/providers.py +36 -0
- agentforge_graph/repomap/__init__.py +14 -0
- agentforge_graph/repomap/rank.py +161 -0
- agentforge_graph/repomap/render.py +55 -0
- agentforge_graph/repomap/repomap.py +66 -0
- agentforge_graph/retrieve/__init__.py +21 -0
- agentforge_graph/retrieve/pack.py +76 -0
- agentforge_graph/retrieve/rerank.py +251 -0
- agentforge_graph/retrieve/retriever.py +286 -0
- agentforge_graph/retrieve/scoring.py +36 -0
- agentforge_graph/serve/__init__.py +19 -0
- agentforge_graph/serve/engine.py +204 -0
- agentforge_graph/serve/http_runner.py +133 -0
- agentforge_graph/serve/server.py +110 -0
- agentforge_graph/serve/tools.py +307 -0
- agentforge_graph/store/__init__.py +32 -0
- agentforge_graph/store/_rowmap.py +102 -0
- agentforge_graph/store/errors.py +22 -0
- agentforge_graph/store/facade.py +89 -0
- agentforge_graph/store/kuzu_store.py +380 -0
- agentforge_graph/store/lance_store.py +146 -0
- agentforge_graph/store/neo4j_store.py +294 -0
- agentforge_graph/store/pgvector_store.py +170 -0
- agentforge_graph/store/registry.py +45 -0
- agentforge_graph/temporal/__init__.py +36 -0
- agentforge_graph/temporal/backfill.py +338 -0
- agentforge_graph/temporal/events.py +82 -0
- agentforge_graph/temporal/index.py +190 -0
- agentforge_graph/temporal/mining.py +190 -0
- agentforge_graph/temporal/recorder.py +114 -0
- agentforge_graph/temporal/store.py +282 -0
- agentforge_graph-0.3.2.dist-info/METADATA +291 -0
- agentforge_graph-0.3.2.dist-info/RECORD +151 -0
- agentforge_graph-0.3.2.dist-info/WHEEL +4 -0
- agentforge_graph-0.3.2.dist-info/entry_points.txt +3 -0
- agentforge_graph-0.3.2.dist-info/licenses/LICENSE +202 -0
- agentforge_graph-0.3.2.dist-info/licenses/NOTICE +14 -0
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
"""``SummaryEnricher`` (feat-012) — bottom-up module summaries.
|
|
2
|
+
|
|
3
|
+
Leaf-first over ``CONTAINS``: each file is summarised from its symbols
|
|
4
|
+
(signatures) + imports, then one repo summary is synthesised from the file
|
|
5
|
+
summaries. Summaries are ``Summary`` nodes (``SUMMARIZES`` → file / a synthesised
|
|
6
|
+
``Repository`` node) with ``llm`` provenance, and are embedded
|
|
7
|
+
(``source_type="summary"``) so a concept query can land on one and expand to the
|
|
8
|
+
code. Budgeted (``BudgetPolicy``), resumable (``DirtySet("summaries")``), and
|
|
9
|
+
idempotent: the ``Summary`` node is MERGE-updated and its (stable) ``SUMMARIZES``
|
|
10
|
+
edge is created only when missing, and the vector is replaced by ref.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import asyncio
|
|
16
|
+
|
|
17
|
+
from agentforge_core.production.budget import BudgetPolicy
|
|
18
|
+
from agentforge_core.production.exceptions import BudgetExceeded
|
|
19
|
+
|
|
20
|
+
from agentforge_graph.core import (
|
|
21
|
+
Edge,
|
|
22
|
+
EdgeKind,
|
|
23
|
+
Embedded,
|
|
24
|
+
Node,
|
|
25
|
+
NodeKind,
|
|
26
|
+
Provenance,
|
|
27
|
+
SymbolID,
|
|
28
|
+
)
|
|
29
|
+
from agentforge_graph.store import Store
|
|
30
|
+
|
|
31
|
+
from .report import SummaryReport
|
|
32
|
+
from .summarizer import FileContext, Summarizer
|
|
33
|
+
|
|
34
|
+
_SUMMARY_LANG = "summary"
|
|
35
|
+
_REPO_PLACEHOLDER = "<repo>"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def summary_id(repo: str, path: str) -> str:
    """Return the id used for the ``Summary`` node of ``path`` in ``repo``.

    The id is produced by ``SymbolID.for_symbol`` from the summary language
    slug, the repo, the path and the fixed descriptor ``"summary."``.
    """
    descriptor = "summary."
    return SymbolID.for_symbol(_SUMMARY_LANG, repo, path, descriptor)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def repo_node_id(repo: str) -> str:
    """Return the id used for the synthesised ``Repository`` node of ``repo``.

    The repository has no file path of its own, so the module-level
    placeholder path is passed to ``SymbolID.for_symbol`` instead.
    """
    return SymbolID.for_symbol(
        "repo",
        repo,
        _REPO_PLACEHOLDER,
        "repository.",
    )
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class SummaryEnricher:
    """Write file-level and repo-level ``Summary`` nodes for an indexed repo.

    File summaries are produced first (in concurrent batches), then a single
    repo summary is synthesised from them. Spending is tracked against a
    ``BudgetPolicy``; the ids of the files summarised by the most recent
    ``enrich`` call are kept in ``last_done_ids``.
    """

    version = "summaries@1"

    def __init__(
        self,
        repo: str,
        summarizer: Summarizer,
        *,
        embedder: object | None = None,
        max_words: int = 120,
        levels: list[str] | None = None,
        budget_usd: float = 2.0,
        concurrency: int = 6,
        commit: str = "",
    ) -> None:
        """Store the configuration.

        ``levels`` falls back to ``["file", "repo"]`` when empty or ``None``;
        ``concurrency`` is clamped to at least 1.
        """
        self.repo = repo
        self.summarizer = summarizer
        self.embedder = embedder
        self.max_words = max_words
        self.levels = levels if levels else ["file", "repo"]
        self.budget_usd = budget_usd
        self.concurrency = max(1, concurrency)
        self.commit = commit
        self.last_done_ids: list[str] = []

    async def enrich(self, store: Store, file_ids: list[str]) -> SummaryReport:
        """Summarise the given files (and optionally the repo) into ``store``.

        Returns a ``SummaryReport`` with the number of files summarised,
        whether the repo tier ran, whether the budget tripped, and the
        summarizer's ``cost_usd`` rounded to six decimals. Nothing is done
        when ``"file"`` is not among the configured levels.
        """
        report = SummaryReport()
        self.last_done_ids = []
        if "file" not in self.levels:
            return report

        budget = BudgetPolicy(usd=self.budget_usd, max_tokens=10**12, max_iterations=10**12)
        prov = Provenance.llm(self.version, 1.0, self.commit)
        nodes: list[Node] = []
        edges: list[Edge] = []
        to_embed: list[tuple[str, str, str]] = []  # (summary_id, path, text)
        file_summaries: list[tuple[str, str]] = []  # (path, text)

        # Graph reads first: collect a context for every id that resolves to
        # a FILE node; anything else is skipped.
        targets: list[tuple[str, FileContext]] = []
        for fid in file_ids:
            candidate = await store.graph.get(fid)
            if candidate is None or candidate.kind is not NodeKind.FILE:
                continue
            context = await self._file_context(store, candidate)
            targets.append((fid, context))

        # Summarise in batches of ``concurrency``; the cost delta of each
        # batch is committed to the budget after the gather completes.
        width = self.concurrency
        for offset in range(0, len(targets), width):
            if not self._budget_allows(budget, report):
                break
            batch = targets[offset : offset + width]
            spent_before = self.summarizer.cost_usd
            pending = [
                self.summarizer.summarize_file(ctx, self.max_words) for _fid, ctx in batch
            ]
            summaries = await asyncio.gather(*pending)
            budget.commit(self.summarizer.cost_usd - spent_before)
            for (fid, _ctx), summary in zip(batch, summaries, strict=True):
                path = SymbolID.parse(fid).path
                sid = summary_id(self.repo, path)
                node = self._summary_node(sid, summary.text, "file", summary.model, path, prov)
                nodes.append(node)
                edges.append(Edge(src=sid, dst=fid, kind=EdgeKind.SUMMARIZES, provenance=prov))
                to_embed.append((sid, path, summary.text))
                file_summaries.append((path, summary.text))
                self.last_done_ids.append(fid)
                report.files_summarized += 1

        # Repo tier: only when requested, when at least one file summary
        # exists, and when the budget has not tripped (checked once more).
        wants_repo = (
            "repo" in self.levels and bool(file_summaries) and not report.budget_tripped
        )
        if wants_repo and self._budget_allows(budget, report):
            spent_before = self.summarizer.cost_usd
            repo_summary = await self.summarizer.summarize_repo(
                self.repo, file_summaries, self.max_words
            )
            budget.commit(self.summarizer.cost_usd - spent_before)
            rnode = repo_node_id(self.repo)
            nodes.append(
                Node(id=rnode, kind=NodeKind.REPOSITORY, name=self.repo, provenance=prov)
            )
            rsid = summary_id(self.repo, _REPO_PLACEHOLDER)
            nodes.append(
                self._summary_node(rsid, repo_summary.text, "repo", repo_summary.model, "", prov)
            )
            edges.append(Edge(src=rsid, dst=rnode, kind=EdgeKind.SUMMARIZES, provenance=prov))
            to_embed.append((rsid, "", repo_summary.text))
            report.repo_summarized = True

        report.cost_usd = round(self.summarizer.cost_usd, 6)

        # Nodes are added in one call; each SUMMARIZES edge is added only when
        # the summary has no outgoing edge of that kind to the same target
        # yet, so existing edges are never deleted and recreated.
        if nodes:
            await store.graph.add(list(nodes))
        for edge in edges:
            neighbours = await store.graph.adjacent(edge.src, [edge.kind], "out")
            if all(found.dst != edge.dst for found in neighbours):
                await store.graph.add([edge])
        await self._embed(store, to_embed)
        return report

    # --- helpers ----------------------------------------------------------

    @staticmethod
    def _budget_allows(budget: BudgetPolicy, report: SummaryReport) -> bool:
        """Run ``budget.check()``; on ``BudgetExceeded`` flag the report and return False."""
        try:
            budget.check()
        except BudgetExceeded:
            report.budget_tripped = True
            return False
        return True

    def _summary_node(
        self, sid: str, text: str, level: str, model: str, path: str, prov: Provenance
    ) -> Node:
        """Build a ``Summary`` node; its name uses ``path``, or the repo name when ``path`` is empty."""
        attrs = {
            "text": text,
            "level": level,
            "model": model,
            "prompt_version": self.version,
            "path": path,
        }
        label = f"summary:{path or self.repo}"
        return Node(id=sid, kind=NodeKind.SUMMARY, name=label, attrs=attrs, provenance=prov)

    async def _file_context(self, store: Store, file_node: Node) -> FileContext:
        """Collect the summarizer input for one file.

        Symbols are the ``(name, signature)`` pairs of the class, function and
        method nodes reached over outgoing ``CONTAINS`` edges; imports are the
        non-empty ``module`` values recorded in the file node's ``imports``
        attribute.
        """
        symbol_kinds = (NodeKind.CLASS, NodeKind.FUNCTION, NodeKind.METHOD)
        symbols: list[tuple[str, str]] = []
        contained = await store.graph.adjacent(file_node.id, [EdgeKind.CONTAINS], "out")
        for link in contained:
            child = await store.graph.get(link.dst)
            if child is None or child.kind not in symbol_kinds:
                continue
            signature = str(child.attrs.get("signature", ""))
            symbols.append((child.name, signature))

        imports: list[str] = []
        for imp in file_node.attrs.get("imports", []):
            module = imp.get("module")
            if module:
                imports.append(str(module))

        file_path = SymbolID.parse(file_node.id).path
        return FileContext(path=file_path, symbols=symbols, imports=imports)

    async def _embed(self, store: Store, items: list[tuple[str, str, str]]) -> None:
        """Embed the summary texts and replace any vectors stored under the same refs.

        Does nothing when there are no items or when the configured embedder
        is not an ``Embedder`` instance.
        """
        from agentforge_graph.embed import Embedder

        if not (items and isinstance(self.embedder, Embedder)):
            return

        texts = [text for _, _, text in items]
        vectors = await self.embedder.embed(texts, "document")
        embedded = []
        for (sid, path, _text), vec in zip(items, vectors, strict=True):
            attrs = {"path": path, "source_type": "summary", "model": self.embedder.name}
            embedded.append(Embedded(ref=sid, vector=vec, kind=NodeKind.SUMMARY, attrs=attrs))

        # Drop prior vectors for these refs first, then write the new ones.
        for sid, _path, _text in items:
            await store.vectors.delete_where({"ref": sid})
        await store.vectors.upsert(embedded)
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""The fixed v1 design-pattern taxonomy (feat-012).
|
|
2
|
+
|
|
3
|
+
A ``PatternTag`` is a shared taxonomy node (one per pattern name); a ``TAGGED``
|
|
4
|
+
edge goes code-symbol → ``PatternTag`` with confidence + rationale in attrs.
|
|
5
|
+
The list is locked at v1 (GoF core + architectural roles); extensible by config
|
|
6
|
+
later. See spec §4.2.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from agentforge_graph.core import SymbolID
|
|
12
|
+
|
|
13
|
+
TAXONOMY_V1: tuple[str, ...] = (
|
|
14
|
+
"Singleton",
|
|
15
|
+
"Factory",
|
|
16
|
+
"Builder",
|
|
17
|
+
"Adapter",
|
|
18
|
+
"Facade",
|
|
19
|
+
"Observer",
|
|
20
|
+
"Strategy",
|
|
21
|
+
"Decorator",
|
|
22
|
+
"Repository",
|
|
23
|
+
"Service",
|
|
24
|
+
"Controller",
|
|
25
|
+
"DTO",
|
|
26
|
+
"ValueObject",
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
_PATTERN_PATH = "<taxonomy>"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def is_pattern(name: str) -> bool:
    """Return True when ``name`` is one of the entries of ``TAXONOMY_V1``."""
    return any(known == name for known in TAXONOMY_V1)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def pattern_tag_id(repo: str, pattern: str) -> str:
    """Return the id of the taxonomy node for ``pattern`` within ``repo``.

    All pattern tags share the placeholder path ``_PATTERN_PATH``; the pattern
    name followed by a dot is used as the descriptor.
    """
    descriptor = f"{pattern}."
    return SymbolID.for_symbol("pattern", repo, _PATTERN_PATH, descriptor)
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""Framework-aware extractors (feat-011): export framework semantics —
|
|
2
|
+
routes, ORM models, DI — as graph edges agents can traverse.
|
|
3
|
+
|
|
4
|
+
A ``FrameworkPack`` rides feat-002's per-file extraction and emits framework
|
|
5
|
+
nodes/edges into the file's ``FileSubgraph`` (so feat-004 incrementality
|
|
6
|
+
applies for free). v0.4 MVP ships the **FastAPI** routes pack; ORM/DI and more
|
|
7
|
+
frameworks follow over the same harness. Zero ``agentforge`` imports (ADR-0001).
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from .base import FrameworkFacts, FrameworkPack
|
|
13
|
+
from .detect import active_frameworks
|
|
14
|
+
from .extractor import FrameworkExtractor
|
|
15
|
+
from .registry import (
|
|
16
|
+
BUILTIN_FRAMEWORK_PACKS,
|
|
17
|
+
FrameworkRegistry,
|
|
18
|
+
builtin_framework_registry,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
__all__ = [
|
|
22
|
+
"FrameworkFacts",
|
|
23
|
+
"FrameworkPack",
|
|
24
|
+
"FrameworkExtractor",
|
|
25
|
+
"FrameworkRegistry",
|
|
26
|
+
"BUILTIN_FRAMEWORK_PACKS",
|
|
27
|
+
"builtin_framework_registry",
|
|
28
|
+
"active_frameworks",
|
|
29
|
+
]
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
"""The ``FrameworkPack`` ABC + ``FrameworkFacts`` (feat-011).
|
|
2
|
+
|
|
3
|
+
A framework pack rides feat-002's extraction: given a parsed file it emits
|
|
4
|
+
framework nodes/edges (``Route``/``DataModel``/``Service`` + ``HANDLED_BY``/…)
|
|
5
|
+
attached to the symbols the *language* pack already produced (same SymbolID
|
|
6
|
+
scheme). The facts are merged into the file's ``FileSubgraph`` (pipeline), so
|
|
7
|
+
they inherit feat-004 incrementality for free — file-owned, ``parsed``
|
|
8
|
+
provenance, never touched by the resolver's ``clear_resolved``.
|
|
9
|
+
|
|
10
|
+
Detection is declarative: a pack lists the dependency names and import markers
|
|
11
|
+
that mean "this repo uses me"; ``frameworks.detect`` does the scanning. Zero
|
|
12
|
+
``agentforge`` imports (ADR-0001).
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from abc import ABC, abstractmethod
|
|
18
|
+
|
|
19
|
+
from pydantic import BaseModel, Field
|
|
20
|
+
|
|
21
|
+
from agentforge_graph.core import Edge, GraphStore, Node, SourceFile
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class FrameworkFacts(BaseModel):
    """The framework nodes and edges a pack derived from one file.

    ``unresolved`` counts registrations the pack recognised but could not
    extract statically (dynamic paths, class-based handlers at MVP); they are
    surfaced in the IndexReport and never dropped silently.
    """

    # Framework nodes emitted for the file; a fresh empty list per instance.
    nodes: list[Node] = Field(default_factory=list)
    # Framework edges emitted for the file; a fresh empty list per instance.
    edges: list[Edge] = Field(default_factory=list)
    # Number of recognised-but-not-extracted registrations.
    unresolved: int = 0
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class FrameworkPack(ABC):
    """Extraction rules of one framework over the parse trees of one language.

    Subclasses set the declarative class attributes below and implement
    ``extract``; ``detect``, ``resolve`` and ``coupled_files`` have default
    implementations that may be overridden.
    """

    # --- identity ---------------------------------------------------------
    name: str = ""  # e.g. "fastapi"
    language: str = ""  # language pack this rides on, e.g. "python"
    language_slug: str = ""  # SymbolID slug of that language, e.g. "py"
    version: str = "1"  # bump on pattern changes (provenance + future --full)

    # --- declarative detection --------------------------------------------
    dep_names: tuple[str, ...] = ()  # manifest dependency names implying this framework
    import_markers: tuple[str, ...] = ()  # source substrings confirming use

    @property
    def slugs(self) -> tuple[str, ...]:
        """SymbolID slugs this pack extracts over.

        By default a one-element tuple holding ``language_slug``. A pack that
        spans sibling languages (e.g. Express over JS *and* TS) overrides this
        and uses ``file.language`` per file.
        """
        return (self.language_slug,)

    def detect(self, dep_names: set[str], source_sample: str) -> bool:
        """Return True when the pack should be active for a repo.

        That is the case when any of ``dep_names`` is listed in
        ``self.dep_names``, or when any import marker occurs in
        ``source_sample``. Override for bespoke detection.
        """
        if not dep_names.isdisjoint(self.dep_names):
            return True
        for marker in self.import_markers:
            if marker in source_sample:
                return True
        return False

    @abstractmethod
    def extract(self, file: SourceFile, repo: str, commit: str) -> FrameworkFacts:
        """Pass-1, file-isolated: emit framework nodes/edges for ``file``."""

    async def resolve(self, store: GraphStore, commit: str = "") -> list[Edge]:
        """Optional pass-2 cross-file stitching.

        Intended for ORM relationship/FK string targets, router prefixes and
        string view refs: read the graph built in pass-1 and return
        resolved-provenance edges, which the orchestrator upserts after
        clearing the previous generation. This base implementation has no
        cross-file step and returns an empty list.
        """
        resolved: list[Edge] = []
        return resolved

    def coupled_files(self, path: str) -> bool:
        """Tell whether a change to ``path`` forces a framework re-resolve.

        Meant for files such as ``urls.py``; this base implementation always
        answers False.
        """
        return False
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
"""Framework detection (feat-011): which packs are active for a repo.
|
|
2
|
+
|
|
3
|
+
Honours ``frameworks.enabled`` (``auto`` → detect, ``off`` → none, or an
|
|
4
|
+
explicit list) plus ``frameworks.packs`` force-enable. Auto-detection reads
|
|
5
|
+
dependency manifests (``pyproject.toml`` / ``requirements*.txt``) and, as a
|
|
6
|
+
fallback, samples source text for each pack's import markers — so a repo that
|
|
7
|
+
vendors a framework without a manifest still activates.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import re
|
|
13
|
+
import tomllib
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
from agentforge_graph.config import FrameworksConfig
|
|
17
|
+
|
|
18
|
+
from .base import FrameworkPack
|
|
19
|
+
from .registry import FrameworkRegistry
|
|
20
|
+
|
|
21
|
+
_NAME_RE = re.compile(r"^[A-Za-z0-9_.-]+")
|
|
22
|
+
_SAMPLE_CAP = 256 * 1024 # bytes of source to scan for import markers
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _norm(name: str) -> str:
|
|
26
|
+
return name.strip().lower().replace("_", "-")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _dep_name(requirement: str) -> str:
    """Extract the normalised distribution name from a requirement string.

    For example ``fastapi>=0.110`` yields ``fastapi``. The leading run of
    name characters matched by ``_NAME_RE`` is taken and passed through
    ``_norm``; an empty string is returned when nothing matches.
    """
    match = _NAME_RE.match(requirement.strip())
    if match is None:
        return ""
    return _norm(match.group(0))
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _pyproject_deps(path: Path) -> set[str]:
|
|
37
|
+
try:
|
|
38
|
+
data = tomllib.loads(path.read_text(encoding="utf-8", errors="replace"))
|
|
39
|
+
except (OSError, tomllib.TOMLDecodeError):
|
|
40
|
+
return set()
|
|
41
|
+
deps: set[str] = set()
|
|
42
|
+
project = data.get("project", {})
|
|
43
|
+
for req in project.get("dependencies", []):
|
|
44
|
+
deps.add(_dep_name(str(req)))
|
|
45
|
+
for group in project.get("optional-dependencies", {}).values():
|
|
46
|
+
deps.update(_dep_name(str(req)) for req in group)
|
|
47
|
+
# poetry-style
|
|
48
|
+
poetry = data.get("tool", {}).get("poetry", {})
|
|
49
|
+
deps.update(_norm(k) for k in poetry.get("dependencies", {}))
|
|
50
|
+
return {d for d in deps if d and d != "python"}
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _requirements_deps(path: Path) -> set[str]:
|
|
54
|
+
try:
|
|
55
|
+
text = path.read_text(encoding="utf-8", errors="replace")
|
|
56
|
+
except OSError:
|
|
57
|
+
return set()
|
|
58
|
+
deps: set[str] = set()
|
|
59
|
+
for line in text.splitlines():
|
|
60
|
+
line = line.strip()
|
|
61
|
+
if line and not line.startswith(("#", "-")):
|
|
62
|
+
deps.add(_dep_name(line))
|
|
63
|
+
return {d for d in deps if d}
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _manifest_deps(repo_path: Path) -> set[str]:
|
|
67
|
+
"""Best-effort dependency-name set from common manifests (names only),
|
|
68
|
+
lowercased with ``_``→``-`` so ``Flask_SQLAlchemy`` == ``flask-sqlalchemy``."""
|
|
69
|
+
deps: set[str] = set()
|
|
70
|
+
pyproject = repo_path / "pyproject.toml"
|
|
71
|
+
if pyproject.is_file():
|
|
72
|
+
deps |= _pyproject_deps(pyproject)
|
|
73
|
+
for req in repo_path.glob("requirements*.txt"):
|
|
74
|
+
deps |= _requirements_deps(req)
|
|
75
|
+
return deps
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _source_sample(repo_path: Path, exts: set[str]) -> str:
|
|
79
|
+
"""Concatenate up to ``_SAMPLE_CAP`` bytes of source (files matching the
|
|
80
|
+
active languages' extensions) for import-marker confirmation."""
|
|
81
|
+
chunks: list[str] = []
|
|
82
|
+
total = 0
|
|
83
|
+
for path in sorted(repo_path.rglob("*")):
|
|
84
|
+
if total >= _SAMPLE_CAP:
|
|
85
|
+
break
|
|
86
|
+
if not path.is_file() or path.suffix not in exts or ".ckg" in path.parts:
|
|
87
|
+
continue
|
|
88
|
+
try:
|
|
89
|
+
text = path.read_text(encoding="utf-8", errors="replace")
|
|
90
|
+
except OSError:
|
|
91
|
+
continue
|
|
92
|
+
chunks.append(text)
|
|
93
|
+
total += len(text)
|
|
94
|
+
return "\n".join(chunks)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def active_frameworks(
    repo_path: str | Path,
    config: str | Path | None,
    registry: FrameworkRegistry,
    language_extensions: set[str],
) -> list[FrameworkPack]:
    """Return the registry packs that are active for the repo at ``repo_path``.

    Driven by the loaded ``FrameworksConfig``:

    * ``enabled == "off"``: no packs;
    * ``enabled`` is a list: exactly the packs named there or in
      ``cfg.packs`` (no detection is run);
    * otherwise: packs named in ``cfg.packs`` plus every pack whose
      ``detect`` accepts the manifest dependencies and the source sample.

    Packs are returned in registry order.
    """
    cfg = FrameworksConfig.load(config)
    mode = cfg.enabled
    if mode == "off":
        return []

    root = Path(repo_path)
    forced = set(cfg.packs)

    # Explicit list short-circuits detection (force-enabled packs still count).
    if isinstance(mode, list):
        wanted = forced.union(mode)
        return [pack for pack in registry.packs if pack.name in wanted]

    # "auto": dependency manifests first, import markers as the fallback.
    # The source sample is only built when some pack declares markers.
    deps = _manifest_deps(root)
    if any(pack.import_markers for pack in registry.packs):
        sample = _source_sample(root, language_extensions)
    else:
        sample = ""
    return [
        pack for pack in registry.packs if pack.name in forced or pack.detect(deps, sample)
    ]
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""``FrameworkExtractor`` — run the active framework packs over one file
|
|
2
|
+
(feat-011). Selects packs by the file's language and merges their
|
|
3
|
+
``FrameworkFacts``. File-isolated and stateless, so it runs inside the same
|
|
4
|
+
worker thread as the language extractor (pipeline)."""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from agentforge_graph.core import (
|
|
9
|
+
Edge,
|
|
10
|
+
EdgeKind,
|
|
11
|
+
GraphQuery,
|
|
12
|
+
GraphStore,
|
|
13
|
+
Node,
|
|
14
|
+
NodeKind,
|
|
15
|
+
SourceFile,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
from .base import FrameworkFacts, FrameworkPack
|
|
19
|
+
|
|
20
|
+
_ALL = 10_000_000 # effectively unbounded query for v0.1 graph sizes
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class FrameworkExtractor:
    """Runs the active framework packs over single files and over the graph.

    Packs are indexed by every slug they declare, so ``extract`` only invokes
    the packs registered for a file's language.
    """

    def __init__(self, packs: list[FrameworkPack]) -> None:
        """Store ``packs`` and index them by slug.

        The slug index is built from the stored copy rather than from the
        argument, so a one-shot iterable cannot leave the index empty while
        ``active`` reports True.
        """
        self._packs = list(packs)
        self._by_slug: dict[str, list[FrameworkPack]] = {}
        for pack in self._packs:
            for slug in pack.slugs:
                self._by_slug.setdefault(slug, []).append(pack)

    @property
    def active(self) -> bool:
        """True when at least one pack was supplied."""
        return bool(self._packs)

    def extract(self, file: SourceFile, repo: str, commit: str) -> FrameworkFacts:
        """Merge the facts of every pack registered for ``file.language``.

        Nodes and edges are concatenated in pack order and the ``unresolved``
        counters are summed; a file with no matching pack yields empty facts.
        """
        merged = FrameworkFacts()
        for pack in self._by_slug.get(file.language, []):
            facts = pack.extract(file, repo, commit)
            merged.nodes.extend(facts.nodes)
            merged.edges.extend(facts.edges)
            merged.unresolved += facts.unresolved
        return merged

    async def resolve(self, store: GraphStore, commit: str = "") -> tuple[int, int]:
        """Run every pack's cross-file pass-2 and replace the previous result.

        Clears all outgoing ``RELATES_TO`` edges of the current ``DataModel``
        nodes, then adds the edges returned by each pack's ``resolve``.

        Returns ``(edges_resolved, targets_unresolved)``: the number of edges
        added, and the number of pending ``relations`` entries on the models
        minus that edge count (never below zero). With no packs the result is
        ``(0, 0)`` and the store is not touched.
        """
        if not self._packs:
            return 0, 0
        models = (await store.query(GraphQuery(kinds=[NodeKind.DATA_MODEL], limit=_ALL))).nodes
        if models:
            await store.clear_outgoing([m.id for m in models], EdgeKind.RELATES_TO)
        pending = sum(len(m.attrs.get("relations") or []) for m in models)

        edges: list[Node | Edge] = []
        for pack in self._packs:
            edges.extend(await pack.resolve(store, commit))
        if edges:
            await store.add(edges)
        return len(edges), max(0, pending - len(edges))
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
"""Shared ORM pass-2 helpers (feat-011).
|
|
2
|
+
|
|
3
|
+
The cross-file ``RELATES_TO`` stitch is the same shape for every ORM pack: load
|
|
4
|
+
the whole-repo model set, index it by class name and table, then turn each
|
|
5
|
+
model's pending ``relations`` (recorded in pass-1) into edges via a unique-match
|
|
6
|
+
lookup (ADR-0004 — never guess an ambiguous target). Only the per-relation
|
|
7
|
+
target resolution differs per framework, so that is injected as a callback.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from collections.abc import Callable, Iterable
|
|
13
|
+
|
|
14
|
+
from agentforge_graph.core import (
|
|
15
|
+
Edge,
|
|
16
|
+
EdgeKind,
|
|
17
|
+
GraphQuery,
|
|
18
|
+
GraphStore,
|
|
19
|
+
Node,
|
|
20
|
+
NodeKind,
|
|
21
|
+
Provenance,
|
|
22
|
+
SymbolID,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
_ALL = 10_000_000 # effectively unbounded query for v0.1 graph sizes
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class ModelIndex:
    """Lookup of whole-repo models by class name and by table name.

    Each name maps to the *set* of model ids carrying it, so duplicates stay
    detectable and a target is never guessed among several candidates.
    """

    def __init__(self, models: Iterable[Node]) -> None:
        """Index ``models`` by their non-empty ``model_class`` and ``table`` attrs."""
        self.by_class: dict[str, set[str]] = {}
        self.by_table: dict[str, set[str]] = {}
        buckets = (("model_class", self.by_class), ("table", self.by_table))
        for model in models:
            for attr_key, bucket in buckets:
                value = str(model.attrs.get(attr_key, ""))
                if value:
                    bucket.setdefault(value, set()).add(model.id)

    def unique_class(self, name: str) -> str | None:
        """Return the id of the only model with class ``name``, else None."""
        candidates = self.by_class.get(name)
        return _unique(candidates)

    def unique_table(self, name: str) -> str | None:
        """Return the id of the only model with table ``name``, else None."""
        candidates = self.by_table.get(name)
        return _unique(candidates)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _unique(ids: set[str] | None) -> str | None:
|
|
51
|
+
return next(iter(ids)) if ids and len(ids) == 1 else None
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
async def framework_models(store: GraphStore, framework: str) -> list[Node]:
    """Return every ``DataModel`` node whose ``framework`` attr equals ``framework``.

    All ``DataModel`` nodes are queried (up to ``_ALL``) and filtered here.
    """
    query = GraphQuery(kinds=[NodeKind.DATA_MODEL], limit=_ALL)
    result = await store.query(query)
    return [node for node in result.nodes if node.attrs.get("framework") == framework]
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def relations_to_edges(
    models: list[Node],
    index: ModelIndex,
    resolve_target: Callable[[dict[str, str], ModelIndex], str | None],
    prov: Provenance,
) -> list[Edge]:
    """Turn each model's pending relations into deduplicated ``RELATES_TO`` edges.

    ``resolve_target`` maps one relation dict to a target model id, or to None
    when the target is external or ambiguous; such relations produce no edge.
    Only the first relation per ``(source id, target id, kind)`` triple is
    kept. Every edge carries ``attrs.kind`` (relationship/fk/m2m/…) and
    ``attrs.via`` (the field), and uses the path of the source model's id as
    ``origin_path`` so it is owned by that file for incremental invalidation.
    """
    # Insertion-ordered dict: first occurrence wins and original order is kept.
    unique: dict[tuple[str, str, str], Edge] = {}
    for model in models:
        relations = model.attrs.get("relations") or []
        for rel in relations:
            target_id = resolve_target(rel, index)
            if target_id is None:
                continue
            kind = str(rel.get("kind", ""))
            key = (model.id, target_id, kind)
            if key in unique:
                continue
            unique[key] = Edge(
                src=model.id,
                dst=target_id,
                kind=EdgeKind.RELATES_TO,
                attrs={"kind": kind, "via": str(rel.get("field", ""))},
                provenance=prov,
                origin_path=SymbolID.parse(model.id).path,
            )
    return list(unique.values())
|