agentforge-graph 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. agentforge_graph/__init__.py +6 -0
  2. agentforge_graph/chunking/__init__.py +12 -0
  3. agentforge_graph/chunking/cast.py +159 -0
  4. agentforge_graph/chunking/chunk.py +19 -0
  5. agentforge_graph/chunking/tokens.py +15 -0
  6. agentforge_graph/cli.py +607 -0
  7. agentforge_graph/config.py +259 -0
  8. agentforge_graph/core/__init__.py +54 -0
  9. agentforge_graph/core/conformance.py +270 -0
  10. agentforge_graph/core/contracts.py +163 -0
  11. agentforge_graph/core/kinds.py +68 -0
  12. agentforge_graph/core/models.py +134 -0
  13. agentforge_graph/core/provenance.py +62 -0
  14. agentforge_graph/core/symbols.py +116 -0
  15. agentforge_graph/embed/__init__.py +28 -0
  16. agentforge_graph/embed/base.py +22 -0
  17. agentforge_graph/embed/bedrock.py +85 -0
  18. agentforge_graph/embed/fake.py +34 -0
  19. agentforge_graph/embed/openai.py +67 -0
  20. agentforge_graph/embed/pipeline.py +184 -0
  21. agentforge_graph/embed/registry.py +66 -0
  22. agentforge_graph/embed/report.py +15 -0
  23. agentforge_graph/enrich/__init__.py +70 -0
  24. agentforge_graph/enrich/anthropic.py +38 -0
  25. agentforge_graph/enrich/anthropic_client.py +109 -0
  26. agentforge_graph/enrich/bedrock.py +24 -0
  27. agentforge_graph/enrich/bedrock_client.py +115 -0
  28. agentforge_graph/enrich/bedrock_summarizer.py +23 -0
  29. agentforge_graph/enrich/claude.py +172 -0
  30. agentforge_graph/enrich/enricher.py +108 -0
  31. agentforge_graph/enrich/governs.py +173 -0
  32. agentforge_graph/enrich/governs_enricher.py +152 -0
  33. agentforge_graph/enrich/heuristics.py +224 -0
  34. agentforge_graph/enrich/judge.py +63 -0
  35. agentforge_graph/enrich/registry.py +133 -0
  36. agentforge_graph/enrich/report.py +60 -0
  37. agentforge_graph/enrich/summarizer.py +62 -0
  38. agentforge_graph/enrich/summary_enricher.py +211 -0
  39. agentforge_graph/enrich/taxonomy.py +38 -0
  40. agentforge_graph/frameworks/__init__.py +29 -0
  41. agentforge_graph/frameworks/base.py +75 -0
  42. agentforge_graph/frameworks/detect.py +124 -0
  43. agentforge_graph/frameworks/extractor.py +63 -0
  44. agentforge_graph/frameworks/orm.py +93 -0
  45. agentforge_graph/frameworks/packs/_js_ast.py +56 -0
  46. agentforge_graph/frameworks/packs/_python_ast.py +157 -0
  47. agentforge_graph/frameworks/packs/django/__init__.py +240 -0
  48. agentforge_graph/frameworks/packs/django/models.scm +7 -0
  49. agentforge_graph/frameworks/packs/express/__init__.py +133 -0
  50. agentforge_graph/frameworks/packs/express/routes.scm +8 -0
  51. agentforge_graph/frameworks/packs/fastapi/__init__.py +210 -0
  52. agentforge_graph/frameworks/packs/fastapi/depends.scm +6 -0
  53. agentforge_graph/frameworks/packs/fastapi/routes.scm +10 -0
  54. agentforge_graph/frameworks/packs/flask/__init__.py +143 -0
  55. agentforge_graph/frameworks/packs/flask/routes.scm +11 -0
  56. agentforge_graph/frameworks/packs/nestjs/__init__.py +205 -0
  57. agentforge_graph/frameworks/packs/nestjs/routes.scm +6 -0
  58. agentforge_graph/frameworks/packs/spring/__init__.py +267 -0
  59. agentforge_graph/frameworks/packs/spring/routes.scm +6 -0
  60. agentforge_graph/frameworks/packs/sqlalchemy/__init__.py +250 -0
  61. agentforge_graph/frameworks/packs/sqlalchemy/models.scm +7 -0
  62. agentforge_graph/frameworks/registry.py +44 -0
  63. agentforge_graph/ingest/__init__.py +30 -0
  64. agentforge_graph/ingest/codegraph.py +847 -0
  65. agentforge_graph/ingest/extractor.py +353 -0
  66. agentforge_graph/ingest/incremental/__init__.py +25 -0
  67. agentforge_graph/ingest/incremental/detect.py +118 -0
  68. agentforge_graph/ingest/incremental/dirty.py +61 -0
  69. agentforge_graph/ingest/incremental/indexer.py +218 -0
  70. agentforge_graph/ingest/incremental/meta.py +72 -0
  71. agentforge_graph/ingest/incremental/ports.py +39 -0
  72. agentforge_graph/ingest/pack.py +160 -0
  73. agentforge_graph/ingest/packs/__init__.py +34 -0
  74. agentforge_graph/ingest/packs/cpp/__init__.py +35 -0
  75. agentforge_graph/ingest/packs/cpp/references.scm +15 -0
  76. agentforge_graph/ingest/packs/cpp/structure.scm +49 -0
  77. agentforge_graph/ingest/packs/csharp/__init__.py +35 -0
  78. agentforge_graph/ingest/packs/csharp/references.scm +12 -0
  79. agentforge_graph/ingest/packs/csharp/structure.scm +45 -0
  80. agentforge_graph/ingest/packs/go/__init__.py +38 -0
  81. agentforge_graph/ingest/packs/go/references.scm +12 -0
  82. agentforge_graph/ingest/packs/go/structure.scm +64 -0
  83. agentforge_graph/ingest/packs/java/__init__.py +35 -0
  84. agentforge_graph/ingest/packs/java/references.scm +12 -0
  85. agentforge_graph/ingest/packs/java/structure.scm +38 -0
  86. agentforge_graph/ingest/packs/javascript/__init__.py +34 -0
  87. agentforge_graph/ingest/packs/javascript/references.scm +11 -0
  88. agentforge_graph/ingest/packs/javascript/structure.scm +166 -0
  89. agentforge_graph/ingest/packs/php/__init__.py +35 -0
  90. agentforge_graph/ingest/packs/php/references.scm +15 -0
  91. agentforge_graph/ingest/packs/php/structure.scm +44 -0
  92. agentforge_graph/ingest/packs/python/__init__.py +25 -0
  93. agentforge_graph/ingest/packs/python/references.scm +14 -0
  94. agentforge_graph/ingest/packs/python/structure.scm +57 -0
  95. agentforge_graph/ingest/packs/ruby/__init__.py +37 -0
  96. agentforge_graph/ingest/packs/ruby/references.scm +12 -0
  97. agentforge_graph/ingest/packs/ruby/structure.scm +37 -0
  98. agentforge_graph/ingest/packs/rust/__init__.py +39 -0
  99. agentforge_graph/ingest/packs/rust/references.scm +12 -0
  100. agentforge_graph/ingest/packs/rust/structure.scm +46 -0
  101. agentforge_graph/ingest/packs/typescript/__init__.py +31 -0
  102. agentforge_graph/ingest/packs/typescript/references.scm +11 -0
  103. agentforge_graph/ingest/packs/typescript/structure.scm +99 -0
  104. agentforge_graph/ingest/pipeline.py +134 -0
  105. agentforge_graph/ingest/report.py +84 -0
  106. agentforge_graph/ingest/resolver.py +467 -0
  107. agentforge_graph/ingest/source.py +79 -0
  108. agentforge_graph/knowledge/__init__.py +28 -0
  109. agentforge_graph/knowledge/adr.py +136 -0
  110. agentforge_graph/knowledge/commits.py +152 -0
  111. agentforge_graph/knowledge/ingest.py +312 -0
  112. agentforge_graph/knowledge/mentions.py +71 -0
  113. agentforge_graph/knowledge/report.py +32 -0
  114. agentforge_graph/main.py +21 -0
  115. agentforge_graph/providers.py +36 -0
  116. agentforge_graph/repomap/__init__.py +14 -0
  117. agentforge_graph/repomap/rank.py +161 -0
  118. agentforge_graph/repomap/render.py +55 -0
  119. agentforge_graph/repomap/repomap.py +66 -0
  120. agentforge_graph/retrieve/__init__.py +21 -0
  121. agentforge_graph/retrieve/pack.py +76 -0
  122. agentforge_graph/retrieve/rerank.py +251 -0
  123. agentforge_graph/retrieve/retriever.py +286 -0
  124. agentforge_graph/retrieve/scoring.py +36 -0
  125. agentforge_graph/serve/__init__.py +19 -0
  126. agentforge_graph/serve/engine.py +204 -0
  127. agentforge_graph/serve/http_runner.py +133 -0
  128. agentforge_graph/serve/server.py +110 -0
  129. agentforge_graph/serve/tools.py +307 -0
  130. agentforge_graph/store/__init__.py +32 -0
  131. agentforge_graph/store/_rowmap.py +102 -0
  132. agentforge_graph/store/errors.py +22 -0
  133. agentforge_graph/store/facade.py +89 -0
  134. agentforge_graph/store/kuzu_store.py +380 -0
  135. agentforge_graph/store/lance_store.py +146 -0
  136. agentforge_graph/store/neo4j_store.py +294 -0
  137. agentforge_graph/store/pgvector_store.py +170 -0
  138. agentforge_graph/store/registry.py +45 -0
  139. agentforge_graph/temporal/__init__.py +36 -0
  140. agentforge_graph/temporal/backfill.py +338 -0
  141. agentforge_graph/temporal/events.py +82 -0
  142. agentforge_graph/temporal/index.py +190 -0
  143. agentforge_graph/temporal/mining.py +190 -0
  144. agentforge_graph/temporal/recorder.py +114 -0
  145. agentforge_graph/temporal/store.py +282 -0
  146. agentforge_graph-0.3.2.dist-info/METADATA +291 -0
  147. agentforge_graph-0.3.2.dist-info/RECORD +151 -0
  148. agentforge_graph-0.3.2.dist-info/WHEEL +4 -0
  149. agentforge_graph-0.3.2.dist-info/entry_points.txt +3 -0
  150. agentforge_graph-0.3.2.dist-info/licenses/LICENSE +202 -0
  151. agentforge_graph-0.3.2.dist-info/licenses/NOTICE +14 -0
@@ -0,0 +1,211 @@
1
+ """``SummaryEnricher`` (feat-012) — bottom-up module summaries.
2
+
3
+ Leaf-first over ``CONTAINS``: each file is summarised from its symbols
4
+ (signatures) + imports, then one repo summary is synthesised from the file
5
+ summaries. Summaries are ``Summary`` nodes (``SUMMARIZES`` → file / a synthesised
6
+ ``Repository`` node) with ``llm`` provenance, and are embedded
7
+ (``source_type="summary"``) so a concept query can land on one and expand to the
8
+ code. Budgeted (``BudgetPolicy``), resumable (``DirtySet("summaries")``), and
9
+ idempotent: the ``Summary`` node is MERGE-updated and its (stable) ``SUMMARIZES``
10
+ edge is created only when missing, and the vector is replaced by ref.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import asyncio
16
+
17
+ from agentforge_core.production.budget import BudgetPolicy
18
+ from agentforge_core.production.exceptions import BudgetExceeded
19
+
20
+ from agentforge_graph.core import (
21
+ Edge,
22
+ EdgeKind,
23
+ Embedded,
24
+ Node,
25
+ NodeKind,
26
+ Provenance,
27
+ SymbolID,
28
+ )
29
+ from agentforge_graph.store import Store
30
+
31
+ from .report import SummaryReport
32
+ from .summarizer import FileContext, Summarizer
33
+
34
# Language slug passed to ``SymbolID.for_symbol`` when building summary ids.
_SUMMARY_LANG = "summary"
# Stand-in "path" used for repo-level ids, which have no real file path.
_REPO_PLACEHOLDER = "<repo>"
36
+
37
+
38
def summary_id(repo: str, path: str) -> str:
    """Return the id of the ``Summary`` node for ``path`` inside ``repo``.

    The id is produced by ``SymbolID.for_symbol`` from the summary language
    slug, the repo, the path and the fixed descriptor ``"summary."``.
    """
    descriptor = "summary."
    return SymbolID.for_symbol(_SUMMARY_LANG, repo, path, descriptor)
40
+
41
+
42
def repo_node_id(repo: str) -> str:
    """Return the id of the synthesised ``Repository`` node for ``repo``.

    The repo placeholder stands in for the path component because a
    repository node does not correspond to a single file.
    """
    language = "repo"
    descriptor = "repository."
    return SymbolID.for_symbol(language, repo, _REPO_PLACEHOLDER, descriptor)
44
+
45
+
46
class SummaryEnricher:
    """Summarise files, then the repository, and persist the results.

    File summaries are produced in concurrent batches from each file's
    contained symbols and imports; one repo summary is then synthesised from
    the file summaries of the same run. Each summary becomes a ``Summary``
    node linked by a ``SUMMARIZES`` edge and, when an embedder is configured,
    a vector stored under the summary id.
    """

    version = "summaries@1"

    def __init__(
        self,
        repo: str,
        summarizer: Summarizer,
        *,
        embedder: object | None = None,
        max_words: int = 120,
        levels: list[str] | None = None,
        budget_usd: float = 2.0,
        concurrency: int = 6,
        commit: str = "",
    ) -> None:
        """Store the configuration for later ``enrich`` calls.

        Args:
            repo: Repository name used in ids and as the repo node's name.
            summarizer: Backend that writes the summaries and tracks its cost.
            embedder: Optional embedder; summaries are only embedded when this
                is an ``Embedder`` instance.
            max_words: Word limit forwarded to the summarizer.
            levels: Tiers to produce; a missing or empty value means both
                ``"file"`` and ``"repo"``.
            budget_usd: Spend ceiling handed to the ``BudgetPolicy``.
            concurrency: Files summarised per batch; values below 1 become 1.
            commit: Commit recorded in the provenance of everything written.
        """
        self.repo = repo
        self.summarizer = summarizer
        self.embedder = embedder
        self.max_words = max_words
        self.levels = levels if levels else ["file", "repo"]
        self.budget_usd = budget_usd
        self.concurrency = concurrency if concurrency > 1 else 1
        self.commit = commit
        # File ids summarised by the most recent ``enrich`` call.
        self.last_done_ids: list[str] = []

    async def enrich(self, store: Store, file_ids: list[str]) -> SummaryReport:
        """Summarise ``file_ids`` (and then the repo) and write the results.

        Returns a ``SummaryReport`` carrying the number of files summarised,
        whether the repo tier ran, whether the budget tripped, and the
        summarizer's cost. Nothing is produced when ``"file"`` is not among
        the configured levels.
        """
        report = SummaryReport()
        self.last_done_ids = []
        if "file" not in self.levels:
            return report

        budget = BudgetPolicy(usd=self.budget_usd, max_tokens=10**12, max_iterations=10**12)
        prov = Provenance.llm(self.version, 1.0, self.commit)
        pending_nodes: list[Node] = []
        pending_edges: list[Edge] = []
        embed_queue: list[tuple[str, str, str]] = []  # (summary_id, path, text)
        per_file: list[tuple[str, str]] = []  # (path, text)

        # Graph reads come first; ids that are missing or not FILE nodes are
        # skipped. The summarizer is only called afterwards, batch by batch.
        targets: list[tuple[str, FileContext]] = []
        for fid in file_ids:
            candidate = await store.graph.get(fid)
            if candidate is None or candidate.kind is not NodeKind.FILE:
                continue
            context = await self._file_context(store, candidate)
            targets.append((fid, context))

        step = self.concurrency
        for offset in range(0, len(targets), step):
            batch = targets[offset : offset + step]
            try:
                budget.check()
            except BudgetExceeded:
                report.budget_tripped = True
                break
            # Cost is accounted once per batch, outside the gather (ENH-002).
            spent_before = self.summarizer.cost_usd
            calls = [self.summarizer.summarize_file(ctx, self.max_words) for _fid, ctx in batch]
            results = await asyncio.gather(*calls)
            budget.commit(self.summarizer.cost_usd - spent_before)
            for (fid, _ctx), summary in zip(batch, results, strict=True):
                path = SymbolID.parse(fid).path
                sid = summary_id(self.repo, path)
                file_summary = self._summary_node(
                    sid, summary.text, "file", summary.model, path, prov
                )
                pending_nodes.append(file_summary)
                pending_edges.append(
                    Edge(src=sid, dst=fid, kind=EdgeKind.SUMMARIZES, provenance=prov)
                )
                embed_queue.append((sid, path, summary.text))
                per_file.append((path, summary.text))
                self.last_done_ids.append(fid)
                report.files_summarized += 1

        # Repo tier: built bottom-up from this run's file summaries, and
        # budget-gated like the file batches.
        if "repo" in self.levels and per_file and not report.budget_tripped:
            try:
                budget.check()
            except BudgetExceeded:
                report.budget_tripped = True
            else:
                spent_before = self.summarizer.cost_usd
                repo_summary = await self.summarizer.summarize_repo(
                    self.repo, per_file, self.max_words
                )
                budget.commit(self.summarizer.cost_usd - spent_before)
                rnode = repo_node_id(self.repo)
                pending_nodes.append(
                    Node(id=rnode, kind=NodeKind.REPOSITORY, name=self.repo, provenance=prov)
                )
                rsid = summary_id(self.repo, _REPO_PLACEHOLDER)
                pending_nodes.append(
                    self._summary_node(
                        rsid, repo_summary.text, "repo", repo_summary.model, "", prov
                    )
                )
                pending_edges.append(
                    Edge(src=rsid, dst=rnode, kind=EdgeKind.SUMMARIZES, provenance=prov)
                )
                embed_queue.append((rsid, "", repo_summary.text))
                report.repo_summarized = True

        report.cost_usd = round(self.summarizer.cost_usd, 6)

        # Idempotent without edge churn: MERGE the summary nodes (this refreshes
        # their text), then create each SUMMARIZES edge only if it's missing. The
        # edge target is stable (a summary always summarizes the same file), so
        # we never delete+recreate it — avoiding a Kuzu forward-rel-scan
        # staleness bug (see docs/framework note).
        if pending_nodes:
            await store.graph.add(list(pending_nodes))
            for edge in pending_edges:
                current = await store.graph.adjacent(edge.src, [edge.kind], "out")
                already_linked = any(e.dst == edge.dst for e in current)
                if not already_linked:
                    await store.graph.add([edge])
        await self._embed(store, embed_queue)
        return report

    # --- helpers ----------------------------------------------------------

    def _summary_node(
        self, sid: str, text: str, level: str, model: str, path: str, prov: Provenance
    ) -> Node:
        """Build the ``Summary`` node for one summary text.

        The node is named after ``path``, falling back to the repo name when
        ``path`` is empty (the repo-level summary).
        """
        attrs = {
            "text": text,
            "level": level,
            "model": model,
            "prompt_version": self.version,
            "path": path,
        }
        label = f"summary:{path or self.repo}"
        return Node(id=sid, kind=NodeKind.SUMMARY, name=label, attrs=attrs, provenance=prov)

    async def _file_context(self, store: Store, file_node: Node) -> FileContext:
        """Collect the summarizer input for one file node.

        Gathers ``(name, signature)`` for every class, function and method the
        file ``CONTAINS``, plus the non-empty ``module`` of each entry in the
        file node's ``imports`` attribute.
        """
        wanted = (NodeKind.CLASS, NodeKind.FUNCTION, NodeKind.METHOD)
        symbols: list[tuple[str, str]] = []
        contained = await store.graph.adjacent(file_node.id, [EdgeKind.CONTAINS], "out")
        for edge in contained:
            child = await store.graph.get(edge.dst)
            if child is None or child.kind not in wanted:
                continue
            signature = str(child.attrs.get("signature", ""))
            symbols.append((child.name, signature))

        imports: list[str] = []
        for entry in file_node.attrs.get("imports", []):
            module = entry.get("module")
            if module:
                imports.append(str(module))

        file_path = SymbolID.parse(file_node.id).path
        return FileContext(path=file_path, symbols=symbols, imports=imports)

    async def _embed(self, store: Store, items: list[tuple[str, str, str]]) -> None:
        """Embed summary texts and replace their vectors in the store.

        Does nothing when ``items`` is empty or the configured embedder is not
        an ``Embedder``. ``items`` holds ``(summary_id, path, text)`` tuples.
        """
        from agentforge_graph.embed import Embedder

        if not (items and isinstance(self.embedder, Embedder)):
            return
        texts = [text for _sid, _path, text in items]
        vectors = await self.embedder.embed(texts, "document")
        embedded = []
        for (sid, path, _text), vec in zip(items, vectors, strict=True):
            attrs = {"path": path, "source_type": "summary", "model": self.embedder.name}
            embedded.append(Embedded(ref=sid, vector=vec, kind=NodeKind.SUMMARY, attrs=attrs))
        # replace any prior vectors for these refs, then add
        for sid, _path, _text in items:
            await store.vectors.delete_where({"ref": sid})
        await store.vectors.upsert(embedded)
@@ -0,0 +1,38 @@
1
+ """The fixed v1 design-pattern taxonomy (feat-012).
2
+
3
+ A ``PatternTag`` is a shared taxonomy node (one per pattern name); a ``TAGGED``
4
+ edge goes code-symbol → ``PatternTag`` with confidence + rationale in attrs.
5
+ The list is locked at v1 (GoF core + architectural roles); extensible by config
6
+ later. See spec §4.2.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from agentforge_graph.core import SymbolID
12
+
13
# The closed set of pattern names accepted by ``is_pattern``. Names are
# matched exactly (case-sensitive).
TAXONOMY_V1: tuple[str, ...] = (
    "Singleton",
    "Factory",
    "Builder",
    "Adapter",
    "Facade",
    "Observer",
    "Strategy",
    "Decorator",
    "Repository",
    "Service",
    "Controller",
    "DTO",
    "ValueObject",
)

# Stand-in "path" component for taxonomy node ids, which belong to no file.
_PATTERN_PATH = "<taxonomy>"
30
+
31
+
32
def is_pattern(name: str) -> bool:
    """Return True when ``name`` is exactly one of the v1 taxonomy names."""
    known = TAXONOMY_V1
    return name in known
34
+
35
+
36
def pattern_tag_id(repo: str, pattern: str) -> str:
    """Return the id of the taxonomy node for ``pattern`` within ``repo``.

    Every symbol tagged with the same pattern name shares this one node.
    """
    descriptor = f"{pattern}."
    return SymbolID.for_symbol("pattern", repo, _PATTERN_PATH, descriptor)
@@ -0,0 +1,29 @@
1
+ """Framework-aware extractors (feat-011): export framework semantics —
2
+ routes, ORM models, DI — as graph edges agents can traverse.
3
+
4
+ A ``FrameworkPack`` rides feat-002's per-file extraction and emits framework
5
+ nodes/edges into the file's ``FileSubgraph`` (so feat-004 incrementality
6
+ applies for free). v0.4 MVP ships the **FastAPI** routes pack; ORM/DI and more
7
+ frameworks follow over the same harness. Zero ``agentforge`` imports (ADR-0001).
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from .base import FrameworkFacts, FrameworkPack
13
+ from .detect import active_frameworks
14
+ from .extractor import FrameworkExtractor
15
+ from .registry import (
16
+ BUILTIN_FRAMEWORK_PACKS,
17
+ FrameworkRegistry,
18
+ builtin_framework_registry,
19
+ )
20
+
21
+ __all__ = [
22
+ "FrameworkFacts",
23
+ "FrameworkPack",
24
+ "FrameworkExtractor",
25
+ "FrameworkRegistry",
26
+ "BUILTIN_FRAMEWORK_PACKS",
27
+ "builtin_framework_registry",
28
+ "active_frameworks",
29
+ ]
@@ -0,0 +1,75 @@
1
+ """The ``FrameworkPack`` ABC + ``FrameworkFacts`` (feat-011).
2
+
3
+ A framework pack rides feat-002's extraction: given a parsed file it emits
4
+ framework nodes/edges (``Route``/``DataModel``/``Service`` + ``HANDLED_BY``/…)
5
+ attached to the symbols the *language* pack already produced (same SymbolID
6
+ scheme). The facts are merged into the file's ``FileSubgraph`` (pipeline), so
7
+ they inherit feat-004 incrementality for free — file-owned, ``parsed``
8
+ provenance, never touched by the resolver's ``clear_resolved``.
9
+
10
+ Detection is declarative: a pack lists the dependency names and import markers
11
+ that mean "this repo uses me"; ``frameworks.detect`` does the scanning. Zero
12
+ ``agentforge`` imports (ADR-0001).
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from abc import ABC, abstractmethod
18
+
19
+ from pydantic import BaseModel, Field
20
+
21
+ from agentforge_graph.core import Edge, GraphStore, Node, SourceFile
22
+
23
+
24
class FrameworkFacts(BaseModel):
    """What a pack derived from one file. ``unresolved`` counts registrations
    the pack recognised but could not extract statically (dynamic paths,
    class-based handlers at MVP) — surfaced in the IndexReport, never dropped
    silently."""

    # Framework nodes derived from the file; each instance gets its own list.
    nodes: list[Node] = Field(default_factory=list)
    # Framework edges derived from the file; each instance gets its own list.
    edges: list[Edge] = Field(default_factory=list)
    # Number of recognised-but-not-extracted registrations (see class docstring).
    unresolved: int = 0
33
+
34
+
35
class FrameworkPack(ABC):
    """A framework's extraction rules over one language's parse trees.

    Subclasses set the declarative class attributes below and implement
    ``extract``; ``resolve`` and ``coupled_files`` are optional hooks with
    no-op defaults.
    """

    name: str = ""  # "fastapi"
    language: str = ""  # the language pack this rides ("python")
    language_slug: str = ""  # SymbolID slug of that language ("py")
    version: str = "1"  # bump on pattern changes (provenance + future --full)
    dep_names: tuple[str, ...] = ()  # manifest dependency names that imply this framework
    import_markers: tuple[str, ...] = ()  # source substrings that confirm use

    @property
    def slugs(self) -> tuple[str, ...]:
        """The SymbolID slugs this pack extracts over. Defaults to the single
        ``language_slug``; a pack spanning sibling languages (e.g. Express over
        JS *and* TS) overrides this and uses ``file.language`` per file."""
        return (self.language_slug,)

    def detect(self, dep_names: set[str], source_sample: str) -> bool:
        """Active for this repo? A declared dependency, or an import marker in
        the sampled source. Override for bespoke detection.

        Dependency names are compared after trimming, lowercasing and mapping
        ``_`` to ``-`` on *both* sides. Manifest names arrive already in that
        form, so without it a pack declaring e.g. ``Flask_SQLAlchemy`` could
        never match ``flask-sqlalchemy``.

        Args:
            dep_names: Dependency names found in the repo's manifests.
            source_sample: Sampled source text searched for import markers.

        Returns:
            True when a dependency name matches or any marker occurs in the
            sample, otherwise False.
        """

        def canonical(raw: str) -> str:
            return raw.strip().lower().replace("_", "-")

        declared = {canonical(dep) for dep in self.dep_names}
        present = {canonical(dep) for dep in dep_names}
        if declared & present:
            return True
        return any(marker in source_sample for marker in self.import_markers)

    @abstractmethod
    def extract(self, file: SourceFile, repo: str, commit: str) -> FrameworkFacts:
        """Pass-1, file-isolated: emit framework nodes/edges for ``file``."""

    async def resolve(self, store: GraphStore, commit: str = "") -> list[Edge]:
        """Optional pass-2 cross-file stitching (ORM relationship/FK string
        targets, router prefixes, string view refs). Reads the graph built in
        pass-1 and returns resolved-provenance edges; the orchestrator clears the
        previous generation and upserts the result (idempotent). Packs with no
        cross-file step return ``[]``."""
        return []

    def coupled_files(self, path: str) -> bool:
        """True for files whose change forces a framework re-resolve (e.g.
        ``urls.py``). The base implementation always returns False; a pack
        that needs such a trigger overrides it."""
        return False
@@ -0,0 +1,124 @@
1
+ """Framework detection (feat-011): which packs are active for a repo.
2
+
3
+ Honours ``frameworks.enabled`` (``auto`` → detect, ``off`` → none, or an
4
+ explicit list) plus ``frameworks.packs`` force-enable. Auto-detection reads
5
+ dependency manifests (``pyproject.toml`` / ``requirements*.txt``) and, as a
6
+ fallback, samples source text for each pack's import markers — so a repo that
7
+ vendors a framework without a manifest still activates.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import re
13
+ import tomllib
14
+ from pathlib import Path
15
+
16
+ from agentforge_graph.config import FrameworksConfig
17
+
18
+ from .base import FrameworkPack
19
+ from .registry import FrameworkRegistry
20
+
21
# Matches the leading run of letters, digits, ``_``, ``.`` and ``-`` of a
# requirement string; everything from the first other character on is ignored.
_NAME_RE = re.compile(r"^[A-Za-z0-9_.-]+")
_SAMPLE_CAP = 256 * 1024  # bytes of source to scan for import markers
23
+
24
+
25
+ def _norm(name: str) -> str:
26
+ return name.strip().lower().replace("_", "-")
27
+
28
+
29
def _dep_name(requirement: str) -> str:
    """Extract the normalised distribution name from a requirement string.

    ``fastapi>=0.110`` gives ``fastapi``. Returns ``""`` when the string does
    not begin with a name character.
    """
    match = _NAME_RE.match(requirement.strip())
    if match is None:
        return ""
    return _norm(match.group(0))
34
+
35
+
36
def _pyproject_deps(path: Path) -> set[str]:
    """Return the normalised dependency names declared in a ``pyproject.toml``.

    Reads ``project.dependencies``, every ``project.optional-dependencies``
    group, and the keys of ``tool.poetry.dependencies``. This is best-effort:
    an unreadable or unparsable file gives an empty set, and any section whose
    value is not the expected table/array is skipped rather than raising or
    being iterated character by character.

    Args:
        path: Location of the ``pyproject.toml`` file.

    Returns:
        The set of names found, excluding empty names and ``python``.
    """
    try:
        data = tomllib.loads(path.read_text(encoding="utf-8", errors="replace"))
    except (OSError, tomllib.TOMLDecodeError):
        return set()

    def table(parent: dict, key: str) -> dict:
        # A section only counts when it really is a TOML table.
        value = parent.get(key)
        return value if isinstance(value, dict) else {}

    def array(value: object) -> list:
        # A requirement list only counts when it really is a TOML array.
        return value if isinstance(value, list) else []

    deps: set[str] = set()
    project = table(data, "project")
    deps.update(_dep_name(str(req)) for req in array(project.get("dependencies")))
    for group in table(project, "optional-dependencies").values():
        deps.update(_dep_name(str(req)) for req in array(group))
    # poetry-style: dependency names are the table's keys
    poetry = table(table(data, "tool"), "poetry")
    deps.update(_norm(str(key)) for key in table(poetry, "dependencies"))
    # Poetry lists the interpreter constraint as a "python" dependency.
    return {d for d in deps if d and d != "python"}
51
+
52
+
53
def _requirements_deps(path: Path) -> set[str]:
    """Return the normalised dependency names listed in a requirements file.

    Blank lines, comment lines (``#``) and option lines (starting with ``-``)
    are ignored. An unreadable file gives an empty set.
    """
    try:
        text = path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return set()
    names: set[str] = set()
    for raw in text.splitlines():
        entry = raw.strip()
        if not entry or entry.startswith(("#", "-")):
            continue
        names.add(_dep_name(entry))
    # Lines with no leading name character produce "", which is not a dependency.
    names.discard("")
    return names
64
+
65
+
66
def _manifest_deps(repo_path: Path) -> set[str]:
    """Collect dependency names from the manifests directly under ``repo_path``.

    Looks at ``pyproject.toml`` and every ``requirements*.txt`` in that
    directory. Names only, lowercased with ``_``→``-`` so ``Flask_SQLAlchemy``
    == ``flask-sqlalchemy``.
    """
    found: set[str] = set()
    pyproject = repo_path / "pyproject.toml"
    if pyproject.is_file():
        found.update(_pyproject_deps(pyproject))
    for requirements_file in repo_path.glob("requirements*.txt"):
        found.update(_requirements_deps(requirements_file))
    return found
76
+
77
+
78
def _source_sample(repo_path: Path, exts: set[str]) -> str:
    """Concatenate at most ``_SAMPLE_CAP`` characters of source for
    import-marker confirmation.

    Files are visited in sorted path order; only regular files whose suffix is
    in ``exts`` and that are not under a ``.ckg`` directory are read. Each
    read is bounded by the remaining budget, so a single very large file can
    neither be loaded whole nor push the sample past the cap. The cap is
    counted in decoded characters, as an approximation of bytes.

    Args:
        repo_path: Root directory to walk recursively.
        exts: File suffixes (with leading dot) to include.

    Returns:
        The sampled file contents joined with newlines; unreadable files are
        skipped.
    """
    chunks: list[str] = []
    remaining = _SAMPLE_CAP
    for path in sorted(repo_path.rglob("*")):
        if remaining <= 0:
            break
        # Cheap name checks first, so most entries never need a stat call.
        if path.suffix not in exts or ".ckg" in path.parts or not path.is_file():
            continue
        try:
            with path.open(encoding="utf-8", errors="replace") as handle:
                text = handle.read(remaining)
        except OSError:
            continue
        chunks.append(text)
        remaining -= len(text)
    return "\n".join(chunks)
95
+
96
+
97
def active_frameworks(
    repo_path: str | Path,
    config: str | Path | None,
    registry: FrameworkRegistry,
    language_extensions: set[str],
) -> list[FrameworkPack]:
    """Return the registry's packs that are active for the repo.

    ``enabled == "off"`` gives no packs. An explicit list selects the named
    packs plus the force-enabled ones, with no detection. Any other value
    means auto-detection: a pack is active when it is force-enabled or its
    ``detect`` accepts the manifest dependency names and the source sample.
    Packs are always returned in registry order.
    """
    cfg = FrameworksConfig.load(config)
    mode = cfg.enabled
    if mode == "off":
        return []

    root = Path(repo_path)
    forced = set(cfg.packs)

    # Explicit list short-circuits detection (still honour force-enable).
    if isinstance(mode, list):
        selected = forced.union(mode)
        return [pack for pack in registry.packs if pack.name in selected]

    # "auto": dependency manifest + import-marker fallback.
    deps = _manifest_deps(root)
    sample = ""
    # The source walk is only done when some pack has markers to look for.
    if any(pack.import_markers for pack in registry.packs):
        sample = _source_sample(root, language_extensions)
    return [
        pack for pack in registry.packs if pack.name in forced or pack.detect(deps, sample)
    ]
@@ -0,0 +1,63 @@
1
+ """``FrameworkExtractor`` — run the active framework packs over one file
2
+ (feat-011). Selects packs by the file's language and merges their
3
+ ``FrameworkFacts``. File-isolated and stateless, so it runs inside the same
4
+ worker thread as the language extractor (pipeline)."""
5
+
6
+ from __future__ import annotations
7
+
8
+ from agentforge_graph.core import (
9
+ Edge,
10
+ EdgeKind,
11
+ GraphQuery,
12
+ GraphStore,
13
+ Node,
14
+ NodeKind,
15
+ SourceFile,
16
+ )
17
+
18
+ from .base import FrameworkFacts, FrameworkPack
19
+
20
+ _ALL = 10_000_000 # effectively unbounded query for v0.1 graph sizes
21
+
22
+
23
class FrameworkExtractor:
    """Runs the active framework packs over files and merges their facts.

    Packs are indexed by every SymbolID slug they declare, so ``extract`` can
    pick the packs for a file from ``file.language``.
    """

    def __init__(self, packs: list[FrameworkPack]) -> None:
        """Snapshot ``packs`` and index them by slug.

        The slug index is built from the snapshot rather than from the
        argument, so a one-shot iterable cannot leave the extractor "active"
        with an empty index.
        """
        self._packs = list(packs)
        self._by_slug: dict[str, list[FrameworkPack]] = {}
        for pack in self._packs:
            for slug in pack.slugs:
                self._by_slug.setdefault(slug, []).append(pack)

    @property
    def active(self) -> bool:
        """True when at least one pack was supplied."""
        return bool(self._packs)

    def extract(self, file: SourceFile, repo: str, commit: str) -> FrameworkFacts:
        """Run every pack registered for ``file.language`` and merge the output.

        Nodes and edges are concatenated in pack order and the ``unresolved``
        counts are summed. A file whose language has no pack yields empty facts.
        """
        merged = FrameworkFacts()
        for pack in self._by_slug.get(file.language, []):
            facts = pack.extract(file, repo, commit)
            merged.nodes.extend(facts.nodes)
            merged.edges.extend(facts.edges)
            merged.unresolved += facts.unresolved
        return merged

    async def resolve(self, store: GraphStore, commit: str = "") -> tuple[int, int]:
        """Run every active pack's cross-file pass-2 (ORM relationship/FK string
        targets, …) and replace the previous generation of framework-resolved
        edges. Globally idempotent: clears all ``RELATES_TO`` out of the current
        framework nodes, then rebuilds from the whole-repo node set — so an
        incremental resolve converges to the same graph as a full re-index
        (feat-004). Returns ``(edges_resolved, targets_unresolved)``."""
        if not self._packs:
            return 0, 0
        models = (await store.query(GraphQuery(kinds=[NodeKind.DATA_MODEL], limit=_ALL))).nodes
        if models:
            await store.clear_outgoing([m.id for m in models], EdgeKind.RELATES_TO)
        # Relations recorded in pass-1; whatever does not become an edge below
        # is reported as unresolved.
        pending = sum(len(m.attrs.get("relations") or []) for m in models)

        edges: list[Node | Edge] = []
        for pack in self._packs:
            edges.extend(await pack.resolve(store, commit))
        if edges:
            await store.add(edges)
        return len(edges), max(0, pending - len(edges))
@@ -0,0 +1,93 @@
1
+ """Shared ORM pass-2 helpers (feat-011).
2
+
3
+ The cross-file ``RELATES_TO`` stitch is the same shape for every ORM pack: load
4
+ the whole-repo model set, index it by class name and table, then turn each
5
+ model's pending ``relations`` (recorded in pass-1) into edges via a unique-match
6
+ lookup (ADR-0004 — never guess an ambiguous target). Only the per-relation
7
+ target resolution differs per framework, so that is injected as a callback.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from collections.abc import Callable, Iterable
13
+
14
+ from agentforge_graph.core import (
15
+ Edge,
16
+ EdgeKind,
17
+ GraphQuery,
18
+ GraphStore,
19
+ Node,
20
+ NodeKind,
21
+ Provenance,
22
+ SymbolID,
23
+ )
24
+
25
+ _ALL = 10_000_000 # effectively unbounded query for v0.1 graph sizes
26
+
27
+
28
class ModelIndex:
    """Whole-repo model lookup by class name and table, each mapping to the set
    of model ids carrying it (so duplicates are detectable and never guessed)."""

    def __init__(self, models: Iterable[Node]) -> None:
        """Index ``models`` by their ``model_class`` and ``table`` attributes.

        An attribute that is missing, ``None`` or empty is not indexed. In
        particular an explicit ``None`` no longer becomes the literal key
        ``"None"``.
        """
        self.by_class: dict[str, set[str]] = {}
        self.by_table: dict[str, set[str]] = {}
        for m in models:
            self._index(self.by_class, m.attrs.get("model_class"), m.id)
            self._index(self.by_table, m.attrs.get("table"), m.id)

    @staticmethod
    def _index(bucket: dict[str, set[str]], key: object, model_id: str) -> None:
        """Record ``model_id`` under ``str(key)`` unless the key is absent."""
        if key is None:
            return
        text = str(key)
        if text:
            bucket.setdefault(text, set()).add(model_id)

    def unique_class(self, name: str) -> str | None:
        """Return the id of the only model with class ``name``, else None."""
        return _unique(self.by_class.get(name))

    def unique_table(self, name: str) -> str | None:
        """Return the id of the only model with table ``name``, else None."""
        return _unique(self.by_table.get(name))
48
+
49
+
50
+ def _unique(ids: set[str] | None) -> str | None:
51
+ return next(iter(ids)) if ids and len(ids) == 1 else None
52
+
53
+
54
async def framework_models(store: GraphStore, framework: str) -> list[Node]:
    """Return the ``DataModel`` nodes whose ``framework`` attribute equals
    ``framework``."""
    query = GraphQuery(kinds=[NodeKind.DATA_MODEL], limit=_ALL)
    result = await store.query(query)
    return [node for node in result.nodes if node.attrs.get("framework") == framework]
58
+
59
+
60
def relations_to_edges(
    models: list[Node],
    index: ModelIndex,
    resolve_target: Callable[[dict[str, str], ModelIndex], str | None],
    prov: Provenance,
) -> list[Edge]:
    """Turn each model's pending ``relations`` into ``RELATES_TO`` edges.

    ``resolve_target`` maps one relation dict to a target model id, or None
    when no edge should be made. At most one edge is kept per
    ``(source, target, kind)``; the first relation seen wins. Each edge
    carries ``attrs.kind`` and ``attrs.via`` (the relation's ``field``), and
    its ``origin_path`` is the path parsed from the source model's id.
    """
    # Insertion-ordered dict: deduplicates while keeping first-seen order.
    built: dict[tuple[str, str, str], Edge] = {}
    for model in models:
        for relation in model.attrs.get("relations") or []:
            target = resolve_target(relation, index)
            if target is None:
                continue
            rel_kind = str(relation.get("kind", ""))
            key = (model.id, target, rel_kind)
            if key in built:
                continue
            via = str(relation.get("field", ""))
            built[key] = Edge(
                src=model.id,
                dst=target,
                kind=EdgeKind.RELATES_TO,
                attrs={"kind": rel_kind, "via": via},
                provenance=prov,
                origin_path=SymbolID.parse(model.id).path,
            )
    return list(built.values())