agentforge-graph 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. agentforge_graph/__init__.py +6 -0
  2. agentforge_graph/chunking/__init__.py +12 -0
  3. agentforge_graph/chunking/cast.py +159 -0
  4. agentforge_graph/chunking/chunk.py +19 -0
  5. agentforge_graph/chunking/tokens.py +15 -0
  6. agentforge_graph/cli.py +607 -0
  7. agentforge_graph/config.py +259 -0
  8. agentforge_graph/core/__init__.py +54 -0
  9. agentforge_graph/core/conformance.py +270 -0
  10. agentforge_graph/core/contracts.py +163 -0
  11. agentforge_graph/core/kinds.py +68 -0
  12. agentforge_graph/core/models.py +134 -0
  13. agentforge_graph/core/provenance.py +62 -0
  14. agentforge_graph/core/symbols.py +116 -0
  15. agentforge_graph/embed/__init__.py +28 -0
  16. agentforge_graph/embed/base.py +22 -0
  17. agentforge_graph/embed/bedrock.py +85 -0
  18. agentforge_graph/embed/fake.py +34 -0
  19. agentforge_graph/embed/openai.py +67 -0
  20. agentforge_graph/embed/pipeline.py +184 -0
  21. agentforge_graph/embed/registry.py +66 -0
  22. agentforge_graph/embed/report.py +15 -0
  23. agentforge_graph/enrich/__init__.py +70 -0
  24. agentforge_graph/enrich/anthropic.py +38 -0
  25. agentforge_graph/enrich/anthropic_client.py +109 -0
  26. agentforge_graph/enrich/bedrock.py +24 -0
  27. agentforge_graph/enrich/bedrock_client.py +115 -0
  28. agentforge_graph/enrich/bedrock_summarizer.py +23 -0
  29. agentforge_graph/enrich/claude.py +172 -0
  30. agentforge_graph/enrich/enricher.py +108 -0
  31. agentforge_graph/enrich/governs.py +173 -0
  32. agentforge_graph/enrich/governs_enricher.py +152 -0
  33. agentforge_graph/enrich/heuristics.py +224 -0
  34. agentforge_graph/enrich/judge.py +63 -0
  35. agentforge_graph/enrich/registry.py +133 -0
  36. agentforge_graph/enrich/report.py +60 -0
  37. agentforge_graph/enrich/summarizer.py +62 -0
  38. agentforge_graph/enrich/summary_enricher.py +211 -0
  39. agentforge_graph/enrich/taxonomy.py +38 -0
  40. agentforge_graph/frameworks/__init__.py +29 -0
  41. agentforge_graph/frameworks/base.py +75 -0
  42. agentforge_graph/frameworks/detect.py +124 -0
  43. agentforge_graph/frameworks/extractor.py +63 -0
  44. agentforge_graph/frameworks/orm.py +93 -0
  45. agentforge_graph/frameworks/packs/_js_ast.py +56 -0
  46. agentforge_graph/frameworks/packs/_python_ast.py +157 -0
  47. agentforge_graph/frameworks/packs/django/__init__.py +240 -0
  48. agentforge_graph/frameworks/packs/django/models.scm +7 -0
  49. agentforge_graph/frameworks/packs/express/__init__.py +133 -0
  50. agentforge_graph/frameworks/packs/express/routes.scm +8 -0
  51. agentforge_graph/frameworks/packs/fastapi/__init__.py +210 -0
  52. agentforge_graph/frameworks/packs/fastapi/depends.scm +6 -0
  53. agentforge_graph/frameworks/packs/fastapi/routes.scm +10 -0
  54. agentforge_graph/frameworks/packs/flask/__init__.py +143 -0
  55. agentforge_graph/frameworks/packs/flask/routes.scm +11 -0
  56. agentforge_graph/frameworks/packs/nestjs/__init__.py +205 -0
  57. agentforge_graph/frameworks/packs/nestjs/routes.scm +6 -0
  58. agentforge_graph/frameworks/packs/spring/__init__.py +267 -0
  59. agentforge_graph/frameworks/packs/spring/routes.scm +6 -0
  60. agentforge_graph/frameworks/packs/sqlalchemy/__init__.py +250 -0
  61. agentforge_graph/frameworks/packs/sqlalchemy/models.scm +7 -0
  62. agentforge_graph/frameworks/registry.py +44 -0
  63. agentforge_graph/ingest/__init__.py +30 -0
  64. agentforge_graph/ingest/codegraph.py +847 -0
  65. agentforge_graph/ingest/extractor.py +353 -0
  66. agentforge_graph/ingest/incremental/__init__.py +25 -0
  67. agentforge_graph/ingest/incremental/detect.py +118 -0
  68. agentforge_graph/ingest/incremental/dirty.py +61 -0
  69. agentforge_graph/ingest/incremental/indexer.py +218 -0
  70. agentforge_graph/ingest/incremental/meta.py +72 -0
  71. agentforge_graph/ingest/incremental/ports.py +39 -0
  72. agentforge_graph/ingest/pack.py +160 -0
  73. agentforge_graph/ingest/packs/__init__.py +34 -0
  74. agentforge_graph/ingest/packs/cpp/__init__.py +35 -0
  75. agentforge_graph/ingest/packs/cpp/references.scm +15 -0
  76. agentforge_graph/ingest/packs/cpp/structure.scm +49 -0
  77. agentforge_graph/ingest/packs/csharp/__init__.py +35 -0
  78. agentforge_graph/ingest/packs/csharp/references.scm +12 -0
  79. agentforge_graph/ingest/packs/csharp/structure.scm +45 -0
  80. agentforge_graph/ingest/packs/go/__init__.py +38 -0
  81. agentforge_graph/ingest/packs/go/references.scm +12 -0
  82. agentforge_graph/ingest/packs/go/structure.scm +64 -0
  83. agentforge_graph/ingest/packs/java/__init__.py +35 -0
  84. agentforge_graph/ingest/packs/java/references.scm +12 -0
  85. agentforge_graph/ingest/packs/java/structure.scm +38 -0
  86. agentforge_graph/ingest/packs/javascript/__init__.py +34 -0
  87. agentforge_graph/ingest/packs/javascript/references.scm +11 -0
  88. agentforge_graph/ingest/packs/javascript/structure.scm +166 -0
  89. agentforge_graph/ingest/packs/php/__init__.py +35 -0
  90. agentforge_graph/ingest/packs/php/references.scm +15 -0
  91. agentforge_graph/ingest/packs/php/structure.scm +44 -0
  92. agentforge_graph/ingest/packs/python/__init__.py +25 -0
  93. agentforge_graph/ingest/packs/python/references.scm +14 -0
  94. agentforge_graph/ingest/packs/python/structure.scm +57 -0
  95. agentforge_graph/ingest/packs/ruby/__init__.py +37 -0
  96. agentforge_graph/ingest/packs/ruby/references.scm +12 -0
  97. agentforge_graph/ingest/packs/ruby/structure.scm +37 -0
  98. agentforge_graph/ingest/packs/rust/__init__.py +39 -0
  99. agentforge_graph/ingest/packs/rust/references.scm +12 -0
  100. agentforge_graph/ingest/packs/rust/structure.scm +46 -0
  101. agentforge_graph/ingest/packs/typescript/__init__.py +31 -0
  102. agentforge_graph/ingest/packs/typescript/references.scm +11 -0
  103. agentforge_graph/ingest/packs/typescript/structure.scm +99 -0
  104. agentforge_graph/ingest/pipeline.py +134 -0
  105. agentforge_graph/ingest/report.py +84 -0
  106. agentforge_graph/ingest/resolver.py +467 -0
  107. agentforge_graph/ingest/source.py +79 -0
  108. agentforge_graph/knowledge/__init__.py +28 -0
  109. agentforge_graph/knowledge/adr.py +136 -0
  110. agentforge_graph/knowledge/commits.py +152 -0
  111. agentforge_graph/knowledge/ingest.py +312 -0
  112. agentforge_graph/knowledge/mentions.py +71 -0
  113. agentforge_graph/knowledge/report.py +32 -0
  114. agentforge_graph/main.py +21 -0
  115. agentforge_graph/providers.py +36 -0
  116. agentforge_graph/repomap/__init__.py +14 -0
  117. agentforge_graph/repomap/rank.py +161 -0
  118. agentforge_graph/repomap/render.py +55 -0
  119. agentforge_graph/repomap/repomap.py +66 -0
  120. agentforge_graph/retrieve/__init__.py +21 -0
  121. agentforge_graph/retrieve/pack.py +76 -0
  122. agentforge_graph/retrieve/rerank.py +251 -0
  123. agentforge_graph/retrieve/retriever.py +286 -0
  124. agentforge_graph/retrieve/scoring.py +36 -0
  125. agentforge_graph/serve/__init__.py +19 -0
  126. agentforge_graph/serve/engine.py +204 -0
  127. agentforge_graph/serve/http_runner.py +133 -0
  128. agentforge_graph/serve/server.py +110 -0
  129. agentforge_graph/serve/tools.py +307 -0
  130. agentforge_graph/store/__init__.py +32 -0
  131. agentforge_graph/store/_rowmap.py +102 -0
  132. agentforge_graph/store/errors.py +22 -0
  133. agentforge_graph/store/facade.py +89 -0
  134. agentforge_graph/store/kuzu_store.py +380 -0
  135. agentforge_graph/store/lance_store.py +146 -0
  136. agentforge_graph/store/neo4j_store.py +294 -0
  137. agentforge_graph/store/pgvector_store.py +170 -0
  138. agentforge_graph/store/registry.py +45 -0
  139. agentforge_graph/temporal/__init__.py +36 -0
  140. agentforge_graph/temporal/backfill.py +338 -0
  141. agentforge_graph/temporal/events.py +82 -0
  142. agentforge_graph/temporal/index.py +190 -0
  143. agentforge_graph/temporal/mining.py +190 -0
  144. agentforge_graph/temporal/recorder.py +114 -0
  145. agentforge_graph/temporal/store.py +282 -0
  146. agentforge_graph-0.3.2.dist-info/METADATA +291 -0
  147. agentforge_graph-0.3.2.dist-info/RECORD +151 -0
  148. agentforge_graph-0.3.2.dist-info/WHEEL +4 -0
  149. agentforge_graph-0.3.2.dist-info/entry_points.txt +3 -0
  150. agentforge_graph-0.3.2.dist-info/licenses/LICENSE +202 -0
  151. agentforge_graph-0.3.2.dist-info/licenses/NOTICE +14 -0
@@ -0,0 +1,251 @@
1
+ """Reranking hook (feat-006 seam; ENH-009).
2
+
3
+ Three modes, all behind ``retrieve.rerank``:
4
+
5
+ - ``off`` (default) — identity; pure cosine + graph score.
6
+ - ``lexical`` — a deterministic, dependency-free blend of the base score with the
7
+ **subtoken overlap** between the query and the candidate's name + code, so a
8
+ chunk whose symbol the query names (``ZodObject``, ``_parse``, ``res.send``)
9
+ sorts up even when its raw cosine landed *near* the answer. Useful for
10
+ keyword/symbol-naming queries; measured mixed on prose (hence opt-in).
11
+ - ``cross_encoder`` — a real semantic re-score: a cross-encoder relevance model
12
+ (``sentence-transformers``, the ``rerank`` extra) scores each (query,
13
+ candidate) pair, blended with the base score. The highest-leverage lever for
14
+ natural-language → symbol precision. The model is lazy-loaded so the base
15
+ install / CI never import torch; the blend logic is injectable (``CrossScorer``)
16
+ so it is tested without the model. Third-party only — no ``agentforge`` import
17
+ (ADR-0001).
18
+
19
+ All rerankers are deterministic given their inputs and order-stable on ties."""
20
+
21
+ from __future__ import annotations
22
+
23
+ import asyncio
24
+ import math
25
+ import re
26
+ from typing import Protocol
27
+
28
+ from .pack import ContextItem
29
+
30
+ # A capable, small default cross-encoder; overridable via ``retrieve.rerank_model``.
31
+ DEFAULT_CROSS_ENCODER = "cross-encoder/ms-marco-MiniLM-L-6-v2"
32
+ _MAX_CANDIDATE_CHARS = 2000 # cross-encoders truncate anyway; bound the payload
33
+
34
+ # Function words that carry no retrieval signal — dropped from both sides.
35
+ _STOP = frozenset(
36
+ [
37
+ "a",
38
+ "an",
39
+ "the",
40
+ "is",
41
+ "are",
42
+ "was",
43
+ "were",
44
+ "be",
45
+ "how",
46
+ "do",
47
+ "does",
48
+ "did",
49
+ "of",
50
+ "to",
51
+ "in",
52
+ "on",
53
+ "for",
54
+ "and",
55
+ "or",
56
+ "with",
57
+ "this",
58
+ "that",
59
+ "it",
60
+ "as",
61
+ "at",
62
+ "by",
63
+ "from",
64
+ "into",
65
+ "than",
66
+ "then",
67
+ "over",
68
+ "under",
69
+ "not",
70
+ "no",
71
+ "your",
72
+ "you",
73
+ "we",
74
+ "i",
75
+ "me",
76
+ "my",
77
+ "our",
78
+ "their",
79
+ "its",
80
+ "they",
81
+ "them",
82
+ "he",
83
+ "she",
84
+ "where",
85
+ "what",
86
+ "which",
87
+ "who",
88
+ "when",
89
+ "why",
90
+ "can",
91
+ "could",
92
+ "should",
93
+ "would",
94
+ "will",
95
+ "shall",
96
+ "may",
97
+ "might",
98
+ "must",
99
+ "have",
100
+ "has",
101
+ "had",
102
+ "get",
103
+ "set",
104
+ "up",
105
+ "out",
106
+ "off",
107
+ "via",
108
+ "per",
109
+ "use",
110
+ "used",
111
+ "using",
112
+ "return",
113
+ "returns",
114
+ ]
115
+ )
116
+ # Split identifiers into subtokens: ALLCAPS, CamelChunk, or a run of lower/digits.
117
+ _CAMEL = re.compile(r"[A-Z]+(?![a-z])|[A-Z][a-z0-9]*|[a-z0-9]+")
118
+
119
+
120
def _tokens(text: str) -> set[str]:
    """Return the lowercased retrieval subtokens of ``text``.

    The text is cut on every run of non-alphanumeric characters and each piece
    is split again on camelCase boundaries, so ``ZodObject._parse`` yields
    {zod, object, parse}. Stopwords and single-character tokens are dropped
    as noise."""
    pieces = (
        part.lower()
        for word in re.split(r"[^A-Za-z0-9]+", text)
        for part in _CAMEL.findall(word)
    )
    return {piece for piece in pieces if len(piece) >= 2 and piece not in _STOP}
131
+
132
+
133
class Reranker(Protocol):
    """Structural interface shared by the rerankers in this module: an async
    ``rerank`` that takes the query and the candidate items and returns a list
    of items."""

    async def rerank(self, query: str, items: list[ContextItem]) -> list[ContextItem]: ...
135
+
136
+
137
class NoopReranker:
    """Identity reranker (rerank: off)."""

    async def rerank(self, query: str, items: list[ContextItem]) -> list[ContextItem]:
        # ``query`` is ignored; the same list object is handed back unchanged.
        return items
142
+
143
+
144
class LexicalReranker:
    """Re-score candidates by query↔candidate subtoken overlap, then re-sort.

    The blended score is ``(1 - weight)·base + weight·overlap``; ``overlap`` is
    the share of the query's (non-stopword) subtokens that also occur in the
    candidate's name + code. ``weight`` is clamped to ``[0, 1]``. Output is
    deterministic: equal scores are ordered by item id."""

    def __init__(self, weight: float = 0.5) -> None:
        capped = min(1.0, weight)
        self._w = max(0.0, capped)

    async def rerank(self, query: str, items: list[ContextItem]) -> list[ContextItem]:
        wanted = _tokens(query)
        # Nothing to match against, or nothing to rank: return the input as-is.
        if not (wanted and items):
            return items
        blended: list[ContextItem] = []
        for item in items:
            found = _tokens(f"{item.name} {item.code or ''}")
            overlap = len(wanted & found) / len(wanted)
            final = (1.0 - self._w) * item.score + self._w * overlap
            trace = [*item.why, f"lexical {overlap:.2f}"]
            blended.append(item.model_copy(update={"score": final, "why": trace}))
        # Highest score first; the id tiebreak keeps the order deterministic.
        return sorted(blended, key=lambda cand: (-cand.score, cand.id))
168
+
169
+
170
class CrossScorer(Protocol):
    """Scores (query, candidate-text) pairs — higher = more relevant. The
    injection seam that keeps the model out of the blend logic (and out of CI)."""

    # One score per entry of ``texts``, in the same order: the cross-encoder
    # reranker pairs the scores with its items via ``zip(..., strict=True)``.
    def score(self, query: str, texts: list[str]) -> list[float]: ...
175
+
176
+
177
def _candidate_text(it: ContextItem) -> str:
    """Build the text scored for one candidate: its name, a newline, then its
    code (or its signature when it has no code), cut to
    ``_MAX_CANDIDATE_CHARS`` characters."""
    body = it.code
    if not body:
        body = it.signature()
    rendered = f"{it.name}\n{body}"
    return rendered[:_MAX_CANDIDATE_CHARS]
180
+
181
+
182
class CrossEncoderReranker:
    """Re-score the candidates with a cross-encoder, then blend and re-sort.

    ``final = (1 - weight)·base + weight·σ(cross_score)`` — the cross-encoder's
    raw relevance logit is squashed to ``[0, 1]`` (so it is comparable to the
    cosine-scale base score) and blended. ``weight`` is clamped to ``[0, 1]``.
    The model call runs off the event loop (``to_thread``); identity on an
    empty query/candidate set. Every item passed in is scored — any top-k cut
    is the caller's business."""

    def __init__(self, scorer: CrossScorer, weight: float = 0.5) -> None:
        self._scorer = scorer
        self._w = max(0.0, min(1.0, weight))

    @staticmethod
    def _sigmoid(x: float) -> float:
        """Logistic σ(x) that cannot overflow.

        ``math.exp(-x)`` raises ``OverflowError`` once ``-x`` exceeds ~709.78;
        at that point σ(x) is below the smallest normal double, so 0.0 is the
        correct result. All other inputs take the plain formula unchanged."""
        try:
            return 1.0 / (1.0 + math.exp(-x))
        except OverflowError:
            return 0.0

    async def rerank(self, query: str, items: list[ContextItem]) -> list[ContextItem]:
        """Return ``items`` re-scored and sorted by blended score (descending,
        id as tiebreak). An empty query or item list is returned untouched.

        Raises ``ValueError`` (from ``zip(strict=True)``) if the scorer returns
        a different number of scores than there are items."""
        if not query or not items:
            return items
        texts = [_candidate_text(it) for it in items]
        # The scorer is synchronous (a model forward pass); keep it off the loop.
        raw = await asyncio.to_thread(self._scorer.score, query, texts)
        rescored: list[ContextItem] = []
        for it, r in zip(items, raw, strict=True):
            ce = self._sigmoid(r)  # σ → [0, 1], safe for extreme logits
            final = (1.0 - self._w) * it.score + self._w * ce
            rescored.append(
                it.model_copy(update={"score": final, "why": [*it.why, f"cross-encoder {ce:.2f}"]})
            )
        rescored.sort(key=lambda i: (-i.score, i.id))  # id tiebreak = deterministic
        return rescored
208
+
209
+
210
class SentenceTransformerScorer:
    """``CrossScorer`` implementation on top of ``sentence_transformers.CrossEncoder``.

    The model is only imported and constructed on the first scoring call, so
    importing this module (and running CI) never pulls torch. When the package
    is missing, the raised ``ImportError`` names the extra to install."""

    def __init__(self, model_name: str = DEFAULT_CROSS_ENCODER) -> None:
        self._model: object | None = None
        self._model_name = model_name

    def _ensure_model(self) -> object:
        """Return the cached model, loading it on first use."""
        if self._model is not None:
            return self._model
        try:
            from sentence_transformers import CrossEncoder
        except ImportError as exc:  # the extra isn't installed
            raise ImportError(
                "cross-encoder rerank needs the 'rerank' extra (uv sync --extra rerank)"
            ) from exc
        self._model = CrossEncoder(self._model_name)
        return self._model

    def score(self, query: str, texts: list[str]) -> list[float]:
        """Score each ``(query, text)`` pair; an empty ``texts`` returns ``[]``
        without touching the model."""
        if not texts:
            return []
        encoder = self._ensure_model()
        pairs = [(query, text) for text in texts]
        scores = encoder.predict(pairs)  # type: ignore[attr-defined]
        return [float(value) for value in scores]
235
+
236
+
237
def reranker_from_config(rerank: str, weight: float = 0.5, model: str = "") -> Reranker:
    """Turn the ``retrieve.rerank`` config value into a reranker instance.

    ``off`` or an empty/falsy value gives the identity reranker; ``lexical``
    gives a :class:`LexicalReranker`; ``cross_encoder`` gives a
    :class:`CrossEncoderReranker` over a lazily-loaded sentence-transformers
    model (``model`` replaces the default checkpoint when non-empty). Leading
    and trailing whitespace is ignored. Any other value raises ``ValueError``."""
    choice = (rerank or "off").strip()
    if choice == "lexical":
        return LexicalReranker(weight)
    if choice == "cross_encoder":
        scorer = SentenceTransformerScorer(model or DEFAULT_CROSS_ENCODER)
        return CrossEncoderReranker(scorer, weight)
    if choice in ("", "off"):
        return NoopReranker()
    raise ValueError(f"unknown reranker {choice!r}; use 'off', 'lexical' or 'cross_encoder'")
@@ -0,0 +1,286 @@
1
+ """``Retriever`` — vector entry → typed graph expansion → provenance-weighted
2
+ merge. Deterministic and LLM-free; the single retrieval surface feat-008 and
3
+ the enrichers ride on.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import re
9
+ from typing import Literal
10
+
11
+ from agentforge_graph.config import RetrieveConfig
12
+ from agentforge_graph.core import Direction, EdgeKind, Node, NodeKind, Source, SymbolID
13
+ from agentforge_graph.embed import Embedder
14
+ from agentforge_graph.store import Store
15
+
16
+ from .pack import ContextItem, ContextPack
17
+ from .rerank import NoopReranker, Reranker
18
+ from .scoring import dedupe_max, edge_weight, step_score
19
+
20
+ Mode = Literal["context", "impact", "definition", "similar"]
21
+
22
+ # code-symbol kinds an as_of allow-filter constrains (feat-009)
23
+ _SYMBOL_KINDS = frozenset({NodeKind.CLASS, NodeKind.FUNCTION, NodeKind.METHOD})
24
+
25
+ # A query "smells architectural" when it asks about decisions/rationale/design — the
26
+ # case where ADR/doc prose SHOULD rank with code. Else docs are down-weighted (feat-010).
27
+ _ARCH_TERMS = frozenset(
28
+ {
29
+ "why",
30
+ "decision",
31
+ "decisions",
32
+ "rationale",
33
+ "architecture",
34
+ "architectural",
35
+ "design",
36
+ "convention",
37
+ "conventions",
38
+ "adr",
39
+ "govern",
40
+ "governs",
41
+ "principle",
42
+ "tradeoff",
43
+ "trade-off",
44
+ "policy",
45
+ "constraint",
46
+ "supposed",
47
+ "allowed",
48
+ "forbidden",
49
+ "deprecated",
50
+ }
51
+ )
52
+
53
+ _WORD_RE = re.compile(r"[a-z0-9-]+")
54
+
55
+
56
def _is_architectural(query: str) -> bool:
    """True when at least one word of the lowercased ``query`` (as split by
    ``_WORD_RE``) is one of the ``_ARCH_TERMS``."""
    words = _WORD_RE.findall(query.lower())
    return not _ARCH_TERMS.isdisjoint(words)
58
+
59
+
60
+ # Trust order for the min_provenance filter: llm < parsed < resolved <= manual
61
+ # (human-asserted facts are trusted; ADR-0004 / spec §2). Distinct from
62
+ # GraphQuery.min_source and from the scoring edge_weights.
63
+ _RANK: dict[Source, int] = {Source.LLM: 0, Source.PARSED: 1, Source.RESOLVED: 2, Source.MANUAL: 3}
64
+ _FLOOR: dict[str, int] = {"parsed": 1, "resolved": 2}
65
+
66
+ # feat-009 churn/authorship fields denormalised onto a symbol's node.attrs; the
67
+ # Retriever surfaces them on the item without joining the temporal sidecar (it
68
+ # stays in the deterministic core, ADR-0001). Empty → item.temporal stays None.
69
+ _TEMPORAL_KEYS = (
70
+ "introduced",
71
+ "introduced_ts",
72
+ "last_changed",
73
+ "last_changed_ts",
74
+ "churn_30d",
75
+ "churn_90d",
76
+ "top_authors",
77
+ )
78
+
79
+
80
def _temporal_attrs(node: Node) -> dict[str, object] | None:
    """Collect the ``_TEMPORAL_KEYS`` present in ``node.attrs``, in key-tuple
    order; ``None`` when the node carries none of them."""
    attrs = node.attrs
    found: dict[str, object] = {}
    for key in _TEMPORAL_KEYS:
        if key in attrs:
            found[key] = attrs[key]
    return found if found else None
83
+
84
+
85
+ _MODE_EDGES: dict[Mode, tuple[list[EdgeKind], Direction]] = {
86
+ # GOVERNS/DESCRIBES (feat-010) surface the decision/doc governing a retrieved
87
+ # symbol; TAGGED + SUMMARIZES (feat-012) surface its design-pattern role and
88
+ # the module summary. The differentiators; llm items obey include_llm_facts.
89
+ "context": (
90
+ [
91
+ EdgeKind.CALLS,
92
+ EdgeKind.CONTAINS,
93
+ EdgeKind.INHERITS,
94
+ EdgeKind.REFERENCES,
95
+ EdgeKind.GOVERNS,
96
+ EdgeKind.DESCRIBES,
97
+ EdgeKind.TAGGED,
98
+ EdgeKind.SUMMARIZES,
99
+ ],
100
+ "both",
101
+ ),
102
+ "impact": ([EdgeKind.CALLS, EdgeKind.IMPORTS, EdgeKind.IMPLEMENTS], "in"),
103
+ "definition": ([EdgeKind.CONTAINS, EdgeKind.CHUNK_OF], "both"),
104
+ "similar": ([], "both"),
105
+ }
106
+
107
+
108
class Retriever:
    """Vector entry → typed graph expansion → provenance-weighted merge.

    ``retrieve`` embeds the query, turns the vector hits (and/or an explicit
    symbol) into seeds, walks the graph outward from the seeds, then filters,
    de-duplicates and reranks everything it collected into a ``ContextPack``.
    """

    def __init__(
        self,
        store: Store,
        embedder: Embedder,
        config: RetrieveConfig,
        reranker: Reranker | None = None,
    ) -> None:
        self.store = store
        self.embedder = embedder
        self.config = config
        # No reranker supplied → identity, so the merge order stands.
        self.reranker: Reranker = reranker or NoopReranker()

    async def retrieve(
        self,
        query: str | None = None,
        symbol: str | None = None,
        mode: Mode = "context",
        k: int | None = None,
        depth: int | None = None,
        edge_kinds: list[EdgeKind] | None = None,
        min_provenance: Literal["parsed", "resolved"] | None = None,
        include_llm_facts: bool = True,
        allow_ids: set[str] | None = None,
    ) -> ContextPack:
        """Build a context pack for a query and/or a symbol.

        Args:
            query: Free text; embedded and searched in the vector store. Each
                hit becomes an item and (except in ``similar`` mode) seeds the
                graph expansion.
            symbol: A node id seeded directly with score 1.0.
            mode: Selects the default edge kinds and direction from
                ``_MODE_EDGES``; ``similar`` skips seeding and expansion.
            k: Number of vector hits requested; defaults to ``config.k``.
            depth: Maximum expansion hops; defaults to ``config.depth``.
            edge_kinds: Overrides the edge kinds the mode would expand along.
            min_provenance: Drop items whose provenance ranks below this floor.
            include_llm_facts: When false, drop items with LLM provenance.
            allow_ids: When given, class/function/method items whose id is not
                in the set are dropped (feat-009 ``as_of``).

        Returns:
            The ``ContextPack`` of merged, reranked items plus any notes
            recorded during expansion.
        """
        cfg = self.config
        k = cfg.k if k is None else k
        depth = cfg.depth if depth is None else depth
        items: list[ContextItem] = []
        notes: list[str] = []
        seeds: dict[str, float] = {}

        # --- entry ---
        if query is not None:
            qvec = (await self.embedder.embed([query], "query"))[0]
            # down-weight ADR/doc prose so code outranks equally-similar docs, unless
            # the query smells architectural (then docs keep their full score). feat-010.
            doc_w = 1.0 if _is_architectural(query) else self.config.doc_weight
            for hit in await self.store.vectors.search(qvec, k):
                node = await self.store.graph.get(hit.ref)
                if node is None:
                    continue
                score = hit.score * doc_w if node.kind is NodeKind.DOC_CHUNK else hit.score
                items.append(self._item(node, score, [f"vector hit {score:.2f}"]))
                if mode != "similar":
                    # a chunk hit seeds its symbols; a summary hit (feat-012)
                    # seeds the code it summarizes — concept query → code.
                    for edge in await self.store.graph.adjacent(
                        hit.ref, [EdgeKind.CHUNK_OF, EdgeKind.SUMMARIZES], "out"
                    ):
                        seeds[edge.dst] = max(seeds.get(edge.dst, 0.0), score)
                    # a doc-chunk hit (feat-010) seeds the code it attaches to: an
                    # ADR section seeds its containing Decision (CONTAINS in → then
                    # GOVERNS to governed code); a docstring seeds the symbol it
                    # DESCRIBES (out). Either way a prose query reaches the code.
                    if node.kind is NodeKind.DOC_CHUNK:
                        for edge in await self.store.graph.adjacent(
                            hit.ref, [EdgeKind.CONTAINS], "in"
                        ):
                            seeds[edge.src] = max(seeds.get(edge.src, 0.0), score)
                        for edge in await self.store.graph.adjacent(
                            hit.ref, [EdgeKind.DESCRIBES], "out"
                        ):
                            seeds[edge.dst] = max(seeds.get(edge.dst, 0.0), score)
        if symbol is not None:
            seeds[symbol] = max(seeds.get(symbol, 0.0), 1.0)

        # Iterate over a snapshot: seeds whose node no longer exists are removed
        # so they are not expanded from.
        for sid, score in list(seeds.items()):
            node = await self.store.graph.get(sid)
            if node is None:
                del seeds[sid]
                continue
            items.append(self._item(node, score, ["entry"]))

        # --- expand ---
        kinds, direction = _MODE_EDGES[mode]
        if edge_kinds is not None:
            kinds = edge_kinds
        if mode != "similar" and depth > 0 and seeds:
            await self._expand(seeds, kinds, direction, depth, items, notes)

        # --- merge ---
        items = self._filter(items, min_provenance, include_llm_facts)
        if allow_ids is not None:  # feat-009 as_of: drop symbols not alive at the commit
            items = [it for it in items if it.kind not in _SYMBOL_KINDS or it.id in allow_ids]
        items = dedupe_max(items)
        items = await self.reranker.rerank(query or "", items)
        return ContextPack(query=query, symbol=symbol, mode=mode, items=items, notes=notes)

    async def _expand(
        self,
        seeds: dict[str, float],
        kinds: list[EdgeKind],
        direction: Direction,
        depth: int,
        items: list[ContextItem],
        notes: list[str],
    ) -> None:
        """Breadth-first walk from ``seeds`` for up to ``depth`` hops.

        Every traversed edge appends an item for the node at its far end, scored
        ``parent × decay × edge_weight`` (duplicates are collapsed later by
        ``dedupe_max``). ``items`` and ``notes`` are mutated in place; a note is
        recorded whenever a node's edge list is cut at ``config.fanout_cap``.
        """
        cfg = self.config
        frontier = dict(seeds)
        visited = set(seeds)
        for hop in range(1, depth + 1):
            nxt: dict[str, float] = {}
            for sid, score in frontier.items():
                parent = await self.store.graph.get(sid)
                pname = parent.name if parent else sid
                edges = await self.store.graph.adjacent(sid, kinds, direction)
                if len(edges) > cfg.fanout_cap:
                    notes.append(f"fan-out cap {cfg.fanout_cap} at {pname} ({len(edges)} edges)")
                    edges = edges[: cfg.fanout_cap]
                for edge in edges:
                    # the neighbour is whichever endpoint is not the node we came from
                    other = edge.dst if edge.src == sid else edge.src
                    onode = await self.store.graph.get(other)
                    if onode is None:
                        continue
                    oscore = step_score(
                        score, cfg.decay, edge_weight(cfg.edge_weights, edge.provenance.source)
                    )
                    items.append(
                        self._item(onode, oscore, [f"{edge.kind.value} of {pname} (hop {hop})"])
                    )
                    if other in nxt:
                        # Reached again within this hop via another parent: carry
                        # the strongest path forward, so the onward scores do not
                        # depend on frontier iteration order.
                        nxt[other] = max(nxt[other], oscore)
                    elif other not in visited:
                        visited.add(other)
                        nxt[other] = oscore
            frontier = nxt
            if not frontier:
                break

    def _item(self, node: Node, score: float, why: list[str]) -> ContextItem:
        """Wrap ``node`` as a ``ContextItem`` carrying ``score`` and a copy of
        the ``why`` trace; the path is parsed out of the node's symbol id."""
        return ContextItem(
            id=node.id,
            kind=node.kind,
            name=node.name,
            score=score,
            path=SymbolID.parse(node.id).path,
            span=node.span,
            code=self._render_code(node),
            provenance=node.provenance.source,
            why=list(why),
            temporal=_temporal_attrs(node),
        )

    @staticmethod
    def _render_code(node: Node) -> str | None:
        """The verbatim block a retrieved item renders. A Decision (feat-010)
        shows its status/date inline so the agent sees governance at a glance.
        Pattern tags, summaries and doc chunks get a bracketed label; any other
        kind renders its ``code`` attribute (``None`` when absent)."""
        if node.kind is NodeKind.DECISION:
            status = node.attrs.get("status", "")
            date = node.attrs.get("date", "")
            adr = node.attrs.get("adr_id", "")
            stamp = ", ".join(x for x in (status, date) if x)
            prefix = f"[{stamp}] " if stamp else ""
            label = f"{adr}: " if adr else ""
            return f"{prefix}{label}{node.attrs.get('title', node.name)}"
        if node.kind is NodeKind.PATTERN_TAG:
            return f"[llm] design pattern: {node.name}"
        if node.kind is NodeKind.SUMMARY:
            return f"[summary] {node.attrs.get('text', '')}"
        if node.kind is NodeKind.DOC_CHUNK:  # feat-010 — ADR/doc prose
            heading = node.attrs.get("heading", "")
            text = node.attrs.get("text", "")
            return f"[doc] {heading}\n{text}".strip() if heading else f"[doc] {text}".strip()
        return node.attrs.get("code")

    def _filter(
        self,
        items: list[ContextItem],
        min_provenance: Literal["parsed", "resolved"] | None,
        include_llm_facts: bool,
    ) -> list[ContextItem]:
        """Keep the items that pass the provenance filters, preserving order:
        LLM-sourced items go when ``include_llm_facts`` is false, and items
        ranking below the ``min_provenance`` floor go when one is set."""
        floor = _FLOOR[min_provenance] if min_provenance else None
        out: list[ContextItem] = []
        for it in items:
            if not include_llm_facts and it.provenance is Source.LLM:
                continue
            if floor is not None and _RANK[it.provenance] < floor:
                continue
            out.append(it)
        return out
@@ -0,0 +1,36 @@
1
+ """Scoring math for retrieval: provenance edge weights, per-hop decay, and
2
+ dedupe (max score wins, why-traces unioned)."""
3
+
4
+ from __future__ import annotations
5
+
6
+ from agentforge_graph.core import Source
7
+
8
+ from .pack import ContextItem
9
+
10
+
11
def edge_weight(weights: dict[str, float], source: Source) -> float:
    """Look up the expansion weight for an edge's provenance — resolved >
    parsed > llm (ADR-0004). ``weights`` is keyed by ``source.value``; a source
    with no entry gets 0.5."""
    key = source.value
    return weights[key] if key in weights else 0.5
15
+
16
+
17
def step_score(parent_score: float, decay: float, weight: float) -> float:
    """Score after one hop: ``parent × decay × edge_weight``. Applied hop after
    hop this gives the ``decay^hop`` falloff."""
    decayed = parent_score * decay
    return decayed * weight
21
+
22
+
23
def dedupe_max(items: list[ContextItem]) -> list[ContextItem]:
    """Merge items that share an id: the highest-scoring one survives (the
    earlier one on a tie) and carries the union of all their why-traces in
    first-seen order. The result is sorted by score, highest first."""
    winners: dict[str, ContextItem] = {}
    traces: dict[str, list[str]] = {}
    for item in items:
        trace = traces.setdefault(item.id, [])
        for reason in item.why:
            if reason in trace:
                continue
            trace.append(reason)
        current = winners.get(item.id)
        if current is None or item.score > current.score:
            winners[item.id] = item
    merged = [
        winner.model_copy(update={"why": traces[item_id]}) for item_id, winner in winners.items()
    ]
    merged.sort(key=lambda entry: entry.score, reverse=True)
    return merged
@@ -0,0 +1,19 @@
1
+ """agentforge_graph.serve — MCP server & AgentForge tool API (feat-008).
2
+
3
+ The framework-facing serving layer: the nine read-only tools over feat-006/007,
4
+ bound both as native AgentForge ``Tool`` instances (``code_graph_tools``) and an
5
+ MCP server (``serve_mcp``) over **stdio or streamable-HTTP**, from one definition.
6
+ This package imports ``agentforge`` (the deliberate ADR-0001 exception).
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from .engine import TOOL_API_VERSION
12
+ from .server import build_mcp_server, code_graph_tools, serve_mcp
13
+
14
+ __all__ = [
15
+ "TOOL_API_VERSION",
16
+ "code_graph_tools",
17
+ "serve_mcp",
18
+ "build_mcp_server",
19
+ ]