agentforge-graph 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. agentforge_graph/__init__.py +6 -0
  2. agentforge_graph/chunking/__init__.py +12 -0
  3. agentforge_graph/chunking/cast.py +159 -0
  4. agentforge_graph/chunking/chunk.py +19 -0
  5. agentforge_graph/chunking/tokens.py +15 -0
  6. agentforge_graph/cli.py +607 -0
  7. agentforge_graph/config.py +259 -0
  8. agentforge_graph/core/__init__.py +54 -0
  9. agentforge_graph/core/conformance.py +270 -0
  10. agentforge_graph/core/contracts.py +163 -0
  11. agentforge_graph/core/kinds.py +68 -0
  12. agentforge_graph/core/models.py +134 -0
  13. agentforge_graph/core/provenance.py +62 -0
  14. agentforge_graph/core/symbols.py +116 -0
  15. agentforge_graph/embed/__init__.py +28 -0
  16. agentforge_graph/embed/base.py +22 -0
  17. agentforge_graph/embed/bedrock.py +85 -0
  18. agentforge_graph/embed/fake.py +34 -0
  19. agentforge_graph/embed/openai.py +67 -0
  20. agentforge_graph/embed/pipeline.py +184 -0
  21. agentforge_graph/embed/registry.py +66 -0
  22. agentforge_graph/embed/report.py +15 -0
  23. agentforge_graph/enrich/__init__.py +70 -0
  24. agentforge_graph/enrich/anthropic.py +38 -0
  25. agentforge_graph/enrich/anthropic_client.py +109 -0
  26. agentforge_graph/enrich/bedrock.py +24 -0
  27. agentforge_graph/enrich/bedrock_client.py +115 -0
  28. agentforge_graph/enrich/bedrock_summarizer.py +23 -0
  29. agentforge_graph/enrich/claude.py +172 -0
  30. agentforge_graph/enrich/enricher.py +108 -0
  31. agentforge_graph/enrich/governs.py +173 -0
  32. agentforge_graph/enrich/governs_enricher.py +152 -0
  33. agentforge_graph/enrich/heuristics.py +224 -0
  34. agentforge_graph/enrich/judge.py +63 -0
  35. agentforge_graph/enrich/registry.py +133 -0
  36. agentforge_graph/enrich/report.py +60 -0
  37. agentforge_graph/enrich/summarizer.py +62 -0
  38. agentforge_graph/enrich/summary_enricher.py +211 -0
  39. agentforge_graph/enrich/taxonomy.py +38 -0
  40. agentforge_graph/frameworks/__init__.py +29 -0
  41. agentforge_graph/frameworks/base.py +75 -0
  42. agentforge_graph/frameworks/detect.py +124 -0
  43. agentforge_graph/frameworks/extractor.py +63 -0
  44. agentforge_graph/frameworks/orm.py +93 -0
  45. agentforge_graph/frameworks/packs/_js_ast.py +56 -0
  46. agentforge_graph/frameworks/packs/_python_ast.py +157 -0
  47. agentforge_graph/frameworks/packs/django/__init__.py +240 -0
  48. agentforge_graph/frameworks/packs/django/models.scm +7 -0
  49. agentforge_graph/frameworks/packs/express/__init__.py +133 -0
  50. agentforge_graph/frameworks/packs/express/routes.scm +8 -0
  51. agentforge_graph/frameworks/packs/fastapi/__init__.py +210 -0
  52. agentforge_graph/frameworks/packs/fastapi/depends.scm +6 -0
  53. agentforge_graph/frameworks/packs/fastapi/routes.scm +10 -0
  54. agentforge_graph/frameworks/packs/flask/__init__.py +143 -0
  55. agentforge_graph/frameworks/packs/flask/routes.scm +11 -0
  56. agentforge_graph/frameworks/packs/nestjs/__init__.py +205 -0
  57. agentforge_graph/frameworks/packs/nestjs/routes.scm +6 -0
  58. agentforge_graph/frameworks/packs/spring/__init__.py +267 -0
  59. agentforge_graph/frameworks/packs/spring/routes.scm +6 -0
  60. agentforge_graph/frameworks/packs/sqlalchemy/__init__.py +250 -0
  61. agentforge_graph/frameworks/packs/sqlalchemy/models.scm +7 -0
  62. agentforge_graph/frameworks/registry.py +44 -0
  63. agentforge_graph/ingest/__init__.py +30 -0
  64. agentforge_graph/ingest/codegraph.py +847 -0
  65. agentforge_graph/ingest/extractor.py +353 -0
  66. agentforge_graph/ingest/incremental/__init__.py +25 -0
  67. agentforge_graph/ingest/incremental/detect.py +118 -0
  68. agentforge_graph/ingest/incremental/dirty.py +61 -0
  69. agentforge_graph/ingest/incremental/indexer.py +218 -0
  70. agentforge_graph/ingest/incremental/meta.py +72 -0
  71. agentforge_graph/ingest/incremental/ports.py +39 -0
  72. agentforge_graph/ingest/pack.py +160 -0
  73. agentforge_graph/ingest/packs/__init__.py +34 -0
  74. agentforge_graph/ingest/packs/cpp/__init__.py +35 -0
  75. agentforge_graph/ingest/packs/cpp/references.scm +15 -0
  76. agentforge_graph/ingest/packs/cpp/structure.scm +49 -0
  77. agentforge_graph/ingest/packs/csharp/__init__.py +35 -0
  78. agentforge_graph/ingest/packs/csharp/references.scm +12 -0
  79. agentforge_graph/ingest/packs/csharp/structure.scm +45 -0
  80. agentforge_graph/ingest/packs/go/__init__.py +38 -0
  81. agentforge_graph/ingest/packs/go/references.scm +12 -0
  82. agentforge_graph/ingest/packs/go/structure.scm +64 -0
  83. agentforge_graph/ingest/packs/java/__init__.py +35 -0
  84. agentforge_graph/ingest/packs/java/references.scm +12 -0
  85. agentforge_graph/ingest/packs/java/structure.scm +38 -0
  86. agentforge_graph/ingest/packs/javascript/__init__.py +34 -0
  87. agentforge_graph/ingest/packs/javascript/references.scm +11 -0
  88. agentforge_graph/ingest/packs/javascript/structure.scm +166 -0
  89. agentforge_graph/ingest/packs/php/__init__.py +35 -0
  90. agentforge_graph/ingest/packs/php/references.scm +15 -0
  91. agentforge_graph/ingest/packs/php/structure.scm +44 -0
  92. agentforge_graph/ingest/packs/python/__init__.py +25 -0
  93. agentforge_graph/ingest/packs/python/references.scm +14 -0
  94. agentforge_graph/ingest/packs/python/structure.scm +57 -0
  95. agentforge_graph/ingest/packs/ruby/__init__.py +37 -0
  96. agentforge_graph/ingest/packs/ruby/references.scm +12 -0
  97. agentforge_graph/ingest/packs/ruby/structure.scm +37 -0
  98. agentforge_graph/ingest/packs/rust/__init__.py +39 -0
  99. agentforge_graph/ingest/packs/rust/references.scm +12 -0
  100. agentforge_graph/ingest/packs/rust/structure.scm +46 -0
  101. agentforge_graph/ingest/packs/typescript/__init__.py +31 -0
  102. agentforge_graph/ingest/packs/typescript/references.scm +11 -0
  103. agentforge_graph/ingest/packs/typescript/structure.scm +99 -0
  104. agentforge_graph/ingest/pipeline.py +134 -0
  105. agentforge_graph/ingest/report.py +84 -0
  106. agentforge_graph/ingest/resolver.py +467 -0
  107. agentforge_graph/ingest/source.py +79 -0
  108. agentforge_graph/knowledge/__init__.py +28 -0
  109. agentforge_graph/knowledge/adr.py +136 -0
  110. agentforge_graph/knowledge/commits.py +152 -0
  111. agentforge_graph/knowledge/ingest.py +312 -0
  112. agentforge_graph/knowledge/mentions.py +71 -0
  113. agentforge_graph/knowledge/report.py +32 -0
  114. agentforge_graph/main.py +21 -0
  115. agentforge_graph/providers.py +36 -0
  116. agentforge_graph/repomap/__init__.py +14 -0
  117. agentforge_graph/repomap/rank.py +161 -0
  118. agentforge_graph/repomap/render.py +55 -0
  119. agentforge_graph/repomap/repomap.py +66 -0
  120. agentforge_graph/retrieve/__init__.py +21 -0
  121. agentforge_graph/retrieve/pack.py +76 -0
  122. agentforge_graph/retrieve/rerank.py +251 -0
  123. agentforge_graph/retrieve/retriever.py +286 -0
  124. agentforge_graph/retrieve/scoring.py +36 -0
  125. agentforge_graph/serve/__init__.py +19 -0
  126. agentforge_graph/serve/engine.py +204 -0
  127. agentforge_graph/serve/http_runner.py +133 -0
  128. agentforge_graph/serve/server.py +110 -0
  129. agentforge_graph/serve/tools.py +307 -0
  130. agentforge_graph/store/__init__.py +32 -0
  131. agentforge_graph/store/_rowmap.py +102 -0
  132. agentforge_graph/store/errors.py +22 -0
  133. agentforge_graph/store/facade.py +89 -0
  134. agentforge_graph/store/kuzu_store.py +380 -0
  135. agentforge_graph/store/lance_store.py +146 -0
  136. agentforge_graph/store/neo4j_store.py +294 -0
  137. agentforge_graph/store/pgvector_store.py +170 -0
  138. agentforge_graph/store/registry.py +45 -0
  139. agentforge_graph/temporal/__init__.py +36 -0
  140. agentforge_graph/temporal/backfill.py +338 -0
  141. agentforge_graph/temporal/events.py +82 -0
  142. agentforge_graph/temporal/index.py +190 -0
  143. agentforge_graph/temporal/mining.py +190 -0
  144. agentforge_graph/temporal/recorder.py +114 -0
  145. agentforge_graph/temporal/store.py +282 -0
  146. agentforge_graph-0.3.2.dist-info/METADATA +291 -0
  147. agentforge_graph-0.3.2.dist-info/RECORD +151 -0
  148. agentforge_graph-0.3.2.dist-info/WHEEL +4 -0
  149. agentforge_graph-0.3.2.dist-info/entry_points.txt +3 -0
  150. agentforge_graph-0.3.2.dist-info/licenses/LICENSE +202 -0
  151. agentforge_graph-0.3.2.dist-info/licenses/NOTICE +14 -0
@@ -0,0 +1,115 @@
1
+ """Shared AWS Bedrock Claude client for the enrichers (feat-012).
2
+
3
+ One boto3 ``bedrock-runtime`` client (lazy, optional STS assume-role, sync on a
4
+ worker thread), one ``invoke`` that runs the Anthropic Messages API on Bedrock
5
+ and accumulates cost from token usage. The pattern judge and the summarizer both
6
+ ride this; it is the only model-calling surface (deterministic tests use the
7
+ scripted variants).
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import asyncio
13
+ import json
14
+ from typing import Any
15
+
16
# Price table: USD per one million tokens, stored as (input, output) pairs.
# Keys are substrings of a model id and are tried in insertion order, so a more
# specific key ("haiku-4-5") has to be listed before a broader one ("haiku").
_PRICES: dict[str, tuple[float, float]] = {
    "haiku-4-5": (1.0, 5.0),
    "haiku": (0.80, 4.0),
    "sonnet": (3.0, 15.0),
}
# Used when no key of ``_PRICES`` occurs in the model id.
_DEFAULT_PRICE = (1.0, 5.0)


def price_for(model: str) -> tuple[float, float]:
    """Return the ``(input, output)`` USD-per-1M-token price for ``model``.

    The first ``_PRICES`` key that is a substring of ``model`` wins; when none
    matches, ``_DEFAULT_PRICE`` is returned.
    """
    matching = (rate for fragment, rate in _PRICES.items() if fragment in model)
    return next(matching, _DEFAULT_PRICE)
30
+
31
+
32
+ class BedrockClient:
33
+ def __init__(
34
+ self,
35
+ model: str = "us.anthropic.claude-haiku-4-5-20251001-v1:0",
36
+ region: str = "us-east-1",
37
+ assume_role_arn: str | None = None,
38
+ max_tokens: int = 512,
39
+ ) -> None:
40
+ self.model = model
41
+ self.region = region
42
+ self.assume_role_arn = assume_role_arn
43
+ self.max_tokens = max_tokens
44
+ self._client: Any = None
45
+ self.cost_usd = 0.0
46
+
47
+ def _bedrock(self) -> Any:
48
+ if self._client is None:
49
+ import boto3
50
+
51
+ if self.assume_role_arn:
52
+ sts = boto3.client("sts", region_name=self.region)
53
+ creds = sts.assume_role(RoleArn=self.assume_role_arn, RoleSessionName="ckg-enrich")[
54
+ "Credentials"
55
+ ]
56
+ self._client = boto3.client(
57
+ "bedrock-runtime",
58
+ region_name=self.region,
59
+ aws_access_key_id=creds["AccessKeyId"],
60
+ aws_secret_access_key=creds["SecretAccessKey"],
61
+ aws_session_token=creds["SessionToken"],
62
+ )
63
+ else:
64
+ self._client = boto3.client("bedrock-runtime", region_name=self.region)
65
+ return self._client
66
+
67
+ async def invoke(
68
+ self,
69
+ system: str,
70
+ user: str,
71
+ tools: list[dict[str, Any]] | None = None,
72
+ tool_name: str | None = None,
73
+ ) -> dict[str, Any]:
74
+ """One Messages call; accumulates cost from usage. Returns the raw
75
+ payload (``content`` blocks + ``usage``)."""
76
+ payload = await asyncio.to_thread(self._invoke, system, user, tools, tool_name)
77
+ cents_in, cents_out = price_for(self.model)
78
+ usage = payload.get("usage", {})
79
+ self.cost_usd += (
80
+ usage.get("input_tokens", 0) * cents_in + usage.get("output_tokens", 0) * cents_out
81
+ ) / 1_000_000
82
+ return payload
83
+
84
+ def _invoke(
85
+ self,
86
+ system: str,
87
+ user: str,
88
+ tools: list[dict[str, Any]] | None,
89
+ tool_name: str | None,
90
+ ) -> dict[str, Any]:
91
+ body: dict[str, Any] = {
92
+ "anthropic_version": "bedrock-2023-05-31",
93
+ "max_tokens": self.max_tokens,
94
+ "system": system,
95
+ "messages": [{"role": "user", "content": user}],
96
+ }
97
+ if tools is not None:
98
+ body["tools"] = tools
99
+ if tool_name is not None:
100
+ body["tool_choice"] = {"type": "tool", "name": tool_name}
101
+ resp = self._bedrock().invoke_model(
102
+ modelId=self.model,
103
+ contentType="application/json",
104
+ accept="application/json",
105
+ body=json.dumps(body),
106
+ )
107
+ result: dict[str, Any] = json.loads(resp["body"].read())
108
+ return result
109
+
110
+
111
def text_of(payload: dict[str, Any]) -> str:
    """Join the ``text`` of every ``type == "text"`` content block, then strip.

    Blocks of any other type are ignored; a payload with no ``content`` yields
    an empty string.
    """
    pieces: list[str] = []
    for block in payload.get("content", []):
        if block.get("type") == "text":
            pieces.append(block.get("text", ""))
    return "".join(pieces).strip()
@@ -0,0 +1,23 @@
1
+ """``BedrockClaudeSummarizer`` — module summaries on AWS Bedrock (feat-012).
2
+
3
+ Thin endpoint adapter: the summary prompts + plain-text completion live in the
4
+ provider-neutral ``ClaudeSummarizer`` (``claude.py``); this wires it to a Bedrock
5
+ transport (``BedrockClient``). The Anthropic-API sibling is
6
+ ``AnthropicClaudeSummarizer`` (``anthropic.py``). Tests use ``ScriptedSummarizer``.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from .bedrock_client import BedrockClient
12
+ from .claude import ClaudeSummarizer
13
+
14
+
15
class BedrockClaudeSummarizer(ClaudeSummarizer):
    """``ClaudeSummarizer`` wired to a ``BedrockClient`` transport."""

    def __init__(
        self,
        model: str = "us.anthropic.claude-haiku-4-5-20251001-v1:0",
        region: str = "us-east-1",
        assume_role_arn: str | None = None,
        max_tokens: int = 400,
    ) -> None:
        # Build the Bedrock transport first, then hand it to the shared base.
        transport = BedrockClient(
            model=model,
            region=region,
            assume_role_arn=assume_role_arn,
            max_tokens=max_tokens,
        )
        super().__init__(transport, model)
@@ -0,0 +1,172 @@
1
+ """Provider-neutral Claude pattern judge + summarizer (ENH-003 phase 2).
2
+
3
+ The judge/summarizer logic is identical whether Claude runs on **AWS Bedrock**
4
+ or the **direct Anthropic API**: both return the Anthropic *Messages* response
5
+ shape (``content`` blocks + ``usage``). Only the transport *client* differs. So
6
+ the prompts + parsing live here once, and the per-endpoint modules
7
+ (``bedrock.py``, ``anthropic.py``) just supply a ``ClaudeClient``.
8
+
9
+ Tests drive the deterministic ``ScriptedJudge`` / ``ScriptedSummarizer`` instead;
10
+ this base is exercised with a stub client (no network) plus the env-gated live
11
+ tests.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from typing import Any, Protocol, runtime_checkable
17
+
18
+ # ``price_for`` / ``text_of`` are provider-neutral helpers that have lived in
19
+ # ``bedrock_client`` since feat-012; import them here rather than move the file
20
+ # (keeps the public import path stable for existing tests).
21
+ from .bedrock_client import text_of
22
+ from .heuristics import Candidate
23
+ from .judge import Verdict
24
+ from .summarizer import FileContext, Summary
25
+
26
+ _JUDGE_SYSTEM = (
27
+ "You classify a code symbol against GoF and architectural design patterns. "
28
+ "Confirm a pattern ONLY when the symbol's structure clearly supports it; prefer "
29
+ "rejecting over guessing. For each candidate pattern give is_match, a confidence "
30
+ "in [0,1], and a one-sentence rationale that cites the structural evidence."
31
+ )
32
+
33
+ _VERDICT_TOOL = {
34
+ "name": "submit_verdicts",
35
+ "description": "Return one verdict per candidate pattern.",
36
+ "input_schema": {
37
+ "type": "object",
38
+ "properties": {
39
+ "verdicts": {
40
+ "type": "array",
41
+ "items": {
42
+ "type": "object",
43
+ "properties": {
44
+ "pattern": {"type": "string"},
45
+ "is_match": {"type": "boolean"},
46
+ "confidence": {"type": "number"},
47
+ "rationale": {"type": "string"},
48
+ },
49
+ "required": ["pattern", "is_match", "confidence", "rationale"],
50
+ },
51
+ }
52
+ },
53
+ "required": ["verdicts"],
54
+ },
55
+ }
56
+
57
+ _FILE_SYSTEM = (
58
+ "You write a one-paragraph summary of a source file for a developer orienting "
59
+ "in the codebase. State what the file is FOR and the role of its main symbols. "
60
+ "Summarize only what the signatures and names show — do not invent behaviour. "
61
+ "No preamble, no bullet lists."
62
+ )
63
+ _REPO_SYSTEM = (
64
+ "You write a one-paragraph summary of a codebase from its per-file summaries. "
65
+ "State what the system does and how the major pieces fit. No preamble."
66
+ )
67
+
68
+
69
+ @runtime_checkable
70
+ class ClaudeClient(Protocol):
71
+ """One Messages call + cumulative cost. ``BedrockClient`` and
72
+ ``AnthropicClient`` both satisfy this."""
73
+
74
+ cost_usd: float
75
+
76
+ async def invoke(
77
+ self,
78
+ system: str,
79
+ user: str,
80
+ tools: list[dict[str, Any]] | None = None,
81
+ tool_name: str | None = None,
82
+ ) -> dict[str, Any]:
83
+ """Return the raw Messages payload (``content`` blocks + ``usage``)."""
84
+ ...
85
+
86
+
87
class ClaudeJudge:
    """Pattern judge over any ``ClaudeClient``.

    A forced ``submit_verdicts`` tool call yields one structured verdict per
    nominated pattern; verdicts for patterns that were not nominated are
    dropped.
    """

    def __init__(self, client: ClaudeClient, model: str) -> None:
        self.model = model
        self._client = client

    async def judge(self, candidate: Candidate) -> list[Verdict]:
        """Ask the model about ``candidate`` and return the parsed verdicts.

        Returns an empty list without calling the model when the candidate has
        no nominated patterns.
        """
        if not candidate.patterns:
            return []
        payload = await self._client.invoke(
            _JUDGE_SYSTEM,
            self._prompt(candidate),
            tools=[_VERDICT_TOOL],
            tool_name="submit_verdicts",
        )
        nominated = set(candidate.patterns)
        return [v for v in self._parse(payload) if v.pattern in nominated]

    @property
    def cost_usd(self) -> float:
        """Cumulative USD reported by the underlying client."""
        return self._client.cost_usd

    @staticmethod
    def _prompt(c: Candidate) -> str:
        """Render the user prompt; at most the first 30 methods are listed."""
        methods = "\n".join(f" - {n}: {sig}" for n, sig in c.methods[:30]) or " (none)"
        return (
            f"{c.kind} `{c.name}`\nsignature: {c.signature}\nmethods:\n{methods}\n\n"
            f"candidate patterns (from structural heuristics): {', '.join(c.patterns)}\n"
            f"evidence: {'; '.join(c.evidence)}\n\n"
            "Return a verdict for EACH candidate pattern."
        )

    @staticmethod
    def _parse(payload: dict[str, Any]) -> list[Verdict]:
        """Extract verdicts from the ``tool_use`` blocks of a Messages payload.

        Model output is untrusted, so anything malformed is skipped rather than
        raised: non-dict blocks/inputs/entries, a ``verdicts`` value that is not
        a list, and confidences that are non-numeric or NaN. Valid confidences
        are clamped to ``[0, 1]``.
        """
        import math

        verdicts: list[Verdict] = []
        for block in payload.get("content") or []:
            if not isinstance(block, dict) or block.get("type") != "tool_use":
                continue
            tool_input = block.get("input")
            entries = tool_input.get("verdicts") if isinstance(tool_input, dict) else None
            if not isinstance(entries, list):
                continue
            for raw in entries:
                if not isinstance(raw, dict):
                    continue
                try:
                    conf = float(raw.get("confidence", 0.0))
                    # min()/max() clamping would turn NaN into 1.0, so reject it.
                    if math.isnan(conf):
                        continue
                    verdict = Verdict(
                        pattern=str(raw.get("pattern", "")),
                        is_match=bool(raw.get("is_match", False)),
                        confidence=max(0.0, min(1.0, conf)),
                        rationale=str(raw.get("rationale", "")),
                    )
                except (TypeError, ValueError):
                    continue
                verdicts.append(verdict)
        return verdicts
140
+
141
+
142
class ClaudeSummarizer:
    """File and repository summaries over any ``ClaudeClient``.

    Both entry points are plain-text completions: no tools are passed and the
    reply's text blocks become the ``Summary`` text.
    """

    def __init__(self, client: ClaudeClient, model: str) -> None:
        self.model = model
        self._client = client

    async def _complete(self, system: str, user: str) -> Summary:
        """Run one completion and wrap its text in a ``Summary``."""
        payload = await self._client.invoke(system, user)
        return Summary(text=text_of(payload), model=self.model)

    async def summarize_file(self, ctx: FileContext, max_words: int) -> Summary:
        """Summarize one file from its path, imports and symbol signatures.

        Only the first 60 symbols and the first 20 imports are included in the
        prompt.
        """
        symbol_lines = [f" - {n}: {sig}" for n, sig in ctx.symbols[:60]]
        symbols = "\n".join(symbol_lines) or " (no symbols)"
        imports = ", ".join(ctx.imports[:20]) or "(none)"
        user = (
            f"File: {ctx.path}\nImports: {imports}\nSymbols:\n{symbols}\n\n"
            f"Summarize this file in at most {max_words} words."
        )
        return await self._complete(_FILE_SYSTEM, user)

    async def summarize_repo(
        self, repo: str, file_summaries: list[tuple[str, str]], max_words: int
    ) -> Summary:
        """Summarize the repository from (path, summary) pairs.

        Only the first 200 per-file summaries are included in the prompt.
        """
        bullet_lines = [f"- {path}: {text}" for path, text in file_summaries[:200]]
        joined = "\n".join(bullet_lines)
        user = (
            f"Repository: {repo}\nPer-file summaries:\n{joined}\n\n"
            f"Summarize the whole codebase in at most {max_words} words."
        )
        return await self._complete(_REPO_SYSTEM, user)

    @property
    def cost_usd(self) -> float:
        """Cumulative USD reported by the underlying client."""
        return self._client.cost_usd
@@ -0,0 +1,108 @@
1
+ """``PatternTagEnricher`` (feat-012) — orchestrate two-stage pattern tagging.
2
+
3
+ Stage-1 heuristics nominate; the injected ``PatternJudge`` confirms each under a
4
+ ``budget_usd`` cap (the framework ``BudgetPolicy`` breaker — the first feature to
5
+ ride the AgentForge budget rails). Confirmed verdicts above the confidence floor
6
+ become ``PatternTag`` nodes + ``TAGGED`` edges with honest ``llm`` provenance.
7
+ Re-tag is idempotent (clear a judged symbol's old ``TAGGED`` first); a tripped
8
+ budget stops cleanly, leaving unjudged candidates for the next run. This is a
9
+ framework-layer module (ADR-0001: ``enrich`` may import ``agentforge``).
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import asyncio
15
+
16
+ from agentforge_core.production.budget import BudgetPolicy
17
+ from agentforge_core.production.exceptions import BudgetExceeded
18
+
19
+ from agentforge_graph.core import Edge, GraphStore, Node, NodeKind, Provenance
20
+ from agentforge_graph.core.kinds import EdgeKind
21
+
22
+ from .heuristics import PatternHeuristics
23
+ from .judge import PatternJudge
24
+ from .report import EnrichReport
25
+ from .taxonomy import is_pattern, pattern_tag_id
26
+
27
+
28
class PatternTagEnricher:
    """Two-stage pattern tagging: heuristics nominate, the judge confirms.

    Confirmed verdicts become ``PATTERN_TAG`` nodes and ``TAGGED`` edges. The
    ids of the symbols judged in the most recent run are kept in
    ``last_judged_ids``.
    """

    version = "pattern-tags@1"  # bump on prompt/taxonomy change → re-tag

    def __init__(
        self,
        repo: str,
        judge: PatternJudge,
        *,
        heuristics: PatternHeuristics | None = None,
        confidence_floor: float = 0.7,
        budget_usd: float = 2.0,
        concurrency: int = 6,
        commit: str = "",
    ) -> None:
        self.repo = repo
        self.judge = judge
        self.heuristics = heuristics or PatternHeuristics()
        self.confidence_floor = confidence_floor
        self.budget_usd = budget_usd
        # A batch always holds at least one candidate.
        self.concurrency = max(1, concurrency)
        self.commit = commit
        self.last_judged_ids: list[str] = []

    async def enrich(self, store: GraphStore, symbol_ids: list[str]) -> EnrichReport:
        """Nominate, judge and tag ``symbol_ids``; return the run report.

        Candidates are judged in batches of ``self.concurrency``. The budget is
        checked before each batch and charged after it, so an overrun is
        bounded to one batch. When the budget trips, the loop stops and the
        remaining candidates stay unjudged. Old ``TAGGED`` edges of the judged
        symbols are cleared before the new facts are written.
        """
        report = EnrichReport()
        candidates = await self.heuristics.nominate(store, symbol_ids)
        report.candidates = len(candidates)
        self.last_judged_ids = []
        if not candidates:
            return report

        budget = BudgetPolicy(usd=self.budget_usd, max_tokens=10**12, max_iterations=10**12)
        facts: list[Node | Edge] = []
        step = self.concurrency

        for offset in range(0, len(candidates), step):
            batch = candidates[offset : offset + step]
            try:
                budget.check()
            except BudgetExceeded:
                report.budget_tripped = True
                break

            # The judge's running cost is read outside the gather, so the
            # batch's spend is the difference of two plain reads.
            spent_before = self.judge.cost_usd
            batch_verdicts = await asyncio.gather(*(self.judge.judge(c) for c in batch))
            budget.commit(self.judge.cost_usd - spent_before)
            report.cost_usd = round(self.judge.cost_usd, 6)

            for cand, verdicts in zip(batch, batch_verdicts, strict=True):
                report.judged += 1
                self.last_judged_ids.append(cand.symbol_id)
                confirmed = [
                    v
                    for v in verdicts
                    if v.is_match
                    and v.confidence >= self.confidence_floor
                    and is_pattern(v.pattern)
                ]
                for v in confirmed:
                    confidence = round(v.confidence, 4)
                    prov = Provenance.llm(self.version, confidence, self.commit)
                    tag_id = pattern_tag_id(self.repo, v.pattern)
                    tag_node = Node(
                        id=tag_id, kind=NodeKind.PATTERN_TAG, name=v.pattern, provenance=prov
                    )
                    tagged_edge = Edge(
                        src=cand.symbol_id,
                        dst=tag_id,
                        kind=EdgeKind.TAGGED,
                        attrs={"confidence": round(v.confidence, 4), "rationale": v.rationale},
                        provenance=prov,
                    )
                    facts.append(tag_node)
                    facts.append(tagged_edge)
                    report.tagged += 1
                    report.by_pattern[v.pattern] = report.by_pattern.get(v.pattern, 0) + 1

        # Idempotent re-tag: drop the judged symbols' old tags, then write the new.
        await store.clear_outgoing(self.last_judged_ids, EdgeKind.TAGGED)
        if facts:
            await store.add(facts)
        return report
@@ -0,0 +1,173 @@
1
+ """The ``infer_governs`` LLM matcher (feat-010 follow-up).
2
+
3
+ When an ADR's prose does not name the code it governs by path/symbol, the
4
+ deterministic mention parser produces zero ``GOVERNS`` edges. This optional pass
5
+ asks a model to match a decision's text against the repo's candidate symbols and
6
+ proposes ``GOVERNS`` edges with honest ``llm`` provenance + confidence.
7
+
8
+ The matcher is injectable (the Embedder/PatternJudge pattern): the live
9
+ ``ClaudeGovernsMatcher`` runs over any ``ClaudeClient`` (Bedrock or the Anthropic
10
+ API); the ``ScriptedMatcher`` keeps the enricher deterministic and credential-free
11
+ for tests. This is a framework-layer module (ADR-0001: ``enrich`` may import
12
+ ``agentforge``); the deterministic ``knowledge`` package stays model-free.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from collections.abc import Callable
18
+ from typing import Any, Protocol, runtime_checkable
19
+
20
+ from pydantic import BaseModel, Field
21
+
22
+ from .claude import ClaudeClient
23
+
24
+
25
class GovernsCandidate(BaseModel):
    """One symbol offered to the matcher as possibly governed by a decision."""

    # Identifier copied into a ``GovernsMatch`` when this candidate is chosen.
    symbol_id: str
    # Symbol name and kind, as shown to the matcher.
    name: str
    kind: str
    # Optional extra context; both default to the empty string.
    signature: str = ""
    path: str = ""
33
+
34
+
35
class GovernsMatch(BaseModel):
    """A proposed ``GOVERNS`` link from a decision to a single candidate symbol."""

    # ``symbol_id`` of the matched candidate.
    symbol_id: str
    # Validated to lie within [0.0, 1.0].
    confidence: float = Field(ge=0.0, le=1.0)
    # Free-text justification; empty when none was given.
    rationale: str = ""
41
+
42
+
43
@runtime_checkable
class GovernsMatcher(Protocol):
    """Structural interface of an ``infer_governs`` matcher."""

    async def match(
        self, title: str, text: str, candidates: list[GovernsCandidate]
    ) -> list[GovernsMatch]:
        """Return the proposed matches for one decision (``title`` + ``text``)."""
        ...

    @property
    def cost_usd(self) -> float:
        """Cumulative USD spent so far (0 for the scripted matcher)."""
        ...
53
+
54
+
55
+ ScriptFn = Callable[[str, str, list[GovernsCandidate]], list[GovernsMatch]]
56
+
57
+
58
class ScriptedMatcher:
    """Deterministic matcher for tests.

    Matching is delegated to the supplied function; without one, every call
    returns an empty list. Each ``match`` call adds ``per_call_usd`` to the
    reported cost, which lets tests exercise the budget breaker.
    """

    def __init__(self, fn: ScriptFn | None = None, per_call_usd: float = 0.0) -> None:
        if fn:
            self._fn = fn
        else:
            # Default script: propose nothing.
            self._fn = lambda title, text, cands: []
        self._per_call_usd = per_call_usd
        self._cost = 0.0

    async def match(
        self, title: str, text: str, candidates: list[GovernsCandidate]
    ) -> list[GovernsMatch]:
        """Charge one call, then return whatever the script function returns."""
        self._cost += self._per_call_usd
        scripted = self._fn(title, text, candidates)
        return scripted

    @property
    def cost_usd(self) -> float:
        """Total of the per-call charges accumulated so far."""
        return self._cost
76
+
77
+
78
+ _GOVERNS_SYSTEM = (
79
+ "You match an architecture decision record (ADR) to the code symbols it governs. "
80
+ "A symbol is governed when the decision's rules plainly constrain how that symbol "
81
+ "is designed, implemented, or changed. Be conservative: propose a match ONLY when "
82
+ "the decision clearly applies to the symbol — prefer proposing nothing over "
83
+ "guessing. For each match give a confidence in [0,1] and a one-sentence rationale "
84
+ "citing the decision text."
85
+ )
86
+
87
+ _GOVERNS_TOOL = {
88
+ "name": "submit_governs",
89
+ "description": "Return the candidate symbols this decision governs (possibly none).",
90
+ "input_schema": {
91
+ "type": "object",
92
+ "properties": {
93
+ "matches": {
94
+ "type": "array",
95
+ "items": {
96
+ "type": "object",
97
+ "properties": {
98
+ "symbol_index": {"type": "integer"},
99
+ "confidence": {"type": "number"},
100
+ "rationale": {"type": "string"},
101
+ },
102
+ "required": ["symbol_index", "confidence", "rationale"],
103
+ },
104
+ }
105
+ },
106
+ "required": ["matches"],
107
+ },
108
+ }
109
+
110
+
111
class ClaudeGovernsMatcher:
    """``infer_governs`` matcher over any ``ClaudeClient``.

    A forced ``submit_governs`` tool call returns candidate indices plus
    confidences, which are mapped back to the candidates' ``symbol_id``s.
    """

    def __init__(self, client: ClaudeClient, model: str) -> None:
        self.model = model
        self._client = client

    async def match(
        self, title: str, text: str, candidates: list[GovernsCandidate]
    ) -> list[GovernsMatch]:
        """Ask the model which candidates the decision governs.

        Returns an empty list without calling the model when there are no
        candidates.
        """
        if not candidates:
            return []
        payload = await self._client.invoke(
            _GOVERNS_SYSTEM,
            self._prompt(title, text, candidates),
            tools=[_GOVERNS_TOOL],
            tool_name="submit_governs",
        )
        return self._parse(payload, candidates)

    @property
    def cost_usd(self) -> float:
        """Cumulative USD reported by the underlying client."""
        return self._client.cost_usd

    @staticmethod
    def _prompt(title: str, text: str, candidates: list[GovernsCandidate]) -> str:
        """Render the user prompt: the decision, then the indexed candidates."""
        listing = "\n".join(
            f" [{i}] {c.kind} `{c.name}` — {c.path}"
            + (f" :: {c.signature}" if c.signature else "")
            for i, c in enumerate(candidates)
        )
        return (
            f"Decision: {title}\n\n{text.strip()}\n\n"
            f"Candidate symbols (index in brackets):\n{listing}\n\n"
            "Return the candidates this decision governs by their index. "
            "If none clearly apply, return an empty list."
        )

    @staticmethod
    def _parse(payload: dict[str, Any], candidates: list[GovernsCandidate]) -> list[GovernsMatch]:
        """Turn the ``tool_use`` blocks of a payload into ``GovernsMatch`` objects.

        Model output is untrusted, so anything malformed is skipped rather than
        raised: non-dict blocks/inputs/entries, a ``matches`` value that is not
        a list, indices that are unparseable or out of range, and confidences
        that are non-numeric or NaN. Each candidate index is used at most once
        (first valid occurrence wins); valid confidences are clamped to
        ``[0, 1]`` and a missing/null confidence counts as 0.0.
        """
        import math

        matches: list[GovernsMatch] = []
        seen: set[int] = set()
        for block in payload.get("content") or []:
            if not isinstance(block, dict) or block.get("type") != "tool_use":
                continue
            tool_input = block.get("input")
            proposals = tool_input.get("matches") if isinstance(tool_input, dict) else None
            if not isinstance(proposals, list):
                continue
            for raw in proposals:
                if not isinstance(raw, dict):
                    continue
                try:
                    idx = int(raw.get("symbol_index", -1))
                    conf = float(raw.get("confidence", 0.0) or 0.0)
                except (TypeError, ValueError, OverflowError):
                    continue
                # min()/max() clamping would turn NaN into 1.0, so reject it.
                if math.isnan(conf):
                    continue
                if idx < 0 or idx >= len(candidates) or idx in seen:
                    continue
                # Only a fully valid proposal claims its index.
                seen.add(idx)
                matches.append(
                    GovernsMatch(
                        symbol_id=candidates[idx].symbol_id,
                        confidence=max(0.0, min(1.0, conf)),
                        rationale=str(raw.get("rationale", "")),
                    )
                )
        return matches