agentforge-graph 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentforge_graph/__init__.py +6 -0
- agentforge_graph/chunking/__init__.py +12 -0
- agentforge_graph/chunking/cast.py +159 -0
- agentforge_graph/chunking/chunk.py +19 -0
- agentforge_graph/chunking/tokens.py +15 -0
- agentforge_graph/cli.py +607 -0
- agentforge_graph/config.py +259 -0
- agentforge_graph/core/__init__.py +54 -0
- agentforge_graph/core/conformance.py +270 -0
- agentforge_graph/core/contracts.py +163 -0
- agentforge_graph/core/kinds.py +68 -0
- agentforge_graph/core/models.py +134 -0
- agentforge_graph/core/provenance.py +62 -0
- agentforge_graph/core/symbols.py +116 -0
- agentforge_graph/embed/__init__.py +28 -0
- agentforge_graph/embed/base.py +22 -0
- agentforge_graph/embed/bedrock.py +85 -0
- agentforge_graph/embed/fake.py +34 -0
- agentforge_graph/embed/openai.py +67 -0
- agentforge_graph/embed/pipeline.py +184 -0
- agentforge_graph/embed/registry.py +66 -0
- agentforge_graph/embed/report.py +15 -0
- agentforge_graph/enrich/__init__.py +70 -0
- agentforge_graph/enrich/anthropic.py +38 -0
- agentforge_graph/enrich/anthropic_client.py +109 -0
- agentforge_graph/enrich/bedrock.py +24 -0
- agentforge_graph/enrich/bedrock_client.py +115 -0
- agentforge_graph/enrich/bedrock_summarizer.py +23 -0
- agentforge_graph/enrich/claude.py +172 -0
- agentforge_graph/enrich/enricher.py +108 -0
- agentforge_graph/enrich/governs.py +173 -0
- agentforge_graph/enrich/governs_enricher.py +152 -0
- agentforge_graph/enrich/heuristics.py +224 -0
- agentforge_graph/enrich/judge.py +63 -0
- agentforge_graph/enrich/registry.py +133 -0
- agentforge_graph/enrich/report.py +60 -0
- agentforge_graph/enrich/summarizer.py +62 -0
- agentforge_graph/enrich/summary_enricher.py +211 -0
- agentforge_graph/enrich/taxonomy.py +38 -0
- agentforge_graph/frameworks/__init__.py +29 -0
- agentforge_graph/frameworks/base.py +75 -0
- agentforge_graph/frameworks/detect.py +124 -0
- agentforge_graph/frameworks/extractor.py +63 -0
- agentforge_graph/frameworks/orm.py +93 -0
- agentforge_graph/frameworks/packs/_js_ast.py +56 -0
- agentforge_graph/frameworks/packs/_python_ast.py +157 -0
- agentforge_graph/frameworks/packs/django/__init__.py +240 -0
- agentforge_graph/frameworks/packs/django/models.scm +7 -0
- agentforge_graph/frameworks/packs/express/__init__.py +133 -0
- agentforge_graph/frameworks/packs/express/routes.scm +8 -0
- agentforge_graph/frameworks/packs/fastapi/__init__.py +210 -0
- agentforge_graph/frameworks/packs/fastapi/depends.scm +6 -0
- agentforge_graph/frameworks/packs/fastapi/routes.scm +10 -0
- agentforge_graph/frameworks/packs/flask/__init__.py +143 -0
- agentforge_graph/frameworks/packs/flask/routes.scm +11 -0
- agentforge_graph/frameworks/packs/nestjs/__init__.py +205 -0
- agentforge_graph/frameworks/packs/nestjs/routes.scm +6 -0
- agentforge_graph/frameworks/packs/spring/__init__.py +267 -0
- agentforge_graph/frameworks/packs/spring/routes.scm +6 -0
- agentforge_graph/frameworks/packs/sqlalchemy/__init__.py +250 -0
- agentforge_graph/frameworks/packs/sqlalchemy/models.scm +7 -0
- agentforge_graph/frameworks/registry.py +44 -0
- agentforge_graph/ingest/__init__.py +30 -0
- agentforge_graph/ingest/codegraph.py +847 -0
- agentforge_graph/ingest/extractor.py +353 -0
- agentforge_graph/ingest/incremental/__init__.py +25 -0
- agentforge_graph/ingest/incremental/detect.py +118 -0
- agentforge_graph/ingest/incremental/dirty.py +61 -0
- agentforge_graph/ingest/incremental/indexer.py +218 -0
- agentforge_graph/ingest/incremental/meta.py +72 -0
- agentforge_graph/ingest/incremental/ports.py +39 -0
- agentforge_graph/ingest/pack.py +160 -0
- agentforge_graph/ingest/packs/__init__.py +34 -0
- agentforge_graph/ingest/packs/cpp/__init__.py +35 -0
- agentforge_graph/ingest/packs/cpp/references.scm +15 -0
- agentforge_graph/ingest/packs/cpp/structure.scm +49 -0
- agentforge_graph/ingest/packs/csharp/__init__.py +35 -0
- agentforge_graph/ingest/packs/csharp/references.scm +12 -0
- agentforge_graph/ingest/packs/csharp/structure.scm +45 -0
- agentforge_graph/ingest/packs/go/__init__.py +38 -0
- agentforge_graph/ingest/packs/go/references.scm +12 -0
- agentforge_graph/ingest/packs/go/structure.scm +64 -0
- agentforge_graph/ingest/packs/java/__init__.py +35 -0
- agentforge_graph/ingest/packs/java/references.scm +12 -0
- agentforge_graph/ingest/packs/java/structure.scm +38 -0
- agentforge_graph/ingest/packs/javascript/__init__.py +34 -0
- agentforge_graph/ingest/packs/javascript/references.scm +11 -0
- agentforge_graph/ingest/packs/javascript/structure.scm +166 -0
- agentforge_graph/ingest/packs/php/__init__.py +35 -0
- agentforge_graph/ingest/packs/php/references.scm +15 -0
- agentforge_graph/ingest/packs/php/structure.scm +44 -0
- agentforge_graph/ingest/packs/python/__init__.py +25 -0
- agentforge_graph/ingest/packs/python/references.scm +14 -0
- agentforge_graph/ingest/packs/python/structure.scm +57 -0
- agentforge_graph/ingest/packs/ruby/__init__.py +37 -0
- agentforge_graph/ingest/packs/ruby/references.scm +12 -0
- agentforge_graph/ingest/packs/ruby/structure.scm +37 -0
- agentforge_graph/ingest/packs/rust/__init__.py +39 -0
- agentforge_graph/ingest/packs/rust/references.scm +12 -0
- agentforge_graph/ingest/packs/rust/structure.scm +46 -0
- agentforge_graph/ingest/packs/typescript/__init__.py +31 -0
- agentforge_graph/ingest/packs/typescript/references.scm +11 -0
- agentforge_graph/ingest/packs/typescript/structure.scm +99 -0
- agentforge_graph/ingest/pipeline.py +134 -0
- agentforge_graph/ingest/report.py +84 -0
- agentforge_graph/ingest/resolver.py +467 -0
- agentforge_graph/ingest/source.py +79 -0
- agentforge_graph/knowledge/__init__.py +28 -0
- agentforge_graph/knowledge/adr.py +136 -0
- agentforge_graph/knowledge/commits.py +152 -0
- agentforge_graph/knowledge/ingest.py +312 -0
- agentforge_graph/knowledge/mentions.py +71 -0
- agentforge_graph/knowledge/report.py +32 -0
- agentforge_graph/main.py +21 -0
- agentforge_graph/providers.py +36 -0
- agentforge_graph/repomap/__init__.py +14 -0
- agentforge_graph/repomap/rank.py +161 -0
- agentforge_graph/repomap/render.py +55 -0
- agentforge_graph/repomap/repomap.py +66 -0
- agentforge_graph/retrieve/__init__.py +21 -0
- agentforge_graph/retrieve/pack.py +76 -0
- agentforge_graph/retrieve/rerank.py +251 -0
- agentforge_graph/retrieve/retriever.py +286 -0
- agentforge_graph/retrieve/scoring.py +36 -0
- agentforge_graph/serve/__init__.py +19 -0
- agentforge_graph/serve/engine.py +204 -0
- agentforge_graph/serve/http_runner.py +133 -0
- agentforge_graph/serve/server.py +110 -0
- agentforge_graph/serve/tools.py +307 -0
- agentforge_graph/store/__init__.py +32 -0
- agentforge_graph/store/_rowmap.py +102 -0
- agentforge_graph/store/errors.py +22 -0
- agentforge_graph/store/facade.py +89 -0
- agentforge_graph/store/kuzu_store.py +380 -0
- agentforge_graph/store/lance_store.py +146 -0
- agentforge_graph/store/neo4j_store.py +294 -0
- agentforge_graph/store/pgvector_store.py +170 -0
- agentforge_graph/store/registry.py +45 -0
- agentforge_graph/temporal/__init__.py +36 -0
- agentforge_graph/temporal/backfill.py +338 -0
- agentforge_graph/temporal/events.py +82 -0
- agentforge_graph/temporal/index.py +190 -0
- agentforge_graph/temporal/mining.py +190 -0
- agentforge_graph/temporal/recorder.py +114 -0
- agentforge_graph/temporal/store.py +282 -0
- agentforge_graph-0.3.2.dist-info/METADATA +291 -0
- agentforge_graph-0.3.2.dist-info/RECORD +151 -0
- agentforge_graph-0.3.2.dist-info/WHEEL +4 -0
- agentforge_graph-0.3.2.dist-info/entry_points.txt +3 -0
- agentforge_graph-0.3.2.dist-info/licenses/LICENSE +202 -0
- agentforge_graph-0.3.2.dist-info/licenses/NOTICE +14 -0
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
"""``DecisionGovernsInferencer`` (feat-010 follow-up) — the optional LLM pass that
|
|
2
|
+
proposes ``GOVERNS`` edges for ADRs whose prose names no code.
|
|
3
|
+
|
|
4
|
+
Only decisions with **zero parsed** ``GOVERNS`` edges are considered (the LLM
|
|
5
|
+
fills the gap the deterministic parser left; it never overrides parsed links).
|
|
6
|
+
Each considered decision's prose is matched against the repo's candidate symbols
|
|
7
|
+
under a ``budget_usd`` cap (the framework ``BudgetPolicy``); matches above the
|
|
8
|
+
confidence floor become ``GOVERNS`` edges with honest ``llm`` provenance. Re-run
|
|
9
|
+
is idempotent — a considered decision's prior ``llm`` GOVERNS are cleared first
|
|
10
|
+
(safe: it has no parsed GOVERNS to clobber). Off by default; ``ckg enrich
|
|
11
|
+
--decisions`` runs it. Framework-layer (ADR-0001: ``enrich`` may import
|
|
12
|
+
``agentforge``).
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from agentforge_core.production.budget import BudgetPolicy
|
|
18
|
+
from agentforge_core.production.exceptions import BudgetExceeded
|
|
19
|
+
|
|
20
|
+
from agentforge_graph.core import (
|
|
21
|
+
Edge,
|
|
22
|
+
EdgeKind,
|
|
23
|
+
GraphQuery,
|
|
24
|
+
GraphStore,
|
|
25
|
+
Node,
|
|
26
|
+
NodeKind,
|
|
27
|
+
Provenance,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
from .governs import GovernsCandidate, GovernsMatcher
|
|
31
|
+
from .report import GovernsReport
|
|
32
|
+
|
|
33
|
+
# Limit passed to ``GraphQuery`` when loading nodes; intended as "fetch
# everything" (presumably larger than any real graph — confirm against the store).
_ALL = 10_000_000
# Node kinds that may be offered to the matcher as GOVERNS targets.
_CANDIDATE_KINDS = {NodeKind.CLASS, NodeKind.FUNCTION, NodeKind.METHOD}
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class DecisionGovernsInferencer:
    """Propose ``GOVERNS`` edges for decisions that have no parsed ``GOVERNS`` edge.

    For every such decision the matcher is asked to match the decision's prose
    against a bounded list of candidate symbols. Matches at or above
    ``confidence_floor`` are written as ``GOVERNS`` edges with ``llm``
    provenance. Spending is tracked through a ``BudgetPolicy`` capped at
    ``budget_usd``.
    """

    version = "infer-governs@1"  # bump on prompt change → re-infer

    def __init__(
        self,
        repo: str,
        matcher: GovernsMatcher,
        *,
        confidence_floor: float = 0.7,
        budget_usd: float = 1.0,
        max_candidates: int = 60,
        commit: str = "",
    ) -> None:
        """Store the run configuration.

        Args:
            repo: Repository identifier (stored; not read elsewhere in this class).
            matcher: Object providing ``match(...)`` and a cumulative ``cost_usd``.
            confidence_floor: Matches with a lower confidence are dropped.
            budget_usd: USD cap handed to ``BudgetPolicy``.
            max_candidates: Upper bound on candidate symbols; clamped to at least 1.
            commit: Commit identifier recorded in the edges' provenance.
        """
        self.repo = repo
        self.matcher = matcher
        self.confidence_floor = confidence_floor
        self.budget_usd = budget_usd
        self.max_candidates = max(1, max_candidates)
        self.commit = commit

    async def enrich(self, store: GraphStore) -> GovernsReport:
        """Run one inference pass over ``store`` and return its counters.

        Returns early (writing nothing) when the graph has no decisions, no
        candidate symbols, or no decision lacking a parsed ``GOVERNS`` edge.
        """
        report = GovernsReport()
        nodes = (await store.query(GraphQuery(limit=_ALL))).nodes
        decisions = [n for n in nodes if n.kind is NodeKind.DECISION]
        report.decisions_total = len(decisions)
        if not decisions:
            return report

        candidates = self._candidates(nodes)
        report.candidates = len(candidates)
        if not candidates:
            return report

        # only decisions the deterministic parser left ungoverned (no parsed link)
        targets: list[Node] = []
        for d in decisions:
            govs = await store.adjacent(d.id, [EdgeKind.GOVERNS], "out")
            if not any(e.provenance.source == "parsed" for e in govs):
                targets.append(d)
        report.decisions_considered = len(targets)
        if not targets:
            return report

        # Token/iteration limits are set very high so that, in effect, only the
        # USD cap can trip the policy.
        budget = BudgetPolicy(usd=self.budget_usd, max_tokens=10**12, max_iterations=10**12)
        facts: list[Node | Edge] = []
        inferred_ids: list[str] = []
        for d in targets:
            try:
                budget.check()
            except BudgetExceeded:
                # Stop before the next matcher call; decisions already matched
                # are still written below.
                report.budget_tripped = True
                break
            text = await self._decision_text(store, d.id)
            # The matcher exposes cumulative cost, so charge only this call's delta.
            before = self.matcher.cost_usd
            matches = await self.matcher.match(d.attrs.get("title", d.name), text, candidates)
            budget.commit(self.matcher.cost_usd - before)
            report.cost_usd = round(self.matcher.cost_usd, 6)
            inferred_ids.append(d.id)
            for m in matches:
                if m.confidence < self.confidence_floor:
                    continue
                prov = Provenance.llm(self.version, round(m.confidence, 4), self.commit)
                facts.append(
                    Edge(
                        src=d.id,
                        dst=m.symbol_id,
                        kind=EdgeKind.GOVERNS,
                        attrs={"confidence": round(m.confidence, 4), "rationale": m.rationale},
                        provenance=prov,
                    )
                )
                report.governs_inferred += 1

        # idempotent re-infer: drop considered decisions' prior llm GOVERNS, then
        # write the new ones. Safe because a considered decision has no *parsed*
        # GOVERNS, so this never removes a deterministic link.
        if inferred_ids:
            await store.clear_outgoing(inferred_ids, EdgeKind.GOVERNS)
        if facts:
            await store.add(facts)
        return report

    def _candidates(self, nodes: list[Node]) -> list[GovernsCandidate]:
        """Deterministic, bounded candidate set: Class/Function/Method symbols
        sorted by id, capped at ``max_candidates`` (at least one is kept).
        (Repo-map-ranked candidates are a refinement.)"""
        # Function-scope import, resolved once per call instead of once per node.
        from agentforge_graph.core import SymbolID

        # Filter first so only eligible symbols are sorted; the resulting order
        # is the same as sorting everything and filtering afterwards.
        symbols = sorted(
            (n for n in nodes if n.kind in _CANDIDATE_KINDS),
            key=lambda z: z.id,
        )
        return [
            GovernsCandidate(
                symbol_id=n.id,
                name=n.name,
                kind=n.kind.value,
                signature=str(n.attrs.get("signature", "")),
                path=SymbolID.parse(n.id).path,
            )
            for n in symbols[: max(1, self.max_candidates)]
        ]

    @staticmethod
    async def _decision_text(store: GraphStore, decision_id: str) -> str:
        """The decision's prose — its DocChunk bodies, in order, bounded.

        Chunks are the ``DOC_CHUNK`` neighbours reached over ``CONTAINS`` at
        depth 1, ordered by their integer ``seq`` attr (0 when absent). Each
        contributes ``heading`` + newline + ``text``; empty parts are skipped
        and the joined result is truncated to 6000 characters.
        """
        chunks = [
            n
            for n in await store.neighbors(decision_id, [EdgeKind.CONTAINS], depth=1)
            if n.kind is NodeKind.DOC_CHUNK
        ]
        chunks.sort(key=lambda n: int(n.attrs.get("seq", 0)))
        parts = [f"{n.attrs.get('heading', '')}\n{n.attrs.get('text', '')}".strip() for n in chunks]
        return "\n\n".join(p for p in parts if p)[:6000]
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
"""Stage-1 structural pattern heuristics (feat-012) — deterministic, no LLM.
|
|
2
|
+
|
|
3
|
+
Cheap rules nominate *candidate* patterns for a symbol from its structure (name,
|
|
4
|
+
methods, and graph neighbourhood). Recall over precision here: the LLM judge
|
|
5
|
+
(stage 2) confirms or rejects each nomination, so a spurious candidate costs one
|
|
6
|
+
judge call, while a missed one is never recovered. Framework-free and
|
|
7
|
+
golden-tested. Each candidate carries ``evidence`` strings the judge must weigh,
|
|
8
|
+
so the verdict cites structure (spec §8).
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import re
|
|
14
|
+
from dataclasses import dataclass, field
|
|
15
|
+
from typing import Literal
|
|
16
|
+
|
|
17
|
+
from agentforge_graph.core import EdgeKind, GraphStore, Node, NodeKind, SymbolID
|
|
18
|
+
|
|
19
|
+
# CRUD-ish method names that signal a Repository/DAO. Intersected with the
# lower-cased method names of a class (exact-name match).
_CRUD = {
    "get",
    "find",
    "save",
    "add",
    "create",
    "delete",
    "remove",
    "update",
    "list",
    "all",
    "fetch",
    "insert",
    "query",
    "load",
    "store",
}
# Factory-verb *prefixes*: matched with ``str.startswith`` on the lower-cased
# name, so short entries such as "of" or "new" also match longer names.
_FACTORY_VERBS = ("create", "make", "build", "new", "from_", "of")
# Method names (lower-cased, exact match) that nominate a class as an Observer.
_OBSERVER_METHODS = {"notify", "subscribe", "unsubscribe", "update", "register", "emit"}
|
|
39
|
+
|
|
40
|
+
# Role hints keyed by a name/base **suffix** → the pattern it nominates. Applied
# to a class's own name and to its base classes (ENH-001). Suffix comparison is
# case-insensitive (it goes through ``_suffix``). Tuple order determines the
# order in which patterns and evidence are appended to a candidate.
_ROLE_SUFFIXES: tuple[tuple[tuple[str, ...], str], ...] = (
    (("Repository", "Repo", "DAO", "Store"), "Repository"),
    (("Service", "UseCase", "Interactor"), "Service"),
    (("Controller", "Resource", "Handler", "View"), "Controller"),
    (("Factory",), "Factory"),
    (("Builder",), "Builder"),
    (("Strategy", "Policy"), "Strategy"),
    (("Adapter",), "Adapter"),
    (("Facade",), "Facade"),
    (("Decorator",), "Decorator"),
    (("Observer", "Listener", "Subscriber"), "Observer"),
)
# Extra name suffixes considered only in `recall="broad"` mode. These are
# applied to the class's own name only, not to its bases.
_BROAD_SUFFIXES: tuple[tuple[tuple[str, ...], str], ...] = (
    (("Manager", "Provider", "Engine", "Coordinator"), "Service"),
    (("Gateway", "Client", "Wrapper", "Proxy"), "Adapter"),
)
# Base classes that don't imply an implementable role (skip for the Strategy
# "implements an interface" broad signal). Compared by exact, case-sensitive
# leaf name as returned by ``_base_names``.
_TRIVIAL_BASES = {
    "object",
    "Exception",
    "BaseException",
    "BaseModel",
    "Enum",
    "StrEnum",
    "IntEnum",
    "Protocol",
    "Generic",
    "ABC",
    "Dict",
    "List",
    "Set",
    "Tuple",
    "NamedTuple",
    "TypedDict",
    "dict",
    "list",
    "set",
    "tuple",
}
|
|
83
|
+
_CLASS_BASES_RE = re.compile(r"class\s+\w+\s*\(([^)]*)\)")


def _split_top_level(text: str) -> list[str]:
    """Split ``text`` on commas that are not nested inside square brackets."""
    parts: list[str] = []
    depth = 0
    start = 0
    for i, ch in enumerate(text):
        if ch == "[":
            depth += 1
        elif ch == "]":
            depth = max(0, depth - 1)
        elif ch == "," and depth == 0:
            parts.append(text[start:i])
            start = i + 1
    parts.append(text[start:])
    return parts


def _base_names(signature: str) -> list[str]:
    """Base classes parsed from a class signature line (``class X(A, b.C):`` →
    ``["A", "C"]``). Avoids needing INHERITS edges in the graph (ENH-001).

    Subscripted bases are reduced to their leaf name, including when the
    subscript itself contains commas or dots (``Dict[str, int]`` → ``Dict``,
    ``Repo[models.User]`` → ``Repo``). Keyword arguments such as
    ``metaclass=…`` are skipped. Returns ``[]`` when the signature has no
    parenthesised base list.
    """
    m = _CLASS_BASES_RE.search(signature)
    if not m:
        return []
    bases: list[str] = []
    # Split only on top-level commas so "Dict[str, int]" stays one base.
    for part in _split_top_level(m.group(1)):
        part = part.strip()
        if not part or "=" in part:  # skip metaclass=… / keyword bases
            continue
        # Drop the subscript *before* taking the dotted leaf; otherwise a dot
        # inside the subscript (Repo[models.User]) would be picked up instead.
        leaf = part.split("[")[0].split(".")[-1].strip()  # abc.ABC→ABC, Generic[T]→Generic
        if leaf[:1].isalpha():
            bases.append(leaf)
    return bases
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
@dataclass
class Candidate:
    """A symbol nominated for one or more patterns, with the structure the judge
    needs (so it doesn't re-query the graph)."""

    symbol_id: str  # id of the graph node this candidate was built from
    name: str  # the node's name
    kind: str  # the node kind's ``.value`` string
    signature: str  # the node's ``signature`` attr as a string ("" when absent)
    methods: list[tuple[str, str]]  # (name, signature)
    patterns: list[str] = field(default_factory=list)  # nominated pattern names, in nomination order
    evidence: list[str] = field(default_factory=list)  # human-readable reasons recorded with the nominations
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _suffix(name: str, *suffixes: str) -> bool:
    """Return True when ``name`` ends with any of ``suffixes``, ignoring case."""
    lowered = name.lower()
    for candidate in suffixes:
        if lowered.endswith(candidate.lower()):
            return True
    return False
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
# Allowed values for ``PatternHeuristics(recall=...)``.
Recall = Literal["conservative", "broad"]
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
class PatternHeuristics:
    """Structure-based nominator of candidate patterns for code symbols.

    ``recall`` sets how wide the net is. ``conservative`` (default) uses name,
    base-class and shape signals. ``broad`` additionally nominates extra name
    suffixes and ABC implementations (more judge calls, higher recall) — ENH-001.
    """

    def __init__(self, recall: Recall = "conservative") -> None:
        self.recall = recall

    async def nominate(self, store: GraphStore, symbol_ids: list[str]) -> list[Candidate]:
        """Build a ``Candidate`` for each Class/Function id and keep those that
        received at least one pattern. Unknown ids and other kinds are skipped."""
        nominated: list[Candidate] = []
        for symbol_id in symbol_ids:
            node = await store.get(symbol_id)
            if node is None or node.kind not in (NodeKind.CLASS, NodeKind.FUNCTION):
                continue
            is_class = node.kind is NodeKind.CLASS
            # Only classes carry a method list; functions get an empty one.
            members = await self._methods(store, node.id) if is_class else []
            candidate = Candidate(
                symbol_id=node.id,
                name=node.name,
                kind=node.kind.value,
                signature=str(node.attrs.get("signature", "")),
                methods=members,
            )
            if is_class:
                await self._class_patterns(store, candidate)
            else:
                self._function_patterns(candidate)
            if candidate.patterns:
                nominated.append(candidate)
        return nominated

    async def _methods(self, store: GraphStore, class_id: str) -> list[tuple[str, str]]:
        """Collect ``(name, signature)`` for each METHOD node the class CONTAINS."""
        found: list[tuple[str, str]] = []
        contained = await store.adjacent(class_id, [EdgeKind.CONTAINS], "out")
        for edge in contained:
            child = await store.get(edge.dst)
            if child is None or child.kind is not NodeKind.METHOD:
                continue
            found.append((child.name, str(child.attrs.get("signature", ""))))
        return found

    @staticmethod
    def _nominate(c: Candidate, pattern: str, evidence: str) -> None:
        """Record ``pattern`` once per candidate; evidence is always appended."""
        already_listed = pattern in c.patterns
        if not already_listed:
            c.patterns.append(pattern)
        c.evidence.append(evidence)

    async def _class_patterns(self, store: GraphStore, c: Candidate) -> None:
        """Apply name, base-class, shape and (in broad mode) extra signals to a class."""
        broad = self.recall == "broad"
        method_names = [name for name, _sig in c.methods]
        lowered = [name.lower() for name in method_names]
        names = set(lowered)
        crud = sorted(names & _CRUD)
        bases = _base_names(c.signature)

        # --- the class's own name ---
        for suffixes, pattern in _ROLE_SUFFIXES:
            if _suffix(c.name, *suffixes):
                self._nominate(c, pattern, f"name ends with a {pattern} suffix ({c.name})")

        # --- role-named base classes (ENH-001) ---
        for base in bases:
            for suffixes, pattern in _ROLE_SUFFIXES:
                if _suffix(base, *suffixes):
                    self._nominate(c, pattern, f"inherits {base} (a {pattern})")

        # --- method-shape signals ---
        crud_needed = 1 if broad else 2
        if len(crud) >= crud_needed:
            self._nominate(c, "Repository", f"has CRUD-shaped methods: {', '.join(crud)}")
        if any(low.startswith(_FACTORY_VERBS) for low in lowered):
            self._nominate(c, "Factory", "factory-verb methods (create/make/build/…)")
        fluent_prefixes = ("with_", "set_", "add_")
        if "build" in names and any(low.startswith(fluent_prefixes) for low in lowered):
            self._nominate(c, "Builder", "a build() method with fluent with_/set_ methods")
        if "get_instance" in names or "instance" in names:
            self._nominate(c, "Singleton", "get_instance/instance accessor")
        if names & _OBSERVER_METHODS:
            self._nominate(c, "Observer", "observer-shaped methods (notify/subscribe/…)")

        # A class whose methods are all dunders (or that has none but a DTO/VO
        # style name) is treated as data-only.
        behaviour = [name for name in method_names if not name.startswith("__")]
        data_only = not behaviour and (
            c.methods or _suffix(c.name, "DTO", "Dto", "ValueObject", "VO")
        )
        if data_only:
            tag = "DTO" if _suffix(c.name, "DTO", "Dto") else "ValueObject"
            self._nominate(c, tag, "data-only class (no behaviour methods)")

        # --- broad mode only: extra suffixes + ABC-implementation as Strategy ---
        if not broad:
            return
        for suffixes, pattern in _BROAD_SUFFIXES:
            if _suffix(c.name, *suffixes):
                self._nominate(c, pattern, f"name ends with {pattern}-ish suffix ({c.name})")
        implementable = [b for b in bases if b not in _TRIVIAL_BASES]
        if implementable and behaviour:
            self._nominate(c, "Strategy", f"implements interface(s) {', '.join(implementable)}")

    def _function_patterns(self, c: Candidate) -> None:
        """Nominate a function as Factory when its name starts with a factory verb."""
        lowered = c.name.lower()
        if lowered.startswith("__") or not lowered.startswith(_FACTORY_VERBS):
            return
        c.patterns.append("Factory")
        c.evidence.append(f"factory-verb function name ({c.name})")
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def class_and_function_ids(nodes: list[Node]) -> list[str]:
    """Return the sorted ids of Class/Function nodes whose parsed symbol id has
    a non-empty descriptor (the ids eligible for pattern tagging)."""
    eligible_kinds = {NodeKind.CLASS, NodeKind.FUNCTION}
    ids: list[str] = []
    for node in nodes:
        if node.kind not in eligible_kinds:
            continue
        if not SymbolID.parse(node.id).descriptor:
            continue
        ids.append(node.id)
    ids.sort()
    return ids
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""The pattern judge interface (feat-012) — stage 2.
|
|
2
|
+
|
|
3
|
+
A ``PatternJudge`` confirms or rejects a stage-1 ``Candidate``'s nominated
|
|
4
|
+
patterns, returning a ``Verdict`` per pattern with confidence + rationale. The
|
|
5
|
+
interface is injectable (the Embedder/FakeEmbedder pattern): the live
|
|
6
|
+
``BedrockClaudeJudge`` (``bedrock.py``) is the only model-calling class, while
|
|
7
|
+
``ScriptedJudge`` keeps the whole enricher deterministic and testable.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from collections.abc import Callable
|
|
13
|
+
from typing import Protocol, runtime_checkable
|
|
14
|
+
|
|
15
|
+
from pydantic import BaseModel, Field
|
|
16
|
+
|
|
17
|
+
from .heuristics import Candidate
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class Verdict(BaseModel):
    """A judge's decision on one nominated pattern of a candidate."""

    pattern: str  # name of the pattern this verdict is about
    is_match: bool  # whether the judge confirmed the pattern
    confidence: float = Field(ge=0.0, le=1.0)  # validated to lie in [0.0, 1.0]
    rationale: str = ""  # free-text justification; empty by default
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@runtime_checkable
class PatternJudge(Protocol):
    """Structural interface for stage-2 judges: an async ``judge`` call plus a
    running ``cost_usd`` total."""

    # Return the verdicts for ``candidate``'s nominated patterns.
    async def judge(self, candidate: Candidate) -> list[Verdict]: ...

    @property
    def cost_usd(self) -> float:
        """Cumulative USD spent so far (0 for the scripted judge)."""
        ...
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# Synchronous callable a ``ScriptedJudge`` can be driven with: candidate in, verdicts out.
ScriptFn = Callable[[Candidate], list[Verdict]]
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class ScriptedJudge:
    """Deterministic stand-in judge for tests.

    Pass ``fn`` to script the verdicts yourself; without it every nominated
    pattern is confirmed at a fixed confidence. ``per_call_usd`` is added to the
    running cost on each ``judge`` call so a test can exercise the budget breaker.
    """

    def __init__(self, fn: ScriptFn | None = None, per_call_usd: float = 0.0) -> None:
        self._cost = 0.0
        self._per_call_usd = per_call_usd
        self._fn = fn if fn else self._confirm_all

    @staticmethod
    def _confirm_all(candidate: Candidate) -> list[Verdict]:
        """Default script: confirm each nominated pattern with confidence 0.9."""
        verdicts: list[Verdict] = []
        for pattern_name in candidate.patterns:
            verdicts.append(
                Verdict(pattern=pattern_name, is_match=True, confidence=0.9, rationale="scripted")
            )
        return verdicts

    async def judge(self, candidate: Candidate) -> list[Verdict]:
        """Charge one call's cost, then return the scripted verdicts."""
        self._cost = self._cost + self._per_call_usd
        return self._fn(candidate)

    @property
    def cost_usd(self) -> float:
        """Total of ``per_call_usd`` over all ``judge`` calls so far."""
        return self._cost
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
"""Resolve enrichment models (pattern judge + summarizer) from ``EnrichConfig``
|
|
2
|
+
via the provider registry.
|
|
3
|
+
|
|
4
|
+
A single ``enrich.provider`` name selects both roles. Built-ins (``bedrock``,
|
|
5
|
+
``anthropic``, ``scripted``) are registered below; third-party providers register
|
|
6
|
+
out-of-tree under the ``agentforge_graph.judge_providers`` /
|
|
7
|
+
``…summarizer_providers`` entry-point groups (``pip install`` + one
|
|
8
|
+
``enrich.provider`` line, no core change). ``bedrock`` (boto3) and ``anthropic``
|
|
9
|
+
(the anthropic SDK) are imported lazily so the scripted/offline path needs
|
|
10
|
+
neither. ``anthropic`` is the direct Anthropic-API path for non-AWS users
|
|
11
|
+
(ENH-003 phase 2). ``scripted`` is the deterministic, credential-free provider
|
|
12
|
+
for CI and local runs without a model.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from collections.abc import Callable
|
|
18
|
+
|
|
19
|
+
from agentforge_graph.config import EnrichConfig
|
|
20
|
+
from agentforge_graph.providers import resolve_provider
|
|
21
|
+
|
|
22
|
+
from .governs import GovernsMatcher
|
|
23
|
+
from .judge import PatternJudge
|
|
24
|
+
from .summarizer import Summarizer
|
|
25
|
+
|
|
26
|
+
# Entry-point group names handed to ``resolve_provider``, one per enrichment role.
JUDGE_GROUP = "agentforge_graph.judge_providers"
SUMMARIZER_GROUP = "agentforge_graph.summarizer_providers"
GOVERNS_GROUP = "agentforge_graph.governs_matcher_providers"

# Builder signatures: each takes the ``EnrichConfig`` and returns the role's implementation.
JudgeBuilder = Callable[[EnrichConfig], PatternJudge]
SummarizerBuilder = Callable[[EnrichConfig], Summarizer]
GovernsMatcherBuilder = Callable[[EnrichConfig], GovernsMatcher]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _build_scripted_judge(cfg: EnrichConfig) -> PatternJudge:
    """Return a default ``ScriptedJudge``; ``cfg`` is accepted for the builder
    signature but not read."""
    from .judge import ScriptedJudge

    return ScriptedJudge()
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _build_bedrock_judge(cfg: EnrichConfig) -> PatternJudge:
    """Return a ``BedrockClaudeJudge`` built from ``cfg.model``, ``cfg.region``
    and ``cfg.assume_role_arn`` (a falsy ARN is passed as ``None``)."""
    from .bedrock import BedrockClaudeJudge  # lazy: only needs boto3 on this path

    return BedrockClaudeJudge(cfg.model, cfg.region, cfg.assume_role_arn or None)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _build_anthropic_judge(cfg: EnrichConfig) -> PatternJudge:
    """Return an ``AnthropicClaudeJudge`` for ``cfg.model``.

    ``api_key_env`` falls back to ``"ANTHROPIC_API_KEY"`` when ``cfg.api_key_env``
    is falsy; ``cfg.base_url`` is passed through unchanged.
    """
    from .anthropic import AnthropicClaudeJudge  # lazy: only needs the anthropic SDK here

    return AnthropicClaudeJudge(
        cfg.model, api_key_env=cfg.api_key_env or "ANTHROPIC_API_KEY", base_url=cfg.base_url
    )
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _build_scripted_summarizer(cfg: EnrichConfig) -> Summarizer:
    """Return a default ``ScriptedSummarizer``; ``cfg`` is accepted for the
    builder signature but not read."""
    from .summarizer import ScriptedSummarizer

    return ScriptedSummarizer()
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _build_bedrock_summarizer(cfg: EnrichConfig) -> Summarizer:
    """Return a ``BedrockClaudeSummarizer`` built from ``cfg.model``,
    ``cfg.region`` and ``cfg.assume_role_arn`` (a falsy ARN is passed as ``None``)."""
    from .bedrock_summarizer import BedrockClaudeSummarizer  # lazy: boto3 on this path

    return BedrockClaudeSummarizer(cfg.model, cfg.region, cfg.assume_role_arn or None)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _build_anthropic_summarizer(cfg: EnrichConfig) -> Summarizer:
    """Return an ``AnthropicClaudeSummarizer`` for ``cfg.model``.

    ``api_key_env`` falls back to ``"ANTHROPIC_API_KEY"`` when ``cfg.api_key_env``
    is falsy; ``cfg.base_url`` is passed through unchanged.
    """
    from .anthropic import AnthropicClaudeSummarizer  # lazy: anthropic SDK on this path

    return AnthropicClaudeSummarizer(
        cfg.model, api_key_env=cfg.api_key_env or "ANTHROPIC_API_KEY", base_url=cfg.base_url
    )
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _build_scripted_governs_matcher(cfg: EnrichConfig) -> GovernsMatcher:
    """Return a default ``ScriptedMatcher``; ``cfg`` is accepted for the builder
    signature but not read."""
    from .governs import ScriptedMatcher

    return ScriptedMatcher()
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _build_bedrock_governs_matcher(cfg: EnrichConfig) -> GovernsMatcher:
    """Return a ``ClaudeGovernsMatcher`` backed by a ``BedrockClient``.

    The client is built from ``cfg.model``, ``cfg.region`` and
    ``cfg.assume_role_arn`` (a falsy ARN is passed as ``None``); the matcher is
    given ``cfg.model`` as its model name.
    """
    from .bedrock_client import BedrockClient  # lazy: boto3 only on this path
    from .governs import ClaudeGovernsMatcher

    client = BedrockClient(cfg.model, cfg.region, cfg.assume_role_arn or None)
    return ClaudeGovernsMatcher(client, cfg.model)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _build_anthropic_governs_matcher(cfg: EnrichConfig) -> GovernsMatcher:
    """Return a ``ClaudeGovernsMatcher`` backed by an ``AnthropicClient``.

    The client receives ``cfg.model``, the API-key env var name (falling back
    to ``"ANTHROPIC_API_KEY"`` when ``cfg.api_key_env`` is falsy) and
    ``cfg.base_url``.
    """
    from .anthropic_client import AnthropicClient  # lazy: anthropic SDK only here
    from .governs import ClaudeGovernsMatcher

    client = AnthropicClient(cfg.model, cfg.api_key_env or "ANTHROPIC_API_KEY", cfg.base_url)
    # NOTE(review): the matcher gets ``client.model`` here, whereas the Bedrock
    # builder passes ``cfg.model`` — confirm the difference is intentional.
    return ClaudeGovernsMatcher(client, client.model)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
# Built-in builders per role, keyed by provider name. Each table is passed to
# ``resolve_provider`` together with the role's entry-point group, and all three
# expose the same provider names.
_JUDGE_BUILTINS: dict[str, JudgeBuilder] = {
    "bedrock": _build_bedrock_judge,
    "anthropic": _build_anthropic_judge,
    "scripted": _build_scripted_judge,
}
_SUMMARIZER_BUILTINS: dict[str, SummarizerBuilder] = {
    "bedrock": _build_bedrock_summarizer,
    "anthropic": _build_anthropic_summarizer,
    "scripted": _build_scripted_summarizer,
}
_GOVERNS_BUILTINS: dict[str, GovernsMatcherBuilder] = {
    "bedrock": _build_bedrock_governs_matcher,
    "anthropic": _build_anthropic_governs_matcher,
    "scripted": _build_scripted_governs_matcher,
}
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def judge_from_config(cfg: EnrichConfig) -> PatternJudge:
    """Build the ``PatternJudge`` for ``cfg.provider``.

    The builder is looked up with ``resolve_provider`` (given the built-in
    judge table and ``JUDGE_GROUP``) and then called with ``cfg``.
    """
    make_judge = resolve_provider(
        cfg.provider,
        _JUDGE_BUILTINS,
        JUDGE_GROUP,
        role="judge",
    )
    return make_judge(cfg)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def summarizer_from_config(cfg: EnrichConfig) -> Summarizer:
    """Build the ``Summarizer`` for ``cfg.provider``.

    The builder is looked up with ``resolve_provider`` (given the built-in
    summarizer table and ``SUMMARIZER_GROUP``) and then called with ``cfg``.
    """
    make_summarizer = resolve_provider(
        cfg.provider,
        _SUMMARIZER_BUILTINS,
        SUMMARIZER_GROUP,
        role="summarizer",
    )
    return make_summarizer(cfg)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def governs_matcher_from_config(cfg: EnrichConfig) -> GovernsMatcher:
    """Build the ``GovernsMatcher`` for ``cfg.provider`` (feat-010).

    The builder is looked up with ``resolve_provider`` (given the built-in
    governs table and ``GOVERNS_GROUP``) and then called with ``cfg``.
    """
    make_matcher = resolve_provider(
        cfg.provider,
        _GOVERNS_BUILTINS,
        GOVERNS_GROUP,
        role="governs_matcher",
    )
    return make_matcher(cfg)
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""Result types for LLM enrichment (feat-012)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel, Field
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class EnrichReport(BaseModel):
    """Result of a single ``PatternTagEnricher.enrich`` run.

    Every field has a zero/empty default, so a bare ``EnrichReport()`` is valid.
    """

    # Symbols the heuristics nominated.
    candidates: int = 0
    # Candidates that were sent to the judge.
    judged: int = 0
    # TAGGED edges written (confirmed, above floor).
    tagged: int = 0
    cost_usd: float = 0.0
    budget_tripped: bool = False
    # presumably a per-pattern count keyed by pattern name -- confirm with the enricher
    by_pattern: dict[str, int] = Field(default_factory=dict)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class TaggedInfo(BaseModel):
    """A single symbol's pattern tag, as used by ``CodeGraph.tagged`` / ``ckg_explain``.

    All four fields are required.
    """

    symbol_id: str
    pattern: str
    confidence: float
    rationale: str

    def to_dict(self) -> dict[str, object]:
        """Return this model's fields as a dict, via ``model_dump()``."""
        dumped: dict[str, object] = self.model_dump()
        return dumped
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class SummaryReport(BaseModel):
    """Result of a single ``SummaryEnricher.enrich`` run.

    Every field has a zero/false default, so a bare ``SummaryReport()`` is valid.
    """

    files_summarized: int = 0
    repo_summarized: bool = False
    cost_usd: float = 0.0
    budget_tripped: bool = False
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class GovernsReport(BaseModel):
    """Result of a single ``DecisionGovernsInferencer.enrich`` run (feat-010).

    Every field has a zero/false default, so a bare ``GovernsReport()`` is valid.
    """

    # Decision nodes in the graph.
    decisions_total: int = 0
    # Decisions with zero *parsed* GOVERNS edges (the gap the LLM fills).
    decisions_considered: int = 0
    # Symbols offered to the matcher.
    candidates: int = 0
    # LLM GOVERNS edges written (above the floor).
    governs_inferred: int = 0
    cost_usd: float = 0.0
    budget_tripped: bool = False
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class SummaryInfo(BaseModel):
    """A single summary, as used by ``CodeGraph.summaries`` / ``ckg_explain``.

    All four fields are required.
    """

    # Node id of the symbol/file/repo this summary describes.
    target: str
    level: str
    text: str
    path: str

    def to_dict(self) -> dict[str, object]:
        """Return this model's fields as a dict, via ``model_dump()``."""
        dumped: dict[str, object] = self.model_dump()
        return dumped
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""The summarizer interface (feat-012 summaries) — injectable like PatternJudge.
|
|
2
|
+
|
|
3
|
+
``BedrockClaudeSummarizer`` is the live adapter; ``ScriptedSummarizer`` keeps the
|
|
4
|
+
bottom-up enricher (ordering, embedding, budget, idempotency) deterministic in
|
|
5
|
+
CI with no model.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from collections.abc import Callable
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
from typing import Protocol, runtime_checkable
|
|
13
|
+
|
|
14
|
+
from pydantic import BaseModel
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class FileContext:
    """Per-file input handed to a summarizer.

    ``path`` is required; ``symbols`` and ``imports`` each default to a new
    empty list per instance.
    """

    path: str
    # Each entry is a (name, signature) pair.
    symbols: list[tuple[str, str]] = field(default_factory=list)
    # presumably the file's imported module names -- confirm with the producer
    imports: list[str] = field(default_factory=list)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class Summary(BaseModel):
    """Text returned by a summarizer, with the name of the model that produced it.

    ``text`` is required; ``model`` defaults to the empty string.
    """

    text: str
    model: str = ""
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@runtime_checkable
class Summarizer(Protocol):
    """Structural interface for summarizers.

    Implementations provide two coroutines (file-level and repo-level
    summaries) and a read-only ``cost_usd`` value. The protocol is
    runtime-checkable, so ``isinstance(obj, Summarizer)`` is supported.
    """

    async def summarize_file(self, ctx: FileContext, max_words: int) -> Summary:
        """Summarize one file described by ``ctx``."""
        ...

    async def summarize_repo(
        self,
        repo: str,
        file_summaries: list[tuple[str, str]],
        max_words: int,
    ) -> Summary:
        """Summarize ``repo`` from its ``file_summaries``."""
        ...

    @property
    def cost_usd(self) -> float:
        """Cost in USD reported by the implementation."""
        ...
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# Callable that maps a FileContext to summary text; the type of the ``fn``
# argument accepted by ``ScriptedSummarizer``.
FileFn = Callable[[FileContext], str]
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class ScriptedSummarizer:
    """Deterministic summarizer for tests.

    With no ``fn``, file summaries are a fixed string built from the context's
    ``path`` and the number of entries in ``symbols``; pass ``fn`` to script
    the file-summary text. ``max_words`` is accepted by both coroutines but is
    not applied, and ``cost_usd`` is never changed from ``0.0``.
    """

    def __init__(self, fn: FileFn | None = None) -> None:
        """Store the file-summary callable.

        Args:
            fn: Callable mapping a ``FileContext`` to summary text. ``None``
                selects the built-in default.
        """
        # Test against None rather than truthiness, so a supplied callable
        # that happens to be falsy (e.g. it defines __bool__ or __len__) is
        # still used instead of being silently replaced by the default.
        self._fn: FileFn = fn if fn is not None else self._default_file_text
        self._cost = 0.0

    @staticmethod
    def _default_file_text(ctx: FileContext) -> str:
        """Return the default summary text for ``ctx``."""
        return f"summary of {ctx.path} ({len(ctx.symbols)} symbols)"

    async def summarize_file(self, ctx: FileContext, max_words: int) -> Summary:
        """Return the scripted text for ``ctx`` with model ``"scripted"``."""
        return Summary(text=self._fn(ctx), model="scripted")

    async def summarize_repo(
        self, repo: str, file_summaries: list[tuple[str, str]], max_words: int
    ) -> Summary:
        """Return a fixed summary naming ``repo`` and the number of file summaries."""
        return Summary(text=f"repo {repo}: {len(file_summaries)} files", model="scripted")

    @property
    def cost_usd(self) -> float:
        """Cost in USD; this class never updates it, so it stays ``0.0``."""
        return self._cost
|