agentforge-graph 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentforge_graph/__init__.py +6 -0
- agentforge_graph/chunking/__init__.py +12 -0
- agentforge_graph/chunking/cast.py +159 -0
- agentforge_graph/chunking/chunk.py +19 -0
- agentforge_graph/chunking/tokens.py +15 -0
- agentforge_graph/cli.py +607 -0
- agentforge_graph/config.py +259 -0
- agentforge_graph/core/__init__.py +54 -0
- agentforge_graph/core/conformance.py +270 -0
- agentforge_graph/core/contracts.py +163 -0
- agentforge_graph/core/kinds.py +68 -0
- agentforge_graph/core/models.py +134 -0
- agentforge_graph/core/provenance.py +62 -0
- agentforge_graph/core/symbols.py +116 -0
- agentforge_graph/embed/__init__.py +28 -0
- agentforge_graph/embed/base.py +22 -0
- agentforge_graph/embed/bedrock.py +85 -0
- agentforge_graph/embed/fake.py +34 -0
- agentforge_graph/embed/openai.py +67 -0
- agentforge_graph/embed/pipeline.py +184 -0
- agentforge_graph/embed/registry.py +66 -0
- agentforge_graph/embed/report.py +15 -0
- agentforge_graph/enrich/__init__.py +70 -0
- agentforge_graph/enrich/anthropic.py +38 -0
- agentforge_graph/enrich/anthropic_client.py +109 -0
- agentforge_graph/enrich/bedrock.py +24 -0
- agentforge_graph/enrich/bedrock_client.py +115 -0
- agentforge_graph/enrich/bedrock_summarizer.py +23 -0
- agentforge_graph/enrich/claude.py +172 -0
- agentforge_graph/enrich/enricher.py +108 -0
- agentforge_graph/enrich/governs.py +173 -0
- agentforge_graph/enrich/governs_enricher.py +152 -0
- agentforge_graph/enrich/heuristics.py +224 -0
- agentforge_graph/enrich/judge.py +63 -0
- agentforge_graph/enrich/registry.py +133 -0
- agentforge_graph/enrich/report.py +60 -0
- agentforge_graph/enrich/summarizer.py +62 -0
- agentforge_graph/enrich/summary_enricher.py +211 -0
- agentforge_graph/enrich/taxonomy.py +38 -0
- agentforge_graph/frameworks/__init__.py +29 -0
- agentforge_graph/frameworks/base.py +75 -0
- agentforge_graph/frameworks/detect.py +124 -0
- agentforge_graph/frameworks/extractor.py +63 -0
- agentforge_graph/frameworks/orm.py +93 -0
- agentforge_graph/frameworks/packs/_js_ast.py +56 -0
- agentforge_graph/frameworks/packs/_python_ast.py +157 -0
- agentforge_graph/frameworks/packs/django/__init__.py +240 -0
- agentforge_graph/frameworks/packs/django/models.scm +7 -0
- agentforge_graph/frameworks/packs/express/__init__.py +133 -0
- agentforge_graph/frameworks/packs/express/routes.scm +8 -0
- agentforge_graph/frameworks/packs/fastapi/__init__.py +210 -0
- agentforge_graph/frameworks/packs/fastapi/depends.scm +6 -0
- agentforge_graph/frameworks/packs/fastapi/routes.scm +10 -0
- agentforge_graph/frameworks/packs/flask/__init__.py +143 -0
- agentforge_graph/frameworks/packs/flask/routes.scm +11 -0
- agentforge_graph/frameworks/packs/nestjs/__init__.py +205 -0
- agentforge_graph/frameworks/packs/nestjs/routes.scm +6 -0
- agentforge_graph/frameworks/packs/spring/__init__.py +267 -0
- agentforge_graph/frameworks/packs/spring/routes.scm +6 -0
- agentforge_graph/frameworks/packs/sqlalchemy/__init__.py +250 -0
- agentforge_graph/frameworks/packs/sqlalchemy/models.scm +7 -0
- agentforge_graph/frameworks/registry.py +44 -0
- agentforge_graph/ingest/__init__.py +30 -0
- agentforge_graph/ingest/codegraph.py +847 -0
- agentforge_graph/ingest/extractor.py +353 -0
- agentforge_graph/ingest/incremental/__init__.py +25 -0
- agentforge_graph/ingest/incremental/detect.py +118 -0
- agentforge_graph/ingest/incremental/dirty.py +61 -0
- agentforge_graph/ingest/incremental/indexer.py +218 -0
- agentforge_graph/ingest/incremental/meta.py +72 -0
- agentforge_graph/ingest/incremental/ports.py +39 -0
- agentforge_graph/ingest/pack.py +160 -0
- agentforge_graph/ingest/packs/__init__.py +34 -0
- agentforge_graph/ingest/packs/cpp/__init__.py +35 -0
- agentforge_graph/ingest/packs/cpp/references.scm +15 -0
- agentforge_graph/ingest/packs/cpp/structure.scm +49 -0
- agentforge_graph/ingest/packs/csharp/__init__.py +35 -0
- agentforge_graph/ingest/packs/csharp/references.scm +12 -0
- agentforge_graph/ingest/packs/csharp/structure.scm +45 -0
- agentforge_graph/ingest/packs/go/__init__.py +38 -0
- agentforge_graph/ingest/packs/go/references.scm +12 -0
- agentforge_graph/ingest/packs/go/structure.scm +64 -0
- agentforge_graph/ingest/packs/java/__init__.py +35 -0
- agentforge_graph/ingest/packs/java/references.scm +12 -0
- agentforge_graph/ingest/packs/java/structure.scm +38 -0
- agentforge_graph/ingest/packs/javascript/__init__.py +34 -0
- agentforge_graph/ingest/packs/javascript/references.scm +11 -0
- agentforge_graph/ingest/packs/javascript/structure.scm +166 -0
- agentforge_graph/ingest/packs/php/__init__.py +35 -0
- agentforge_graph/ingest/packs/php/references.scm +15 -0
- agentforge_graph/ingest/packs/php/structure.scm +44 -0
- agentforge_graph/ingest/packs/python/__init__.py +25 -0
- agentforge_graph/ingest/packs/python/references.scm +14 -0
- agentforge_graph/ingest/packs/python/structure.scm +57 -0
- agentforge_graph/ingest/packs/ruby/__init__.py +37 -0
- agentforge_graph/ingest/packs/ruby/references.scm +12 -0
- agentforge_graph/ingest/packs/ruby/structure.scm +37 -0
- agentforge_graph/ingest/packs/rust/__init__.py +39 -0
- agentforge_graph/ingest/packs/rust/references.scm +12 -0
- agentforge_graph/ingest/packs/rust/structure.scm +46 -0
- agentforge_graph/ingest/packs/typescript/__init__.py +31 -0
- agentforge_graph/ingest/packs/typescript/references.scm +11 -0
- agentforge_graph/ingest/packs/typescript/structure.scm +99 -0
- agentforge_graph/ingest/pipeline.py +134 -0
- agentforge_graph/ingest/report.py +84 -0
- agentforge_graph/ingest/resolver.py +467 -0
- agentforge_graph/ingest/source.py +79 -0
- agentforge_graph/knowledge/__init__.py +28 -0
- agentforge_graph/knowledge/adr.py +136 -0
- agentforge_graph/knowledge/commits.py +152 -0
- agentforge_graph/knowledge/ingest.py +312 -0
- agentforge_graph/knowledge/mentions.py +71 -0
- agentforge_graph/knowledge/report.py +32 -0
- agentforge_graph/main.py +21 -0
- agentforge_graph/providers.py +36 -0
- agentforge_graph/repomap/__init__.py +14 -0
- agentforge_graph/repomap/rank.py +161 -0
- agentforge_graph/repomap/render.py +55 -0
- agentforge_graph/repomap/repomap.py +66 -0
- agentforge_graph/retrieve/__init__.py +21 -0
- agentforge_graph/retrieve/pack.py +76 -0
- agentforge_graph/retrieve/rerank.py +251 -0
- agentforge_graph/retrieve/retriever.py +286 -0
- agentforge_graph/retrieve/scoring.py +36 -0
- agentforge_graph/serve/__init__.py +19 -0
- agentforge_graph/serve/engine.py +204 -0
- agentforge_graph/serve/http_runner.py +133 -0
- agentforge_graph/serve/server.py +110 -0
- agentforge_graph/serve/tools.py +307 -0
- agentforge_graph/store/__init__.py +32 -0
- agentforge_graph/store/_rowmap.py +102 -0
- agentforge_graph/store/errors.py +22 -0
- agentforge_graph/store/facade.py +89 -0
- agentforge_graph/store/kuzu_store.py +380 -0
- agentforge_graph/store/lance_store.py +146 -0
- agentforge_graph/store/neo4j_store.py +294 -0
- agentforge_graph/store/pgvector_store.py +170 -0
- agentforge_graph/store/registry.py +45 -0
- agentforge_graph/temporal/__init__.py +36 -0
- agentforge_graph/temporal/backfill.py +338 -0
- agentforge_graph/temporal/events.py +82 -0
- agentforge_graph/temporal/index.py +190 -0
- agentforge_graph/temporal/mining.py +190 -0
- agentforge_graph/temporal/recorder.py +114 -0
- agentforge_graph/temporal/store.py +282 -0
- agentforge_graph-0.3.2.dist-info/METADATA +291 -0
- agentforge_graph-0.3.2.dist-info/RECORD +151 -0
- agentforge_graph-0.3.2.dist-info/WHEEL +4 -0
- agentforge_graph-0.3.2.dist-info/entry_points.txt +3 -0
- agentforge_graph-0.3.2.dist-info/licenses/LICENSE +202 -0
- agentforge_graph-0.3.2.dist-info/licenses/NOTICE +14 -0
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""Result types for ADR/knowledge ingestion (feat-010)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel, Field
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class KnowledgeStats(BaseModel):
    """Counters describing the result of a single ``KnowledgeIngestor.ingest`` run.

    Every counter starts at zero.
    """

    decisions_indexed: int = 0
    # GOVERNS edges created from unambiguous mentions
    governs_resolved: int = 0
    # mentions seen but not linked (unknown/ambiguous)
    mentions_unresolved: int = 0
    # general doc files ingested (doc_globs, feat-010)
    docs_indexed: int = 0
    # DESCRIBES edges created from doc mentions
    describes_resolved: int = 0
    # commit messages ingested as DocChunks (feat-010)
    commits_indexed: int = 0
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class DecisionInfo(BaseModel):
    """A single decision record, as exposed through ``CodeGraph.decisions``,
    ``ckg decisions`` and the ``ckg_decisions`` tool."""

    id: str
    adr_id: str
    title: str
    status: str
    date: str
    path: str
    # node ids this decision governs
    governs: list[str] = Field(default_factory=list)

    def to_dict(self) -> dict[str, object]:
        """Return the model's fields as a plain dict (pydantic ``model_dump``)."""
        dumped = self.model_dump()
        return dumped
|
agentforge_graph/main.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# AGENTFORGE-MANAGED: template:minimal@0.2.4 hash:c05d490b5a1c
|
|
2
|
+
"""Entry point for the ``ckg`` / ``agentforge-graph`` console script.
|
|
3
|
+
|
|
4
|
+
Dispatches to the CKG command-line interface (``ckg index`` today;
|
|
5
|
+
``serve-mcp`` and friends land with feat-008). Engine config is read from
|
|
6
|
+
``ckg.yaml``; framework config from ``agentforge.yaml``.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import sys
|
|
12
|
+
|
|
13
|
+
from agentforge_graph.cli import main as cli_main
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def main() -> None:
    """Run the CLI and exit the process with whatever status it returns."""
    status = cli_main()
    sys.exit(status)


# Allow running the module directly as a script.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""Generic model-provider registry: a config provider-name → builder callable.
|
|
2
|
+
|
|
3
|
+
Mirrors the storage driver registry (``store/registry.py``) for the model layer
|
|
4
|
+
(embedders, judges, summarizers). Built-in providers are registered in each
|
|
5
|
+
role's ``_BUILTINS`` map; third-party providers register out-of-tree via
|
|
6
|
+
entry-point groups, so they install as ``pip install`` + one config line with no
|
|
7
|
+
core change.
|
|
8
|
+
|
|
9
|
+
Engine-shared, deterministic — must not import ``agentforge`` (ADR-0001). Only
|
|
10
|
+
stdlib ``importlib.metadata`` is used here.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from importlib.metadata import entry_points
|
|
16
|
+
from typing import cast
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ProviderNotFound(ValueError):
    """No built-in and no entry point is registered under the requested
    config provider name."""
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def resolve_provider[T](name: str, builtins: dict[str, T], group: str, *, role: str) -> T:
|
|
24
|
+
"""Return the builder registered for ``name`` — a built-in first, otherwise an
|
|
25
|
+
entry point in ``group``. Raises ``ProviderNotFound`` (listing the built-ins
|
|
26
|
+
and the entry-point group) when nothing matches."""
|
|
27
|
+
if name in builtins:
|
|
28
|
+
return builtins[name]
|
|
29
|
+
for ep in entry_points(group=group):
|
|
30
|
+
if ep.name == name:
|
|
31
|
+
return cast(T, ep.load())
|
|
32
|
+
known = sorted(builtins)
|
|
33
|
+
raise ProviderNotFound(
|
|
34
|
+
f"unknown {role} provider {name!r}; built-in providers: {known} "
|
|
35
|
+
f"(third-party providers register under the {group!r} entry-point group)"
|
|
36
|
+
)
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""agentforge_graph.repomap — Aider-style budget-aware repo map (feat-007).
|
|
2
|
+
|
|
3
|
+
Personalized PageRank over the symbol graph → token-budgeted signature
|
|
4
|
+
summary. Deterministic, LLM-free; imports nothing from ``agentforge``
|
|
5
|
+
(ADR-0001).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from .rank import RankedSymbol
|
|
11
|
+
from .render import render_map
|
|
12
|
+
from .repomap import RepoMap
|
|
13
|
+
|
|
14
|
+
__all__ = ["RankedSymbol", "RepoMap", "render_map"]
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
"""Rank symbols by structural importance: project a provenance-weighted
|
|
2
|
+
symbol→symbol digraph (CALLS/REFERENCES/INHERITS) and run (personalized)
|
|
3
|
+
PageRank — Aider's recipe. Deterministic and LLM-free.
|
|
4
|
+
|
|
5
|
+
PageRank is a small dependency-free power iteration (networkx's `pagerank`
|
|
6
|
+
pulls in scipy/numpy, which we don't want in the engine for a 20-line algo).
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from collections import defaultdict
|
|
12
|
+
from collections.abc import Sequence
|
|
13
|
+
|
|
14
|
+
from pydantic import BaseModel
|
|
15
|
+
|
|
16
|
+
from agentforge_graph.core import EdgeKind, GraphQuery, NodeKind, Source, SymbolID
|
|
17
|
+
from agentforge_graph.store import Store
|
|
18
|
+
|
|
19
|
+
# Edge kinds followed when projecting the symbol→symbol ranking graph.
_RANK_EDGES = [
    EdgeKind.CALLS,
    EdgeKind.REFERENCES,
    EdgeKind.INHERITS,
]
# Query limit used when every node of the requested kinds should be fetched.
_ALL = 10_000_000
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class RankedSymbol(BaseModel):
    """One symbol together with the rank assigned to it by ``rank_symbols``."""

    # graph node id of the symbol
    id: str
    name: str
    kind: NodeKind
    # path component parsed from the symbol id
    path: str
    # PageRank score after the privacy multiplier has been applied
    rank: float
    # the node's "signature" attribute; empty string when the node has none
    signature: str
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _edge_weight(weights: dict[str, float], source: Source) -> float:
|
|
33
|
+
return weights.get(source.value, 0.5)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _is_private_name(name: str) -> bool:
|
|
37
|
+
"""A leading-underscore name is private — except dunders (``__init__``,
|
|
38
|
+
``__call__``), which are public protocol surface."""
|
|
39
|
+
return name.startswith("_") and not (name.startswith("__") and name.endswith("__"))
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _is_private_module(path: str) -> bool:
|
|
43
|
+
"""A ``_``-prefixed module is internal (``_compat.py``, ``_winconsole.py``).
|
|
44
|
+
``__init__`` is the package root — the de-facto public surface, not private."""
|
|
45
|
+
stem = path.rsplit("/", 1)[-1].split(".", 1)[0]
|
|
46
|
+
return stem.startswith("_") and stem != "__init__"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _privacy_multiplier(name: str, path: str, public_bias: float) -> float:
|
|
50
|
+
"""ENH-007: a display-rank weight (not a filter) that demotes clearly-private
|
|
51
|
+
symbols. ``public_bias`` in [0, 1]; 0 disables. Private → ``1 - public_bias``."""
|
|
52
|
+
if public_bias <= 0.0:
|
|
53
|
+
return 1.0
|
|
54
|
+
if _is_private_name(name) or _is_private_module(path):
|
|
55
|
+
return max(0.0, 1.0 - public_bias)
|
|
56
|
+
return 1.0
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _pagerank(
|
|
60
|
+
nodes: list[str],
|
|
61
|
+
out_edges: dict[str, dict[str, float]],
|
|
62
|
+
damping: float,
|
|
63
|
+
personalization: dict[str, float] | None,
|
|
64
|
+
max_iter: int = 100,
|
|
65
|
+
tol: float = 1e-9,
|
|
66
|
+
) -> dict[str, float]:
|
|
67
|
+
n = len(nodes)
|
|
68
|
+
if n == 0:
|
|
69
|
+
return {}
|
|
70
|
+
if personalization and sum(personalization.values()) > 0:
|
|
71
|
+
total = sum(personalization.values())
|
|
72
|
+
teleport = {nid: personalization.get(nid, 0.0) / total for nid in nodes}
|
|
73
|
+
else:
|
|
74
|
+
teleport = {nid: 1.0 / n for nid in nodes}
|
|
75
|
+
rank = {nid: 1.0 / n for nid in nodes}
|
|
76
|
+
out_sum = {src: sum(dsts.values()) for src, dsts in out_edges.items()}
|
|
77
|
+
for _ in range(max_iter):
|
|
78
|
+
nxt = {nid: (1.0 - damping) * teleport[nid] for nid in nodes}
|
|
79
|
+
dangling = sum(rank[nid] for nid in nodes if out_sum.get(nid, 0.0) == 0.0)
|
|
80
|
+
for nid in nodes:
|
|
81
|
+
nxt[nid] += damping * dangling * teleport[nid]
|
|
82
|
+
for src, dsts in out_edges.items():
|
|
83
|
+
total = out_sum[src]
|
|
84
|
+
if total == 0.0:
|
|
85
|
+
continue
|
|
86
|
+
share = damping * rank[src] / total
|
|
87
|
+
for dst, weight in dsts.items():
|
|
88
|
+
nxt[dst] += share * weight
|
|
89
|
+
err = sum(abs(nxt[nid] - rank[nid]) for nid in nodes)
|
|
90
|
+
rank = nxt
|
|
91
|
+
if err < tol:
|
|
92
|
+
break
|
|
93
|
+
return rank
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
async def rank_symbols(
    store: Store,
    kinds: list[NodeKind],
    damping: float,
    edge_weights: dict[str, float],
    focus: Sequence[str] | None = None,
    scope: str | None = None,
    public_bias: float = 0.0,
) -> list[RankedSymbol]:
    """Rank the store's symbols of the given ``kinds`` with PageRank.

    Args:
        store: graph store queried for the nodes and their outgoing edges.
        kinds: node kinds to include in the ranking.
        damping: PageRank damping factor.
        edge_weights: edge weight per provenance source value.
        focus: node ids and/or path prefixes to personalize the ranking
            toward; has no effect when it matches no ranked node.
        scope: when given, only nodes whose symbol path starts with this
            string are ranked.
        public_bias: demotion applied to private symbols after PageRank.

    Returns:
        The ranked symbols, highest rank first with ties broken by id; an empty
        list when no node qualifies.
    """
    fetched = await store.graph.query(GraphQuery(kinds=kinds, limit=_ALL))
    symbols = fetched.nodes
    if scope is not None:
        symbols = [s for s in symbols if SymbolID.parse(s.id).path.startswith(scope)]
    known = {s.id: s for s in symbols}
    if not known:
        return []

    # Project the weighted digraph, keeping only edges that land on a ranked node.
    graph: dict[str, dict[str, float]] = defaultdict(dict)
    for sym in symbols:
        for link in await store.graph.adjacent(sym.id, _RANK_EDGES, "out"):
            if link.dst in known:
                weight = _edge_weight(edge_weights, link.provenance.source)
                fanout = graph[link.src]
                fanout[link.dst] = fanout.get(link.dst, 0.0) + weight

    # Personalize only when the focus actually resolves to ranked nodes.
    seeds = _expand_focus(focus, set(known)) if focus else set()
    personalization = (
        {nid: (1.0 if nid in seeds else 0.0) for nid in known} if seeds else None
    )

    scores = _pagerank(list(known), dict(graph), damping, personalization)

    def as_ranked(sym) -> RankedSymbol:
        path = SymbolID.parse(sym.id).path
        # ENH-007: the public-API bias touches the *display* rank only. It is
        # applied after PageRank, so propagation through the graph is unchanged:
        # private hubs still pass their centrality on and merely sort lower.
        score = scores.get(sym.id, 0.0) * _privacy_multiplier(sym.name, path, public_bias)
        return RankedSymbol(
            id=sym.id,
            name=sym.name,
            kind=sym.kind,
            path=path,
            rank=score,
            signature=str(sym.attrs.get("signature", "")),
        )

    # id as secondary key keeps the order deterministic across equal ranks
    return sorted((as_ranked(s) for s in symbols), key=lambda r: (-r.rank, r.id))
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _expand_focus(focus: Sequence[str], ids: set[str]) -> set[str]:
|
|
149
|
+
matched: set[str] = set()
|
|
150
|
+
paths: set[str] = set()
|
|
151
|
+
for f in focus:
|
|
152
|
+
if f in ids:
|
|
153
|
+
matched.add(f)
|
|
154
|
+
else:
|
|
155
|
+
paths.add(f)
|
|
156
|
+
if paths:
|
|
157
|
+
for nid in ids:
|
|
158
|
+
p = SymbolID.parse(nid).path
|
|
159
|
+
if any(p == fp or p.startswith(fp) for fp in paths):
|
|
160
|
+
matched.add(nid)
|
|
161
|
+
return matched
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""Pack ranked symbols into a token budget, grouped by file (files ordered by
|
|
2
|
+
their top symbol's rank). Whole signature lines only; a final note reports how
|
|
3
|
+
many symbols fell below the budget — never a silent cap."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from agentforge_graph.chunking import estimate_tokens
|
|
8
|
+
|
|
9
|
+
from .rank import RankedSymbol
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def render_map(
    ranked: list[RankedSymbol],
    budget_tokens: int,
    summaries: dict[str, str] | None = None,
) -> str:
    """Render ``ranked`` as a per-file text map that fits ``budget_tokens``.

    Args:
        ranked: symbols in the order they should be considered; a file's
            position is decided by the first of its symbols in this list.
        budget_tokens: token ceiling for the emitted headers and symbol lines,
            as measured by ``estimate_tokens``.
        summaries: optional ``path -> one-line summary`` shown under a file's
            header.

    Returns:
        The map text. Packing stops at the first symbol line that does not fit;
        if any symbols were left out, a final line states how many.
    """
    file_notes = summaries or {}

    # dict insertion order doubles as the file order (first sighting wins)
    grouped: dict[str, list[RankedSymbol]] = {}
    for sym in ranked:
        grouped.setdefault(sym.path, []).append(sym)

    lines: list[str] = []

    def fits(extra: list[str]) -> bool:
        # measure the whole accumulated content (estimate_tokens is non-additive)
        candidate = "\n".join([*lines, *extra])
        return estimate_tokens(candidate) <= budget_tokens

    emitted = 0
    out_of_room = False
    for path, symbols in grouped.items():
        if out_of_room:
            break
        head: list[str] = [f"{path}:"]
        # a one-line file summary (feat-012) under the header, when present
        note = file_notes.get(path)
        if note:
            head.append(f" # {note}")
        for position, sym in enumerate(symbols):
            entry = f" {sym.signature or f'{sym.name}(...)'}"
            # the header travels with the file's first symbol line, so a file
            # that cannot place any symbol leaves no dangling header behind
            trial = [*head, entry] if position == 0 else [entry]
            if not fits(trial):
                out_of_room = True
                break
            lines.extend(trial)
            emitted += 1

    remaining = len(ranked) - emitted
    if remaining > 0:
        lines.append(f"… {remaining} more symbols below the budget")
    return "\n".join(lines)
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""``RepoMap`` facade: ranked symbols (structured) + a budget-packed text map."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Sequence
|
|
6
|
+
|
|
7
|
+
from agentforge_graph.config import RepoMapConfig
|
|
8
|
+
from agentforge_graph.core import EdgeKind, GraphQuery, NodeKind, SymbolID
|
|
9
|
+
from agentforge_graph.store import Store
|
|
10
|
+
|
|
11
|
+
from .rank import RankedSymbol, rank_symbols
|
|
12
|
+
from .render import render_map
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class RepoMap:
    """Facade tying ranking and rendering to one store and one ``RepoMapConfig``."""

    def __init__(self, store: Store, config: RepoMapConfig) -> None:
        self.config = config
        self.store = store

    def _kinds(self, override: list[NodeKind] | None) -> list[NodeKind]:
        """Return ``override`` when given, else the kinds named in the config."""
        if override is not None:
            return override
        return [NodeKind(raw) for raw in self.config.kinds]

    async def ranked_symbols(
        self, k: int = 100, focus: Sequence[str] | None = None
    ) -> list[RankedSymbol]:
        """Return the first ``k`` ranked symbols of the configured kinds,
        optionally personalized toward ``focus``."""
        everything = await rank_symbols(
            self.store,
            self._kinds(None),
            self.config.damping,
            self.config.edge_weights,
            focus=focus,
            public_bias=self.config.public_bias,
        )
        return everything[:k]

    async def render(
        self,
        budget_tokens: int | None = None,
        focus: Sequence[str] | None = None,
        scope: str | None = None,
        kinds: list[NodeKind] | None = None,
    ) -> str:
        """Rank symbols and pack them into a text map.

        ``budget_tokens`` falls back to the config's ``default_budget`` and
        ``kinds`` to the configured kinds; ``focus`` and ``scope`` are passed
        through to the ranking.
        """
        if budget_tokens is None:
            budget_tokens = self.config.default_budget
        ranked = await rank_symbols(
            self.store,
            self._kinds(kinds),
            self.config.damping,
            self.config.edge_weights,
            focus=focus,
            scope=scope,
            public_bias=self.config.public_bias,
        )
        summaries = await self._file_summaries()
        return render_map(ranked, budget_tokens, summaries)

    async def _file_summaries(self) -> dict[str, str]:
        """path -> file summary text (feat-012), when summaries have been run."""
        found = await self.store.graph.query(
            GraphQuery(kinds=[NodeKind.SUMMARY], limit=10**9)
        )
        by_path: dict[str, str] = {}
        for summary in found.nodes:
            # only file-level summaries are of interest here
            if str(summary.attrs.get("level")) == "file":
                targets = await self.store.graph.adjacent(
                    summary.id, [EdgeKind.SUMMARIZES], "out"
                )
                for edge in targets:
                    by_path[SymbolID.parse(edge.dst).path] = str(summary.attrs.get("text", ""))
        return by_path
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""agentforge_graph.retrieve — hybrid retrieval (feat-006).
|
|
2
|
+
|
|
3
|
+
Vector entry → typed graph expansion → provenance-weighted merge, as one
|
|
4
|
+
typed call. Deterministic and LLM-free in the retrieval path; imports
|
|
5
|
+
nothing from ``agentforge`` (ADR-0001).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from .pack import ContextItem, ContextPack
|
|
11
|
+
from .rerank import NoopReranker, Reranker
|
|
12
|
+
from .retriever import Mode, Retriever
|
|
13
|
+
|
|
14
|
+
__all__ = [
|
|
15
|
+
"ContextItem",
|
|
16
|
+
"ContextPack",
|
|
17
|
+
"Retriever",
|
|
18
|
+
"Mode",
|
|
19
|
+
"Reranker",
|
|
20
|
+
"NoopReranker",
|
|
21
|
+
]
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""The retrieval result: a ranked, deduped, connected context pack.
|
|
2
|
+
|
|
3
|
+
``render`` packs highest-score items first, emits whole code blocks (never
|
|
4
|
+
splits a chunk), and degrades an over-budget item to its signature line.
|
|
5
|
+
``to_dict`` is the structured form feat-008 tools return.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from pydantic import BaseModel, Field
|
|
13
|
+
|
|
14
|
+
from agentforge_graph.chunking import estimate_tokens
|
|
15
|
+
from agentforge_graph.core import NodeKind, Source
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ContextItem(BaseModel):
    """One entry of a context pack: a symbol or chunk, its score and why it
    was included."""

    # symbol or chunk id
    id: str
    kind: NodeKind
    name: str
    score: float
    path: str
    span: tuple[int, int] | None = None
    # chunk text, rendered verbatim
    code: str | None = None
    provenance: Source
    # trace of how it was included
    why: list[str] = Field(default_factory=list)
    # feat-009: denormalised churn/authorship, present only when the temporal
    # layer has mined this symbol (introduced/last_changed/churn_*/top_authors).
    temporal: dict[str, Any] | None = None

    def signature(self) -> str:
        """One-line description: path (plus ``:start-end`` when a span is set),
        name, kind value and the score to two decimals."""
        location = f"{self.path}:{self.span[0]}-{self.span[1]}" if self.span else self.path
        return f"{location} {self.name} ({self.kind.value}) score={self.score:.2f}"

    def block(self) -> str:
        """Full rendering: the signature as a ``#`` comment line above the
        code, or just the signature when there is no code."""
        header = self.signature()
        return f"# {header}\n{self.code}" if self.code else header
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class ContextPack(BaseModel):
    """Result of one retrieval call: the items plus free-form notes."""

    query: str | None = None
    symbol: str | None = None
    mode: str = "context"
    items: list[ContextItem] = Field(default_factory=list)
    notes: list[str] = Field(default_factory=list)

    def render(self, budget_tokens: int) -> str:
        """Render the items, in their stored order, within ``budget_tokens``.

        Each item is emitted as its whole block when that fits the remaining
        budget, otherwise as its signature line, otherwise it is omitted and
        counted. An omission line (if any) and the notes close the output.

        NOTE(review): only the per-item estimates are summed against the
        budget; the blank-line separators and the footer are not counted.
        """
        sections: list[str] = []
        spent = 0
        omitted = 0
        for item in self.items:  # already score-sorted
            # whole block first; degrade to a signature instead of splitting
            for produce in (item.block, item.signature):
                text = produce()
                cost = estimate_tokens(text)
                if spent + cost <= budget_tokens:
                    sections.append(text)
                    spent += cost
                    break
            else:
                omitted += 1

        tail = [f"… {omitted} more item(s) omitted (token budget)"] if omitted else []
        footer = [*tail, *self.notes]
        if footer:
            sections.append("\n".join(footer))
        return "\n\n".join(sections)

    def to_dict(self) -> dict[str, Any]:
        """Structured form of the pack (pydantic ``model_dump``)."""
        dumped = self.model_dump()
        return dumped
|