agentforge-graph 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentforge_graph/__init__.py +6 -0
- agentforge_graph/chunking/__init__.py +12 -0
- agentforge_graph/chunking/cast.py +159 -0
- agentforge_graph/chunking/chunk.py +19 -0
- agentforge_graph/chunking/tokens.py +15 -0
- agentforge_graph/cli.py +607 -0
- agentforge_graph/config.py +259 -0
- agentforge_graph/core/__init__.py +54 -0
- agentforge_graph/core/conformance.py +270 -0
- agentforge_graph/core/contracts.py +163 -0
- agentforge_graph/core/kinds.py +68 -0
- agentforge_graph/core/models.py +134 -0
- agentforge_graph/core/provenance.py +62 -0
- agentforge_graph/core/symbols.py +116 -0
- agentforge_graph/embed/__init__.py +28 -0
- agentforge_graph/embed/base.py +22 -0
- agentforge_graph/embed/bedrock.py +85 -0
- agentforge_graph/embed/fake.py +34 -0
- agentforge_graph/embed/openai.py +67 -0
- agentforge_graph/embed/pipeline.py +184 -0
- agentforge_graph/embed/registry.py +66 -0
- agentforge_graph/embed/report.py +15 -0
- agentforge_graph/enrich/__init__.py +70 -0
- agentforge_graph/enrich/anthropic.py +38 -0
- agentforge_graph/enrich/anthropic_client.py +109 -0
- agentforge_graph/enrich/bedrock.py +24 -0
- agentforge_graph/enrich/bedrock_client.py +115 -0
- agentforge_graph/enrich/bedrock_summarizer.py +23 -0
- agentforge_graph/enrich/claude.py +172 -0
- agentforge_graph/enrich/enricher.py +108 -0
- agentforge_graph/enrich/governs.py +173 -0
- agentforge_graph/enrich/governs_enricher.py +152 -0
- agentforge_graph/enrich/heuristics.py +224 -0
- agentforge_graph/enrich/judge.py +63 -0
- agentforge_graph/enrich/registry.py +133 -0
- agentforge_graph/enrich/report.py +60 -0
- agentforge_graph/enrich/summarizer.py +62 -0
- agentforge_graph/enrich/summary_enricher.py +211 -0
- agentforge_graph/enrich/taxonomy.py +38 -0
- agentforge_graph/frameworks/__init__.py +29 -0
- agentforge_graph/frameworks/base.py +75 -0
- agentforge_graph/frameworks/detect.py +124 -0
- agentforge_graph/frameworks/extractor.py +63 -0
- agentforge_graph/frameworks/orm.py +93 -0
- agentforge_graph/frameworks/packs/_js_ast.py +56 -0
- agentforge_graph/frameworks/packs/_python_ast.py +157 -0
- agentforge_graph/frameworks/packs/django/__init__.py +240 -0
- agentforge_graph/frameworks/packs/django/models.scm +7 -0
- agentforge_graph/frameworks/packs/express/__init__.py +133 -0
- agentforge_graph/frameworks/packs/express/routes.scm +8 -0
- agentforge_graph/frameworks/packs/fastapi/__init__.py +210 -0
- agentforge_graph/frameworks/packs/fastapi/depends.scm +6 -0
- agentforge_graph/frameworks/packs/fastapi/routes.scm +10 -0
- agentforge_graph/frameworks/packs/flask/__init__.py +143 -0
- agentforge_graph/frameworks/packs/flask/routes.scm +11 -0
- agentforge_graph/frameworks/packs/nestjs/__init__.py +205 -0
- agentforge_graph/frameworks/packs/nestjs/routes.scm +6 -0
- agentforge_graph/frameworks/packs/spring/__init__.py +267 -0
- agentforge_graph/frameworks/packs/spring/routes.scm +6 -0
- agentforge_graph/frameworks/packs/sqlalchemy/__init__.py +250 -0
- agentforge_graph/frameworks/packs/sqlalchemy/models.scm +7 -0
- agentforge_graph/frameworks/registry.py +44 -0
- agentforge_graph/ingest/__init__.py +30 -0
- agentforge_graph/ingest/codegraph.py +847 -0
- agentforge_graph/ingest/extractor.py +353 -0
- agentforge_graph/ingest/incremental/__init__.py +25 -0
- agentforge_graph/ingest/incremental/detect.py +118 -0
- agentforge_graph/ingest/incremental/dirty.py +61 -0
- agentforge_graph/ingest/incremental/indexer.py +218 -0
- agentforge_graph/ingest/incremental/meta.py +72 -0
- agentforge_graph/ingest/incremental/ports.py +39 -0
- agentforge_graph/ingest/pack.py +160 -0
- agentforge_graph/ingest/packs/__init__.py +34 -0
- agentforge_graph/ingest/packs/cpp/__init__.py +35 -0
- agentforge_graph/ingest/packs/cpp/references.scm +15 -0
- agentforge_graph/ingest/packs/cpp/structure.scm +49 -0
- agentforge_graph/ingest/packs/csharp/__init__.py +35 -0
- agentforge_graph/ingest/packs/csharp/references.scm +12 -0
- agentforge_graph/ingest/packs/csharp/structure.scm +45 -0
- agentforge_graph/ingest/packs/go/__init__.py +38 -0
- agentforge_graph/ingest/packs/go/references.scm +12 -0
- agentforge_graph/ingest/packs/go/structure.scm +64 -0
- agentforge_graph/ingest/packs/java/__init__.py +35 -0
- agentforge_graph/ingest/packs/java/references.scm +12 -0
- agentforge_graph/ingest/packs/java/structure.scm +38 -0
- agentforge_graph/ingest/packs/javascript/__init__.py +34 -0
- agentforge_graph/ingest/packs/javascript/references.scm +11 -0
- agentforge_graph/ingest/packs/javascript/structure.scm +166 -0
- agentforge_graph/ingest/packs/php/__init__.py +35 -0
- agentforge_graph/ingest/packs/php/references.scm +15 -0
- agentforge_graph/ingest/packs/php/structure.scm +44 -0
- agentforge_graph/ingest/packs/python/__init__.py +25 -0
- agentforge_graph/ingest/packs/python/references.scm +14 -0
- agentforge_graph/ingest/packs/python/structure.scm +57 -0
- agentforge_graph/ingest/packs/ruby/__init__.py +37 -0
- agentforge_graph/ingest/packs/ruby/references.scm +12 -0
- agentforge_graph/ingest/packs/ruby/structure.scm +37 -0
- agentforge_graph/ingest/packs/rust/__init__.py +39 -0
- agentforge_graph/ingest/packs/rust/references.scm +12 -0
- agentforge_graph/ingest/packs/rust/structure.scm +46 -0
- agentforge_graph/ingest/packs/typescript/__init__.py +31 -0
- agentforge_graph/ingest/packs/typescript/references.scm +11 -0
- agentforge_graph/ingest/packs/typescript/structure.scm +99 -0
- agentforge_graph/ingest/pipeline.py +134 -0
- agentforge_graph/ingest/report.py +84 -0
- agentforge_graph/ingest/resolver.py +467 -0
- agentforge_graph/ingest/source.py +79 -0
- agentforge_graph/knowledge/__init__.py +28 -0
- agentforge_graph/knowledge/adr.py +136 -0
- agentforge_graph/knowledge/commits.py +152 -0
- agentforge_graph/knowledge/ingest.py +312 -0
- agentforge_graph/knowledge/mentions.py +71 -0
- agentforge_graph/knowledge/report.py +32 -0
- agentforge_graph/main.py +21 -0
- agentforge_graph/providers.py +36 -0
- agentforge_graph/repomap/__init__.py +14 -0
- agentforge_graph/repomap/rank.py +161 -0
- agentforge_graph/repomap/render.py +55 -0
- agentforge_graph/repomap/repomap.py +66 -0
- agentforge_graph/retrieve/__init__.py +21 -0
- agentforge_graph/retrieve/pack.py +76 -0
- agentforge_graph/retrieve/rerank.py +251 -0
- agentforge_graph/retrieve/retriever.py +286 -0
- agentforge_graph/retrieve/scoring.py +36 -0
- agentforge_graph/serve/__init__.py +19 -0
- agentforge_graph/serve/engine.py +204 -0
- agentforge_graph/serve/http_runner.py +133 -0
- agentforge_graph/serve/server.py +110 -0
- agentforge_graph/serve/tools.py +307 -0
- agentforge_graph/store/__init__.py +32 -0
- agentforge_graph/store/_rowmap.py +102 -0
- agentforge_graph/store/errors.py +22 -0
- agentforge_graph/store/facade.py +89 -0
- agentforge_graph/store/kuzu_store.py +380 -0
- agentforge_graph/store/lance_store.py +146 -0
- agentforge_graph/store/neo4j_store.py +294 -0
- agentforge_graph/store/pgvector_store.py +170 -0
- agentforge_graph/store/registry.py +45 -0
- agentforge_graph/temporal/__init__.py +36 -0
- agentforge_graph/temporal/backfill.py +338 -0
- agentforge_graph/temporal/events.py +82 -0
- agentforge_graph/temporal/index.py +190 -0
- agentforge_graph/temporal/mining.py +190 -0
- agentforge_graph/temporal/recorder.py +114 -0
- agentforge_graph/temporal/store.py +282 -0
- agentforge_graph-0.3.2.dist-info/METADATA +291 -0
- agentforge_graph-0.3.2.dist-info/RECORD +151 -0
- agentforge_graph-0.3.2.dist-info/WHEEL +4 -0
- agentforge_graph-0.3.2.dist-info/entry_points.txt +3 -0
- agentforge_graph-0.3.2.dist-info/licenses/LICENSE +202 -0
- agentforge_graph-0.3.2.dist-info/licenses/NOTICE +14 -0
|
@@ -0,0 +1,467 @@
|
|
|
1
|
+
"""``ImportResolver`` — pass 2 of ingestion (feat-002).
|
|
2
|
+
|
|
3
|
+
Graph-only and idempotent: reads the imports/refs that pass 1 recorded as
|
|
4
|
+
node attrs and turns them into ``IMPORTS`` and ``CALLS`` edges. Resolution
|
|
5
|
+
is conservative — a call edge is created only when the name resolves to
|
|
6
|
+
*exactly one* target (a local top-level def or a uniquely imported name);
|
|
7
|
+
ambiguous or external-only calls are left unresolved and tallied, never
|
|
8
|
+
guessed (ADR-0004). All edges are written with ``source=resolved`` via
|
|
9
|
+
``GraphStore.add`` so they survive ``delete_file`` of the code files.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import posixpath
|
|
15
|
+
|
|
16
|
+
from agentforge_graph.core import (
|
|
17
|
+
Descriptor,
|
|
18
|
+
Edge,
|
|
19
|
+
EdgeKind,
|
|
20
|
+
GraphQuery,
|
|
21
|
+
GraphStore,
|
|
22
|
+
Node,
|
|
23
|
+
NodeKind,
|
|
24
|
+
Provenance,
|
|
25
|
+
SymbolID,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
from .pack import PackRegistry
|
|
29
|
+
from .report import ResolveStats
|
|
30
|
+
|
|
31
|
+
_ALL = 10_000_000 # effectively unbounded query for v0.1 graph sizes
|
|
32
|
+
_INIT_FILES = ("__init__.py", "__init__.pyi")
|
|
33
|
+
# Receivers that unambiguously denote the enclosing instance/class across the
|
|
34
|
+
# packs that capture a receiver: `self` (Py/Rust/Ruby), `this` (TS/JS/Java/C#/
|
|
35
|
+
# C++), `$this` (PHP). A call on one of these binds to the enclosing class's
|
|
36
|
+
# method (BUG-006); any other receiver is left unresolved (ADR-0004).
|
|
37
|
+
_SELF_RECV = frozenset({"self", "this", "$this"})
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _detect_source_roots(file_paths: list[str]) -> set[str]:
    """Find directories that prefix file paths without being importable names.

    A directory counts as a package when it directly holds one of the
    ``_INIT_FILES``. A *source root* is the parent of a top-level package, i.e.
    of a package directory whose own parent is not itself a package — for
    example ``src`` in a ``src/``-layout project (BUG-001). The empty parent
    (packages sitting at the repository root) is never reported, because there
    is nothing to strip in that layout.
    """
    package_dirs: set[str] = set()
    for file_path in file_paths:
        parent_dir, leaf = posixpath.split(file_path)
        if leaf in _INIT_FILES:
            package_dirs.add(parent_dir)

    source_roots: set[str] = set()
    for package_dir in package_dirs:
        above = posixpath.dirname(package_dir)
        # keep only non-empty parents that are not packages themselves
        if above and above not in package_dirs:
            source_roots.add(above)
    return source_roots
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _strip_root(path: str, roots: set[str]) -> str:
|
|
51
|
+
for r in sorted(roots, key=len, reverse=True):
|
|
52
|
+
if path.startswith(r + "/"):
|
|
53
|
+
return path[len(r) + 1 :]
|
|
54
|
+
return path
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _path_namespace(path: str) -> str:
|
|
58
|
+
"""Rust: the crate-relative module path derived from a file path, in `/` form.
|
|
59
|
+
``src/a/b.rs`` -> ``a/b``; ``src/a/mod.rs`` -> ``a``; ``src/lib.rs`` /
|
|
60
|
+
``src/main.rs`` -> ``"" `` (the crate root)."""
|
|
61
|
+
p = path[4:] if path.startswith("src/") else path
|
|
62
|
+
if p.endswith(".rs"):
|
|
63
|
+
p = p[:-3]
|
|
64
|
+
if p.endswith("/mod"):
|
|
65
|
+
p = p[:-4]
|
|
66
|
+
return "" if p in ("lib", "main", "mod") else p
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class ImportResolver:
    """Second ingestion pass: turn recorded import/ref/base attrs into edges.

    ``resolve`` reads the ``imports``, ``bases``, ``refs`` (and related) attrs
    found on the nodes already in the store and queues ``IMPORTS``,
    ``INHERITS`` and ``CALLS`` edges, plus ``PACKAGE`` nodes for imports that
    do not map to an in-repo file. Everything is written with a single
    ``store.add`` call at the end.
    """

    def __init__(self, registry: PackRegistry, commit: str = "", go_module: str = "") -> None:
        """Store the collaborators used by ``resolve``.

        Args:
            registry: looked up per file via ``for_slug`` to get the language pack.
            commit: passed to ``Provenance.resolved`` for every node/edge written.
            go_module: module path from ``go.mod``; when non-empty it is stripped
                from Go import paths before they are matched to in-repo packages.
        """
        self.registry = registry
        self.commit = commit
        self.go_module = go_module  # go.mod module path (Go import-prefix stripping)
        self.name = "import-resolver"

    async def resolve(
        self, store: GraphStore, changed_files: list[str] | None = None
    ) -> ResolveStats:
        """Resolve imports, inheritance and calls for the graph in ``store``.

        The module/export/namespace indexes and the per-file name bindings are
        always built from *all* FILE nodes. ``changed_files`` only restricts
        which source paths get edges emitted (see ``_is_target``): ``None``
        means every path, otherwise only paths contained in the list.

        Args:
            store: graph to read nodes/neighbors from and to add results to.
            changed_files: optional list of file paths whose edges should be
                (re)emitted.

        Returns:
            A ``ResolveStats`` whose ``imports_resolved``, ``imports_external``,
            ``inherits_resolved``, ``refs_resolved`` and ``refs_unresolved``
            counters were incremented during this run.
        """
        prov = Provenance.resolved(self.name, self.commit)
        all_nodes = (await store.query(GraphQuery(limit=_ALL))).nodes
        files = [n for n in all_nodes if n.kind is NodeKind.FILE]

        # module index + per-module top-level exports (direct CONTAINS children)
        roots = _detect_source_roots([SymbolID.parse(f.id).path for f in files])
        module_to_file: dict[str, str] = {}
        file_module: dict[str, str] = {}
        exports: dict[str, dict[str, str]] = {}
        file_default: dict[str, str] = {}  # module -> CommonJS `module.exports = <name>` (BUG-006)
        # namespace FQN index (PHP/Java): "App/Foo/Bar" -> (file id, symbol id)
        fqn_to_file: dict[str, str] = {}
        fqn_to_sym: dict[str, str] = {}
        # namespace-prefix index (C#): "App/Geo" -> ({file ids}, {symbol name -> id})
        ns_to_files: dict[str, set[str]] = {}
        ns_to_syms: dict[str, dict[str, str]] = {}
        for f in files:
            ps = SymbolID.parse(f.id)
            pack = self.registry.for_slug(ps.lang)
            if pack is None:
                continue
            # strip a source root (e.g. `src/`) for namespace (dotted) packs so a
            # file's module key matches how it's imported (BUG-001); relative
            # packs (TS/JS) resolve by path and need no stripping.
            key_path = _strip_root(ps.path, roots) if pack.module_style == "dotted" else ps.path
            module = pack.module_path(key_path)
            # Go packages are directory-level: many files share one module key.
            # Keep the first file as the package's IMPORTS target, but *merge*
            # every file's top-level defs into the package's export map so
            # same-package cross-file calls resolve (no import needed in Go).
            # File-level packs (Python/TS/JS) have unique keys, so setdefault +
            # update behave exactly like plain assignment for them.
            module_to_file.setdefault(module, f.id)
            file_module[f.id] = module
            de = f.attrs.get("default_export", "")
            if de:
                file_default[module] = de
            # Sort by id so the name->symbol maps below are deterministic when a
            # file has several same-named callables (e.g. Python @overload stubs):
            # the dict build is last-write-wins and store.neighbors() order is not
            # stable across an incremental vs a full build. Without this, a call
            # resolves to a different (but equally valid) overload instance
            # depending on build history, breaking the incremental == full
            # contract (feat-004).
            members = sorted(
                await store.neighbors(f.id, [EdgeKind.CONTAINS], depth=1),
                key=lambda m: m.id,
            )
            exports.setdefault(module, {}).update({m.name: m.id for m in members})
            # namespace packs: index each top-level symbol by its fully-qualified
            # name (file's declared namespace + symbol name), normalized to "/".
            ns = (
                _path_namespace(ps.path)
                if pack.namespace_from_path
                else f.attrs.get("namespace", "")
            )
            # An empty namespace is not indexed at all.
            if ns and pack.namespace_sep:
                ns_key = ns.replace(pack.namespace_sep, "/")
                ns_to_files.setdefault(ns_key, set()).add(f.id)
                for m in members:
                    fqn = f"{ns_key}/{m.name}"
                    # setdefault: the first file/symbol seen for a name keeps it
                    fqn_to_file.setdefault(fqn, f.id)
                    fqn_to_sym.setdefault(fqn, m.id)
                    ns_to_syms.setdefault(ns_key, {}).setdefault(m.name, m.id)

        stats = ResolveStats()
        new_nodes: list[Node] = []
        edges: list[Edge] = []
        seen_edges: set[tuple[str, str, str]] = set()
        packages: dict[str, str] = {}  # package id -> module
        bindings: dict[str, dict[str, str]] = {}  # file id -> {imported name -> target id}
        # BUG-006: file id -> {local module alias -> in-repo module key}, for
        # whole-module imports (`import m`) and default requires (`const m =
        # require("./m")`). Lets `m.f()` bind to module `m`'s top-level export `f`.
        module_alias: dict[str, dict[str, str]] = {}

        def _add_edge(src: str, dst: str, kind: EdgeKind) -> bool:
            """Queue one edge; return False if this (src, dst, kind) was already queued."""
            key = (src, dst, kind.value)
            if key in seen_edges:
                return False
            seen_edges.add(key)
            # Own the edge by its source-side file (the import/call site), so a
            # later incremental re-resolve can invalidate exactly these edges
            # via clear_resolved (feat-004). src is a FILE node (IMPORTS) or a
            # symbol in the caller's file (CALLS); both parse to that file path.
            edges.append(
                Edge(
                    src=src,
                    dst=dst,
                    kind=kind,
                    provenance=prov,
                    origin_path=SymbolID.parse(src).path,
                )
            )
            return True

        def _external(slug: str, repo: str, module: str) -> str:
            """Return the id of the external PACKAGE node for ``module``.

            The node is queued in ``new_nodes`` only the first time its id is
            requested; ``packages`` remembers the ids handed out so far.
            """
            pid = SymbolID.for_symbol(slug, repo, "<external>", Descriptor.namespace(module))
            if pid not in packages:
                packages[pid] = module
                new_nodes.append(
                    Node(
                        id=pid,
                        kind=NodeKind.PACKAGE,
                        name=module,
                        attrs={"external": True},
                        provenance=prov,
                    )
                )
            return pid

        def _is_target(path: str) -> bool:
            """True when edges sourced from ``path`` should be emitted in this run."""
            return changed_files is None or path in changed_files

        # --- imports -> IMPORTS edges + per-file name bindings ---
        # Bindings/aliases are recorded for every file; only edge emission and
        # the import counters are gated by _is_target.
        for f in files:
            ps = SymbolID.parse(f.id)
            pack = self.registry.for_slug(ps.lang)
            binding = bindings.setdefault(f.id, {})
            for imp in f.attrs.get("imports", []):
                module = imp.get("module", "")
                names = imp.get("names", [])
                if not module:
                    continue
                # namespace imports (PHP/Java/C#). Path-based handling is skipped.
                if pack is not None and pack.namespace_sep:
                    norm = module.replace(pack.namespace_sep, "/")
                    if pack.namespace_import_prefix:
                        # C#: `using App.Geo` names a namespace -> IMPORTS to every
                        # in-repo file declaring it, and bind all its symbols.
                        tgt_files = ns_to_files.get(norm)
                        if tgt_files:
                            for tf in sorted(tgt_files):
                                if _is_target(ps.path) and _add_edge(f.id, tf, EdgeKind.IMPORTS):
                                    stats.imports_resolved += 1
                            for nm, sym in ns_to_syms.get(norm, {}).items():
                                binding.setdefault(nm, sym)
                        else:
                            pid = _external(ps.lang, ps.repo, module)
                            if _is_target(ps.path) and _add_edge(f.id, pid, EdgeKind.IMPORTS):
                                stats.imports_external += 1
                        continue
                    # Rust: `use crate::a::b::Item` -> strip the crate root prefix
                    # so the path matches a file-derived module key.
                    if pack.namespace_from_path and norm.startswith("crate/"):
                        norm = norm[len("crate/") :]
                    # PHP/Java/Rust: a path naming a single item (class/struct/fn)
                    # -> the file declaring it; bind the item name.
                    tgt_file = fqn_to_file.get(norm)
                    if tgt_file is not None:
                        if _is_target(ps.path) and _add_edge(f.id, tgt_file, EdgeKind.IMPORTS):
                            stats.imports_resolved += 1
                        local_name = module.rsplit(pack.namespace_sep, 1)[-1]
                        binding[local_name] = fqn_to_sym[norm]
                    else:
                        pid = _external(ps.lang, ps.repo, module)
                        if _is_target(ps.path) and _add_edge(f.id, pid, EdgeKind.IMPORTS):
                            stats.imports_external += 1
                        binding.setdefault(module.rsplit(pack.namespace_sep, 1)[-1], pid)
                    continue
                # Resolve the import as written (relative path / dotted module,
                # incl. Python leading-dot relative imports) to a key comparable
                # to the module index. file_module gives the importer's own
                # source-root-stripped module key for relative resolution.
                key = (
                    pack.resolve_import(ps.path, module, file_module.get(f.id, ""))
                    if pack
                    else module
                )
                # directory import: `require("./router")` / `import … "./router"`
                # resolves to `./router/index` (BUG-006 — relative packs).
                if key not in module_to_file and f"{key}/index" in module_to_file:
                    key = f"{key}/index"
                # Go: an import path is `<go.mod module>/<dir>`. If we know the
                # module prefix (from go.mod), strip it exactly — this maps both the
                # *root* package (key "") and any sub-package. Otherwise fall back to
                # suffix-matching leading segments to an in-repo dir. stdlib/third-
                # party never match → stay external.
                if key not in module_to_file and pack is not None and pack.module_style == "go":
                    if self.go_module and (
                        key == self.go_module or key.startswith(self.go_module + "/")
                    ):
                        rel = key[len(self.go_module) :].lstrip("/")
                        if rel in module_to_file:
                            key = rel
                    if key not in module_to_file:
                        segs = key.split("/")
                        # drop leading segments one at a time; first hit wins
                        for i in range(1, len(segs)):
                            cand = "/".join(segs[i:])
                            if cand in module_to_file:
                                key = cand
                                break
                default_name = imp.get("default", "")
                if key in module_to_file:
                    if _is_target(ps.path) and _add_edge(
                        f.id, module_to_file[key], EdgeKind.IMPORTS
                    ):
                        stats.imports_resolved += 1
                    sep = "/" if pack is not None and pack.module_style != "dotted" else "."
                    for nm in names:
                        tgt = exports.get(key, {}).get(nm)
                        if tgt:
                            binding[nm] = tgt
                            continue
                        # `from pkg import sub` where `sub` is an in-repo *submodule*
                        # (not a def of pkg): alias the local name to that module so
                        # `sub.f()` / `extends sub.Base` resolve to its exports, and
                        # point IMPORTS at the submodule file (BUG-006 aliased import).
                        sub_key = f"{key}{sep}{nm}" if key else nm
                        if sub_key in module_to_file:
                            module_alias.setdefault(f.id, {})[nm] = sub_key
                            if _is_target(ps.path) and _add_edge(
                                f.id, module_to_file[sub_key], EdgeKind.IMPORTS
                            ):
                                stats.imports_resolved += 1
                    # CommonJS default require: bind the local name to the target
                    # module's `module.exports = <name>` symbol (BUG-006).
                    if default_name:
                        exp = file_default.get(key, "")
                        tgt = exports.get(key, {}).get(exp) if exp else None
                        if tgt:
                            binding[default_name] = tgt
                        # also a module alias, so `default_name.f()` reaches a
                        # top-level export `f` of the module (BUG-006 member access).
                        module_alias.setdefault(f.id, {})[default_name] = key
                    # whole-module import (`import m`): `m` aliases the module, so
                    # `m.f()` resolves to its top-level export `f` (BUG-006).
                    elif not names:
                        module_alias.setdefault(f.id, {})[module] = key
                    # wildcard import (Ruby `require_relative`): a name-less in-repo
                    # import makes all the target file's top-level defs callable.
                    if pack is not None and pack.wildcard_import and not names and not default_name:
                        for nm, tgt in exports.get(key, {}).items():
                            binding.setdefault(nm, tgt)
                else:
                    # Not an in-repo module: point at an external PACKAGE node and
                    # bind the imported names to it (such targets are filtered out
                    # of CALLS below via `packages`).
                    pid = _external(ps.lang, ps.repo, module)
                    if _is_target(ps.path) and _add_edge(f.id, pid, EdgeKind.IMPORTS):
                        stats.imports_external += 1
                    for nm in names:
                        binding.setdefault(nm, pid)
                    if default_name:
                        binding.setdefault(default_name, pid)
                    if not names and not default_name:
                        binding.setdefault(module.split(".")[-1], pid)

        # --- calls -> CALLS edges (unique match only) ---
        path_to_file = {SymbolID.parse(f.id).path: f.id for f in files}
        node_by_id = {n.id: n for n in all_nodes}
        # BUG-006: lazily resolve `self.f()`/`this.f()` to the *enclosing class's*
        # method — a unique, safe match (ADR-0004). Caches keep it cheap and
        # deterministic (methods sorted by id, like the export map above).
        method_cache: dict[str, dict[str, str]] = {}
        enclosing_cache: dict[str, str | None] = {}

        async def _methods_of(class_id: str) -> dict[str, str]:
            """Map member name -> id for the direct CONTAINS children of ``class_id`` (cached)."""
            cached = method_cache.get(class_id)
            if cached is None:
                members = sorted(
                    await store.neighbors(class_id, [EdgeKind.CONTAINS], depth=1),
                    key=lambda m: m.id,
                )
                cached = {m.name: m.id for m in members}
                method_cache[class_id] = cached
            return cached

        async def _enclosing_class(node_id: str) -> str | None:
            """Id of a CLASS node with a CONTAINS edge into ``node_id``, or None (cached).

            Only direct parents are inspected; the first CLASS parent found wins.
            """
            if node_id in enclosing_cache:
                return enclosing_cache[node_id]
            cls: str | None = None
            for e in await store.adjacent(node_id, [EdgeKind.CONTAINS], "in"):
                parent = node_by_id.get(e.src)
                if parent is not None and parent.kind is NodeKind.CLASS:
                    cls = e.src
                    break
            enclosing_cache[node_id] = cls
            return cls

        # --- inheritance -> INHERITS edges (subclass -> base; unique match) ---
        # Resolve bases first and keep a superclass map, so the call loop below can
        # walk it for inherited `self.f()` (the method is defined on a base class).
        superclasses: dict[str, list[str]] = {}
        for n in all_nodes:
            bases = n.attrs.get("bases")
            if not bases or n.kind is not NodeKind.CLASS:
                continue
            owner_file = path_to_file.get(SymbolID.parse(n.id).path)
            local = exports.get(file_module.get(owner_file, ""), {}) if owner_file else {}
            binding = bindings.get(owner_file, {}) if owner_file else {}
            aliases = module_alias.get(owner_file, {}) if owner_file else {}
            resolved: list[str] = []
            for base in bases:
                # same-module definitions take precedence over imported names
                bt = local.get(base) or binding.get(base)
                # qualified base `mod.Base`: resolve `mod` as an imported module
                # alias, then `Base` as that module's top-level export (BUG-006).
                if bt is None and "." in base:
                    recv, _, base_name = base.rpartition(".")
                    mod_key = aliases.get(recv)
                    if mod_key is not None:
                        bt = exports.get(mod_key, {}).get(base_name)
                # only an in-repo class is a valid base (external/by-name-only stays
                # unresolved — never guessed, ADR-0004)
                tnode = node_by_id.get(bt) if bt else None
                if tnode is not None and tnode.kind is NodeKind.CLASS and bt is not None:
                    resolved.append(bt)
            if not resolved:
                continue
            # the superclass map covers all classes; edges only target files
            superclasses[n.id] = resolved
            if _is_target(SymbolID.parse(n.id).path):
                for b in resolved:
                    if _add_edge(n.id, b, EdgeKind.INHERITS):
                        stats.inherits_resolved += 1

        async def _inherited_method(class_id: str, name: str) -> str | None:
            """A method ``name`` defined on a *base* of ``class_id`` — resolved only
            when exactly one base in the transitive closure defines it (no MRO
            guessing across multiple definers, ADR-0004)."""
            seen: set[str] = set()
            found: set[str] = set()
            frontier = list(superclasses.get(class_id, []))
            while frontier:
                b = frontier.pop()
                if b in seen:
                    continue
                seen.add(b)
                m = (await _methods_of(b)).get(name)
                if m:
                    found.add(m)
                frontier.extend(superclasses.get(b, []))
            return next(iter(found)) if len(found) == 1 else None

        # Go: methods are package-scoped and attached to a receiver type, not
        # AST-nested in it. Index them by (package, type) so a call on a method's
        # own receiver (`s.f()`) resolves to a method of that type (BUG-006).
        go_methods: dict[tuple[str, str], dict[str, str]] = {}
        for n in sorted(all_nodes, key=lambda z: z.id):
            rtype = n.attrs.get("recv_type")
            if not rtype:
                continue
            owner = path_to_file.get(SymbolID.parse(n.id).path, "")
            go_methods.setdefault((file_module.get(owner, ""), rtype), {})[n.name] = n.id

        for n in all_nodes:
            refs = n.attrs.get("refs")
            if not refs:
                continue
            ps = SymbolID.parse(n.id)
            if not _is_target(ps.path):
                continue
            owner_file = path_to_file.get(ps.path)
            local = exports.get(file_module.get(owner_file, ""), {}) if owner_file else {}
            binding = bindings.get(owner_file, {}) if owner_file else {}
            aliases = module_alias.get(owner_file, {}) if owner_file else {}
            for ref in refs:
                nm = ref.get("name")
                recv = ref.get("recv")
                target: str | None = None
                if not nm:
                    target = None
                elif recv in _SELF_RECV:
                    # an intra-class call: bind to a method of the enclosing class,
                    # or — failing that — a method inherited from a unique base.
                    cls = await _enclosing_class(n.id)
                    if cls is not None:
                        target = (await _methods_of(cls)).get(nm)
                        if target is None:
                            target = await _inherited_method(cls, nm)
                elif recv is not None and recv == n.attrs.get("recv_var"):
                    # Go: a call on the method's own receiver (`s.f()`) → a method
                    # of the receiver's type.
                    key = (file_module.get(owner_file or "", ""), str(n.attrs.get("recv_type", "")))
                    target = go_methods.get(key, {}).get(nm)
                elif recv is not None:
                    # `m.f()` where `m` is an imported module → its export `f`;
                    # any other receiver is not a unique target (never guessed
                    # onto a same-named module-level def, ADR-0004).
                    mod_key = aliases.get(recv)
                    if mod_key is not None:
                        target = exports.get(mod_key, {}).get(nm)
                else:
                    # bare call: a same-module definition wins over an imported name
                    target = local.get(nm) or binding.get(nm)
                # A resolved ref whose edge was already queued bumps neither counter.
                if target and target not in packages:  # external pkg isn't a callable target
                    if _add_edge(n.id, target, EdgeKind.CALLS):
                        stats.refs_resolved += 1
                else:
                    stats.refs_unresolved += 1

        if new_nodes or edges:
            await store.add([*new_nodes, *edges])  # nodes first: edge endpoints must exist
        return stats
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""``RepoSource`` — walk a repository and yield one ``SourceFile`` per
|
|
2
|
+
indexable file. The pipeline's only filesystem boundary.
|
|
3
|
+
|
|
4
|
+
Files with no matching pack are skipped silently (not our languages), as are
files rejected by the include/exclude globs; files over the size limit are
skipped *and recorded* in ``skipped`` so the count surfaces in the IndexReport.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import hashlib
|
|
12
|
+
from collections.abc import Iterator
|
|
13
|
+
from pathlib import Path, PurePosixPath
|
|
14
|
+
|
|
15
|
+
from agentforge_graph.config import DEFAULT_EXCLUDES
|
|
16
|
+
from agentforge_graph.core import SourceFile
|
|
17
|
+
|
|
18
|
+
from .pack import PackRegistry
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def read_go_module(root: str | Path) -> str:
    """Return the ``module`` path declared in ``<root>/go.mod``, or ``""``.

    The resolver strips this prefix (e.g. ``github.com/spf13/cobra``) to map a
    Go import path to an in-repo package dir, including the *root* package
    (whose dir key is ``""`` and can't be suffix-matched).

    Parsing follows the go.mod syntax rather than a literal ``"module "``
    prefix: any whitespace may separate the keyword from the path, trailing
    ``//`` comments are ignored, a quoted path is unquoted, and the
    parenthesised ``module ( ... )`` form is understood.

    Args:
        root: repository root directory expected to contain ``go.mod``.

    Returns:
        The module path, or ``""`` when the file is missing/unreadable or
        declares no module.
    """
    try:
        text = (Path(root) / "go.mod").read_text(encoding="utf-8", errors="replace")
    except OSError:
        return ""
    in_block = False
    for line in text.splitlines():
        # go.mod only has `//` line comments; a module path never contains `//`.
        directive = line.split("//", 1)[0].strip()
        if not directive:
            continue
        if in_block:
            # first entry inside `module ( ... )` is the path; `)` = empty block
            return "" if directive == ")" else directive.strip("\"`")
        fields = directive.split(None, 1)
        if fields[0] != "module" or len(fields) < 2:
            continue
        if fields[1] == "(":
            in_block = True
            continue
        # the path may be written as an interpreted ("...") or raw (`...`) string
        return fields[1].strip("\"`")
    return ""
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class RepoSource:
    """Walks ``root`` and yields one ``SourceFile`` per indexable file.

    Files rejected by the exclude/include globs and files whose extension has
    no pack are passed over without a trace; files larger than ``max_file_kb``
    are passed over and noted in ``skipped``.
    """

    def __init__(
        self,
        root: str | Path,
        include: list[str] | None = None,
        exclude: list[str] | None = None,
        max_file_kb: int = 512,
    ) -> None:
        """Remember the walk settings.

        Args:
            root: directory to walk.
            include: globs a path must match to be considered; ``None`` = all.
            exclude: globs that reject a path; ``None`` = a copy of
                ``DEFAULT_EXCLUDES``.
            max_file_kb: size limit in KiB above which a file is skipped.
        """
        self.root = Path(root)
        self.include = include
        self.exclude = exclude if exclude is not None else list(DEFAULT_EXCLUDES)
        self.max_file_kb = max_file_kb
        self.skipped: list[str] = []

    def iter_files(self, registry: PackRegistry) -> Iterator[SourceFile]:
        """Yield indexable files under ``root`` in sorted path order.

        ``skipped`` is cleared at the start of each walk so repeated walks
        (feat-004) stay accurate. Text is decoded as UTF-8 with replacement of
        undecodable bytes; the content hash is the SHA-256 of the raw bytes.
        """
        self.skipped = []
        for entry in sorted(self.root.rglob("*")):
            if not entry.is_file():
                continue
            rel_path = entry.relative_to(self.root).as_posix()
            wanted = not self._is_excluded(rel_path) and self._is_included(rel_path)
            if not wanted:
                continue
            if (pack := registry.for_extension(entry.suffix)) is None:
                # not a language we index
                continue
            byte_limit = self.max_file_kb * 1024
            if entry.stat().st_size > byte_limit:
                self.skipped.append(f"{rel_path} (> {self.max_file_kb}KB)")
                continue
            payload = entry.read_bytes()
            digest = hashlib.sha256(payload).hexdigest()
            yield SourceFile(
                path=rel_path,
                text=str(payload, "utf-8", "replace"),
                language=pack.lang_slug,
                content_hash=digest,
            )

    @staticmethod
    def _matches_any(rel: str, globs: list[str]) -> bool:
        """True if ``rel`` fully matches at least one of ``globs``."""
        candidate = PurePosixPath(rel)
        for glob in globs:
            if candidate.full_match(glob):
                return True
        return False

    def _is_excluded(self, rel: str) -> bool:
        """True if ``rel`` matches any exclude glob."""
        return self._matches_any(rel, self.exclude)

    def _is_included(self, rel: str) -> bool:
        """True if no include list is set, or ``rel`` matches one of its globs."""
        return self.include is None or self._matches_any(rel, self.include)
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""ADR & docs ingestion (feat-010): connect architecture decisions to the code
|
|
2
|
+
they govern — the gap no surveyed tool fills (research §3.3).
|
|
3
|
+
|
|
4
|
+
MVP: ADR markdown → ``Decision`` nodes (+ body ``DocChunk``s) with **parsed**
|
|
5
|
+
``GOVERNS``/``SUPERSEDES`` edges, ingested as per-ADR ``FileSubgraph`` upserts
|
|
6
|
+
(so they ride feat-004 incrementality). Retrieval surfaces a governing decision
|
|
7
|
+
when its governed code is retrieved. Zero ``agentforge`` imports (ADR-0001).
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from .adr import ADRParser, ParsedADR
|
|
13
|
+
from .commits import CommitIngestor
|
|
14
|
+
from .ingest import KnowledgeIngestor
|
|
15
|
+
from .mentions import Mentions, extract_mentions, resolve_mentions
|
|
16
|
+
from .report import DecisionInfo, KnowledgeStats
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
"ADRParser",
|
|
20
|
+
"ParsedADR",
|
|
21
|
+
"CommitIngestor",
|
|
22
|
+
"KnowledgeIngestor",
|
|
23
|
+
"Mentions",
|
|
24
|
+
"extract_mentions",
|
|
25
|
+
"resolve_mentions",
|
|
26
|
+
"DecisionInfo",
|
|
27
|
+
"KnowledgeStats",
|
|
28
|
+
]
|