agentforge-graph 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. agentforge_graph/__init__.py +6 -0
  2. agentforge_graph/chunking/__init__.py +12 -0
  3. agentforge_graph/chunking/cast.py +159 -0
  4. agentforge_graph/chunking/chunk.py +19 -0
  5. agentforge_graph/chunking/tokens.py +15 -0
  6. agentforge_graph/cli.py +607 -0
  7. agentforge_graph/config.py +259 -0
  8. agentforge_graph/core/__init__.py +54 -0
  9. agentforge_graph/core/conformance.py +270 -0
  10. agentforge_graph/core/contracts.py +163 -0
  11. agentforge_graph/core/kinds.py +68 -0
  12. agentforge_graph/core/models.py +134 -0
  13. agentforge_graph/core/provenance.py +62 -0
  14. agentforge_graph/core/symbols.py +116 -0
  15. agentforge_graph/embed/__init__.py +28 -0
  16. agentforge_graph/embed/base.py +22 -0
  17. agentforge_graph/embed/bedrock.py +85 -0
  18. agentforge_graph/embed/fake.py +34 -0
  19. agentforge_graph/embed/openai.py +67 -0
  20. agentforge_graph/embed/pipeline.py +184 -0
  21. agentforge_graph/embed/registry.py +66 -0
  22. agentforge_graph/embed/report.py +15 -0
  23. agentforge_graph/enrich/__init__.py +70 -0
  24. agentforge_graph/enrich/anthropic.py +38 -0
  25. agentforge_graph/enrich/anthropic_client.py +109 -0
  26. agentforge_graph/enrich/bedrock.py +24 -0
  27. agentforge_graph/enrich/bedrock_client.py +115 -0
  28. agentforge_graph/enrich/bedrock_summarizer.py +23 -0
  29. agentforge_graph/enrich/claude.py +172 -0
  30. agentforge_graph/enrich/enricher.py +108 -0
  31. agentforge_graph/enrich/governs.py +173 -0
  32. agentforge_graph/enrich/governs_enricher.py +152 -0
  33. agentforge_graph/enrich/heuristics.py +224 -0
  34. agentforge_graph/enrich/judge.py +63 -0
  35. agentforge_graph/enrich/registry.py +133 -0
  36. agentforge_graph/enrich/report.py +60 -0
  37. agentforge_graph/enrich/summarizer.py +62 -0
  38. agentforge_graph/enrich/summary_enricher.py +211 -0
  39. agentforge_graph/enrich/taxonomy.py +38 -0
  40. agentforge_graph/frameworks/__init__.py +29 -0
  41. agentforge_graph/frameworks/base.py +75 -0
  42. agentforge_graph/frameworks/detect.py +124 -0
  43. agentforge_graph/frameworks/extractor.py +63 -0
  44. agentforge_graph/frameworks/orm.py +93 -0
  45. agentforge_graph/frameworks/packs/_js_ast.py +56 -0
  46. agentforge_graph/frameworks/packs/_python_ast.py +157 -0
  47. agentforge_graph/frameworks/packs/django/__init__.py +240 -0
  48. agentforge_graph/frameworks/packs/django/models.scm +7 -0
  49. agentforge_graph/frameworks/packs/express/__init__.py +133 -0
  50. agentforge_graph/frameworks/packs/express/routes.scm +8 -0
  51. agentforge_graph/frameworks/packs/fastapi/__init__.py +210 -0
  52. agentforge_graph/frameworks/packs/fastapi/depends.scm +6 -0
  53. agentforge_graph/frameworks/packs/fastapi/routes.scm +10 -0
  54. agentforge_graph/frameworks/packs/flask/__init__.py +143 -0
  55. agentforge_graph/frameworks/packs/flask/routes.scm +11 -0
  56. agentforge_graph/frameworks/packs/nestjs/__init__.py +205 -0
  57. agentforge_graph/frameworks/packs/nestjs/routes.scm +6 -0
  58. agentforge_graph/frameworks/packs/spring/__init__.py +267 -0
  59. agentforge_graph/frameworks/packs/spring/routes.scm +6 -0
  60. agentforge_graph/frameworks/packs/sqlalchemy/__init__.py +250 -0
  61. agentforge_graph/frameworks/packs/sqlalchemy/models.scm +7 -0
  62. agentforge_graph/frameworks/registry.py +44 -0
  63. agentforge_graph/ingest/__init__.py +30 -0
  64. agentforge_graph/ingest/codegraph.py +847 -0
  65. agentforge_graph/ingest/extractor.py +353 -0
  66. agentforge_graph/ingest/incremental/__init__.py +25 -0
  67. agentforge_graph/ingest/incremental/detect.py +118 -0
  68. agentforge_graph/ingest/incremental/dirty.py +61 -0
  69. agentforge_graph/ingest/incremental/indexer.py +218 -0
  70. agentforge_graph/ingest/incremental/meta.py +72 -0
  71. agentforge_graph/ingest/incremental/ports.py +39 -0
  72. agentforge_graph/ingest/pack.py +160 -0
  73. agentforge_graph/ingest/packs/__init__.py +34 -0
  74. agentforge_graph/ingest/packs/cpp/__init__.py +35 -0
  75. agentforge_graph/ingest/packs/cpp/references.scm +15 -0
  76. agentforge_graph/ingest/packs/cpp/structure.scm +49 -0
  77. agentforge_graph/ingest/packs/csharp/__init__.py +35 -0
  78. agentforge_graph/ingest/packs/csharp/references.scm +12 -0
  79. agentforge_graph/ingest/packs/csharp/structure.scm +45 -0
  80. agentforge_graph/ingest/packs/go/__init__.py +38 -0
  81. agentforge_graph/ingest/packs/go/references.scm +12 -0
  82. agentforge_graph/ingest/packs/go/structure.scm +64 -0
  83. agentforge_graph/ingest/packs/java/__init__.py +35 -0
  84. agentforge_graph/ingest/packs/java/references.scm +12 -0
  85. agentforge_graph/ingest/packs/java/structure.scm +38 -0
  86. agentforge_graph/ingest/packs/javascript/__init__.py +34 -0
  87. agentforge_graph/ingest/packs/javascript/references.scm +11 -0
  88. agentforge_graph/ingest/packs/javascript/structure.scm +166 -0
  89. agentforge_graph/ingest/packs/php/__init__.py +35 -0
  90. agentforge_graph/ingest/packs/php/references.scm +15 -0
  91. agentforge_graph/ingest/packs/php/structure.scm +44 -0
  92. agentforge_graph/ingest/packs/python/__init__.py +25 -0
  93. agentforge_graph/ingest/packs/python/references.scm +14 -0
  94. agentforge_graph/ingest/packs/python/structure.scm +57 -0
  95. agentforge_graph/ingest/packs/ruby/__init__.py +37 -0
  96. agentforge_graph/ingest/packs/ruby/references.scm +12 -0
  97. agentforge_graph/ingest/packs/ruby/structure.scm +37 -0
  98. agentforge_graph/ingest/packs/rust/__init__.py +39 -0
  99. agentforge_graph/ingest/packs/rust/references.scm +12 -0
  100. agentforge_graph/ingest/packs/rust/structure.scm +46 -0
  101. agentforge_graph/ingest/packs/typescript/__init__.py +31 -0
  102. agentforge_graph/ingest/packs/typescript/references.scm +11 -0
  103. agentforge_graph/ingest/packs/typescript/structure.scm +99 -0
  104. agentforge_graph/ingest/pipeline.py +134 -0
  105. agentforge_graph/ingest/report.py +84 -0
  106. agentforge_graph/ingest/resolver.py +467 -0
  107. agentforge_graph/ingest/source.py +79 -0
  108. agentforge_graph/knowledge/__init__.py +28 -0
  109. agentforge_graph/knowledge/adr.py +136 -0
  110. agentforge_graph/knowledge/commits.py +152 -0
  111. agentforge_graph/knowledge/ingest.py +312 -0
  112. agentforge_graph/knowledge/mentions.py +71 -0
  113. agentforge_graph/knowledge/report.py +32 -0
  114. agentforge_graph/main.py +21 -0
  115. agentforge_graph/providers.py +36 -0
  116. agentforge_graph/repomap/__init__.py +14 -0
  117. agentforge_graph/repomap/rank.py +161 -0
  118. agentforge_graph/repomap/render.py +55 -0
  119. agentforge_graph/repomap/repomap.py +66 -0
  120. agentforge_graph/retrieve/__init__.py +21 -0
  121. agentforge_graph/retrieve/pack.py +76 -0
  122. agentforge_graph/retrieve/rerank.py +251 -0
  123. agentforge_graph/retrieve/retriever.py +286 -0
  124. agentforge_graph/retrieve/scoring.py +36 -0
  125. agentforge_graph/serve/__init__.py +19 -0
  126. agentforge_graph/serve/engine.py +204 -0
  127. agentforge_graph/serve/http_runner.py +133 -0
  128. agentforge_graph/serve/server.py +110 -0
  129. agentforge_graph/serve/tools.py +307 -0
  130. agentforge_graph/store/__init__.py +32 -0
  131. agentforge_graph/store/_rowmap.py +102 -0
  132. agentforge_graph/store/errors.py +22 -0
  133. agentforge_graph/store/facade.py +89 -0
  134. agentforge_graph/store/kuzu_store.py +380 -0
  135. agentforge_graph/store/lance_store.py +146 -0
  136. agentforge_graph/store/neo4j_store.py +294 -0
  137. agentforge_graph/store/pgvector_store.py +170 -0
  138. agentforge_graph/store/registry.py +45 -0
  139. agentforge_graph/temporal/__init__.py +36 -0
  140. agentforge_graph/temporal/backfill.py +338 -0
  141. agentforge_graph/temporal/events.py +82 -0
  142. agentforge_graph/temporal/index.py +190 -0
  143. agentforge_graph/temporal/mining.py +190 -0
  144. agentforge_graph/temporal/recorder.py +114 -0
  145. agentforge_graph/temporal/store.py +282 -0
  146. agentforge_graph-0.3.2.dist-info/METADATA +291 -0
  147. agentforge_graph-0.3.2.dist-info/RECORD +151 -0
  148. agentforge_graph-0.3.2.dist-info/WHEEL +4 -0
  149. agentforge_graph-0.3.2.dist-info/entry_points.txt +3 -0
  150. agentforge_graph-0.3.2.dist-info/licenses/LICENSE +202 -0
  151. agentforge_graph-0.3.2.dist-info/licenses/NOTICE +14 -0
@@ -0,0 +1,467 @@
1
+ """``ImportResolver`` — pass 2 of ingestion (feat-002).
2
+
3
+ Graph-only and idempotent: reads the imports/refs that pass 1 recorded as
4
+ node attrs and turns them into ``IMPORTS`` and ``CALLS`` edges. Resolution
5
+ is conservative — a call edge is created only when the name resolves to
6
+ *exactly one* target (a local top-level def or a uniquely imported name);
7
+ ambiguous or external-only calls are left unresolved and tallied, never
8
+ guessed (ADR-0004). All edges are written with ``source=resolved`` via
9
+ ``GraphStore.add`` so they survive ``delete_file`` of the code files.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import posixpath
15
+
16
+ from agentforge_graph.core import (
17
+ Descriptor,
18
+ Edge,
19
+ EdgeKind,
20
+ GraphQuery,
21
+ GraphStore,
22
+ Node,
23
+ NodeKind,
24
+ Provenance,
25
+ SymbolID,
26
+ )
27
+
28
+ from .pack import PackRegistry
29
+ from .report import ResolveStats
30
+
31
# Limit passed to ``GraphQuery`` when the resolver loads the whole graph.
_ALL = 10_000_000  # effectively unbounded query for v0.1 graph sizes
# File names whose presence marks a directory as a package directory
# (used for source-root detection).
_INIT_FILES = ("__init__.py", "__init__.pyi")
# Receivers that unambiguously denote the enclosing instance/class across the
# packs that capture a receiver: `self` (Py/Rust/Ruby), `this` (TS/JS/Java/C#/
# C++), `$this` (PHP). A call on one of these binds to the enclosing class's
# method (BUG-006); any other receiver is left unresolved (ADR-0004).
_SELF_RECV = frozenset({"self", "this", "$this"})
38
+
39
+
40
def _detect_source_roots(file_paths: list[str]) -> set[str]:
    """Return the directories that prefix file paths without being part of the
    import namespace, e.g. ``src`` in a ``src/``-layout package (BUG-001).

    A package directory is any directory holding one of ``_INIT_FILES``. A
    source root is the parent of a *top-level* package directory, i.e. one
    whose own parent is not a package directory. The empty parent (repo-root
    layout) is never reported because there is nothing to strip.
    """
    package_dirs: set[str] = set()
    for candidate in file_paths:
        directory, filename = posixpath.split(candidate)
        if filename in _INIT_FILES:
            package_dirs.add(directory)

    source_roots: set[str] = set()
    for directory in package_dirs:
        parent = posixpath.dirname(directory)
        # "" means the package sits at the repo root: no prefix to strip.
        if parent and parent not in package_dirs:
            source_roots.add(parent)
    return source_roots
48
+
49
+
50
def _strip_root(path: str, roots: set[str]) -> str:
    """Drop the longest source root that prefixes ``path``.

    A root matches only as a whole leading directory (``root + "/"``). When no
    root matches, ``path`` is returned unchanged.
    """
    matching = [root for root in roots if path.startswith(f"{root}/")]
    if not matching:
        return path
    # Longest first, so a nested root wins over its parent.
    longest = max(matching, key=len)
    return path[len(longest) + 1 :]
55
+
56
+
57
def _path_namespace(path: str) -> str:
    """Rust: derive the crate-relative module path, in `/` form, from a file path.

    ``src/a/b.rs`` -> ``a/b``; ``src/a/mod.rs`` -> ``a``; ``src/lib.rs`` /
    ``src/main.rs`` -> ``""`` (the crate root).
    """
    module = path.removeprefix("src/").removesuffix(".rs").removesuffix("/mod")
    if module in ("lib", "main", "mod"):
        return ""
    return module
67
+
68
+
69
class ImportResolver:
    """Second ingestion pass: turn recorded imports/refs/bases into graph edges.

    Reads the ``imports``, ``refs`` and ``bases`` attrs found on existing nodes
    and emits ``IMPORTS``, ``INHERITS`` and ``CALLS`` edges, plus ``PACKAGE``
    nodes for imports that do not resolve to an in-repo file. An edge is only
    created when the name resolves to a single target; everything else is
    tallied as unresolved (ADR-0004).
    """

    def __init__(self, registry: PackRegistry, commit: str = "", go_module: str = "") -> None:
        """Store the pack registry, the commit used for provenance, and the
        optional ``go.mod`` module path."""
        self.registry = registry
        self.commit = commit
        self.go_module = go_module  # go.mod module path (Go import-prefix stripping)
        self.name = "import-resolver"

    async def resolve(
        self, store: GraphStore, changed_files: list[str] | None = None
    ) -> ResolveStats:
        """Resolve imports, inheritance and calls for the graph in ``store``.

        Args:
            store: graph store that is queried for all nodes, their CONTAINS
                neighbours/adjacency, and that receives the new nodes and edges
                through a single ``store.add`` call.
            changed_files: when given, edges are only emitted for sources whose
                file path is in this list; the module/export/binding indexes are
                still built from every file. ``None`` targets every file.

        Returns:
            ``ResolveStats`` with the ``imports_resolved``, ``imports_external``,
            ``inherits_resolved``, ``refs_resolved`` and ``refs_unresolved``
            counters filled in.
        """
        prov = Provenance.resolved(self.name, self.commit)
        all_nodes = (await store.query(GraphQuery(limit=_ALL))).nodes
        files = [n for n in all_nodes if n.kind is NodeKind.FILE]

        # module index + per-module top-level exports (direct CONTAINS children)
        roots = _detect_source_roots([SymbolID.parse(f.id).path for f in files])
        module_to_file: dict[str, str] = {}
        file_module: dict[str, str] = {}
        exports: dict[str, dict[str, str]] = {}
        file_default: dict[str, str] = {}  # module -> CommonJS `module.exports = <name>` (BUG-006)
        # namespace FQN index (PHP/Java): "App/Foo/Bar" -> (file id, symbol id)
        fqn_to_file: dict[str, str] = {}
        fqn_to_sym: dict[str, str] = {}
        # namespace-prefix index (C#): "App/Geo" -> ({file ids}, {symbol name -> id})
        ns_to_files: dict[str, set[str]] = {}
        ns_to_syms: dict[str, dict[str, str]] = {}
        for f in files:
            ps = SymbolID.parse(f.id)
            pack = self.registry.for_slug(ps.lang)
            if pack is None:
                continue
            # strip a source root (e.g. `src/`) for namespace (dotted) packs so a
            # file's module key matches how it's imported (BUG-001); relative
            # packs (TS/JS) resolve by path and need no stripping.
            key_path = _strip_root(ps.path, roots) if pack.module_style == "dotted" else ps.path
            module = pack.module_path(key_path)
            # Go packages are directory-level: many files share one module key.
            # Keep the first file as the package's IMPORTS target, but *merge*
            # every file's top-level defs into the package's export map so
            # same-package cross-file calls resolve (no import needed in Go).
            # File-level packs (Python/TS/JS) have unique keys, so setdefault +
            # update behave exactly like plain assignment for them.
            module_to_file.setdefault(module, f.id)
            file_module[f.id] = module
            de = f.attrs.get("default_export", "")
            if de:
                file_default[module] = de
            # Sort by id so the name->symbol maps below are deterministic when a
            # file has several same-named callables (e.g. Python @overload stubs):
            # the dict build is last-write-wins and store.neighbors() order is not
            # stable across an incremental vs a full build. Without this, a call
            # resolves to a different (but equally valid) overload instance
            # depending on build history, breaking the incremental == full
            # contract (feat-004).
            members = sorted(
                await store.neighbors(f.id, [EdgeKind.CONTAINS], depth=1),
                key=lambda m: m.id,
            )
            exports.setdefault(module, {}).update({m.name: m.id for m in members})
            # namespace packs: index each top-level symbol by its fully-qualified
            # name (file's declared namespace + symbol name), normalized to "/".
            ns = (
                _path_namespace(ps.path)
                if pack.namespace_from_path
                else f.attrs.get("namespace", "")
            )
            if ns and pack.namespace_sep:
                ns_key = ns.replace(pack.namespace_sep, "/")
                ns_to_files.setdefault(ns_key, set()).add(f.id)
                for m in members:
                    fqn = f"{ns_key}/{m.name}"
                    fqn_to_file.setdefault(fqn, f.id)
                    fqn_to_sym.setdefault(fqn, m.id)
                    ns_to_syms.setdefault(ns_key, {}).setdefault(m.name, m.id)

        stats = ResolveStats()
        new_nodes: list[Node] = []
        edges: list[Edge] = []
        seen_edges: set[tuple[str, str, str]] = set()
        packages: dict[str, str] = {}  # package id -> module
        bindings: dict[str, dict[str, str]] = {}  # file id -> {imported name -> target id}
        # BUG-006: file id -> {local module alias -> in-repo module key}, for
        # whole-module imports (`import m`) and default requires (`const m =
        # require("./m")`). Lets `m.f()` bind to module `m`'s top-level export `f`.
        module_alias: dict[str, dict[str, str]] = {}
        # Freeze the changed-file list once: _is_target runs for every import and
        # every node carrying refs, so testing membership against the raw list
        # would cost a linear scan per check.
        targets = None if changed_files is None else frozenset(changed_files)

        def _add_edge(src: str, dst: str, kind: EdgeKind) -> bool:
            """Queue an edge once per (src, dst, kind); return True if it was new."""
            key = (src, dst, kind.value)
            if key in seen_edges:
                return False
            seen_edges.add(key)
            # Own the edge by its source-side file (the import/call site), so a
            # later incremental re-resolve can invalidate exactly these edges
            # via clear_resolved (feat-004). src is a FILE node (IMPORTS) or a
            # symbol in the caller's file (CALLS); both parse to that file path.
            edges.append(
                Edge(
                    src=src,
                    dst=dst,
                    kind=kind,
                    provenance=prov,
                    origin_path=SymbolID.parse(src).path,
                )
            )
            return True

        def _external(slug: str, repo: str, module: str) -> str:
            """Return the id of the external PACKAGE node for ``module``,
            queuing the node the first time it is seen."""
            pid = SymbolID.for_symbol(slug, repo, "<external>", Descriptor.namespace(module))
            if pid not in packages:
                packages[pid] = module
                new_nodes.append(
                    Node(
                        id=pid,
                        kind=NodeKind.PACKAGE,
                        name=module,
                        attrs={"external": True},
                        provenance=prov,
                    )
                )
            return pid

        def _is_target(path: str) -> bool:
            """True when edges originating in ``path`` should be emitted."""
            return targets is None or path in targets

        # --- imports -> IMPORTS edges + per-file name bindings ---
        for f in files:
            ps = SymbolID.parse(f.id)
            pack = self.registry.for_slug(ps.lang)
            binding = bindings.setdefault(f.id, {})
            for imp in f.attrs.get("imports", []):
                module = imp.get("module", "")
                names = imp.get("names", [])
                if not module:
                    continue
                # namespace imports (PHP/Java/C#). Path-based handling is skipped.
                if pack is not None and pack.namespace_sep:
                    norm = module.replace(pack.namespace_sep, "/")
                    if pack.namespace_import_prefix:
                        # C#: `using App.Geo` names a namespace -> IMPORTS to every
                        # in-repo file declaring it, and bind all its symbols.
                        tgt_files = ns_to_files.get(norm)
                        if tgt_files:
                            for tf in sorted(tgt_files):
                                if _is_target(ps.path) and _add_edge(f.id, tf, EdgeKind.IMPORTS):
                                    stats.imports_resolved += 1
                            for nm, sym in ns_to_syms.get(norm, {}).items():
                                binding.setdefault(nm, sym)
                        else:
                            pid = _external(ps.lang, ps.repo, module)
                            if _is_target(ps.path) and _add_edge(f.id, pid, EdgeKind.IMPORTS):
                                stats.imports_external += 1
                        continue
                    # Rust: `use crate::a::b::Item` -> strip the crate root prefix
                    # so the path matches a file-derived module key.
                    if pack.namespace_from_path and norm.startswith("crate/"):
                        norm = norm[len("crate/") :]
                    # PHP/Java/Rust: a path naming a single item (class/struct/fn)
                    # -> the file declaring it; bind the item name.
                    tgt_file = fqn_to_file.get(norm)
                    if tgt_file is not None:
                        if _is_target(ps.path) and _add_edge(f.id, tgt_file, EdgeKind.IMPORTS):
                            stats.imports_resolved += 1
                        local_name = module.rsplit(pack.namespace_sep, 1)[-1]
                        binding[local_name] = fqn_to_sym[norm]
                    else:
                        pid = _external(ps.lang, ps.repo, module)
                        if _is_target(ps.path) and _add_edge(f.id, pid, EdgeKind.IMPORTS):
                            stats.imports_external += 1
                        binding.setdefault(module.rsplit(pack.namespace_sep, 1)[-1], pid)
                    continue
                # Resolve the import as written (relative path / dotted module,
                # incl. Python leading-dot relative imports) to a key comparable
                # to the module index. file_module gives the importer's own
                # source-root-stripped module key for relative resolution.
                key = (
                    pack.resolve_import(ps.path, module, file_module.get(f.id, ""))
                    if pack
                    else module
                )
                # directory import: `require("./router")` / `import … "./router"`
                # resolves to `./router/index` (BUG-006 — relative packs).
                if key not in module_to_file and f"{key}/index" in module_to_file:
                    key = f"{key}/index"
                # Go: an import path is `<go.mod module>/<dir>`. If we know the
                # module prefix (from go.mod), strip it exactly — this maps both the
                # *root* package (key "") and any sub-package. Otherwise fall back to
                # suffix-matching leading segments to an in-repo dir. stdlib/third-
                # party never match → stay external.
                if key not in module_to_file and pack is not None and pack.module_style == "go":
                    if self.go_module and (
                        key == self.go_module or key.startswith(self.go_module + "/")
                    ):
                        rel = key[len(self.go_module) :].lstrip("/")
                        if rel in module_to_file:
                            key = rel
                    if key not in module_to_file:
                        segs = key.split("/")
                        for i in range(1, len(segs)):
                            cand = "/".join(segs[i:])
                            if cand in module_to_file:
                                key = cand
                                break
                default_name = imp.get("default", "")
                if key in module_to_file:
                    if _is_target(ps.path) and _add_edge(
                        f.id, module_to_file[key], EdgeKind.IMPORTS
                    ):
                        stats.imports_resolved += 1
                    sep = "/" if pack is not None and pack.module_style != "dotted" else "."
                    for nm in names:
                        tgt = exports.get(key, {}).get(nm)
                        if tgt:
                            binding[nm] = tgt
                            continue
                        # `from pkg import sub` where `sub` is an in-repo *submodule*
                        # (not a def of pkg): alias the local name to that module so
                        # `sub.f()` / `extends sub.Base` resolve to its exports, and
                        # point IMPORTS at the submodule file (BUG-006 aliased import).
                        sub_key = f"{key}{sep}{nm}" if key else nm
                        if sub_key in module_to_file:
                            module_alias.setdefault(f.id, {})[nm] = sub_key
                            if _is_target(ps.path) and _add_edge(
                                f.id, module_to_file[sub_key], EdgeKind.IMPORTS
                            ):
                                stats.imports_resolved += 1
                    # CommonJS default require: bind the local name to the target
                    # module's `module.exports = <name>` symbol (BUG-006).
                    if default_name:
                        exp = file_default.get(key, "")
                        tgt = exports.get(key, {}).get(exp) if exp else None
                        if tgt:
                            binding[default_name] = tgt
                        # also a module alias, so `default_name.f()` reaches a
                        # top-level export `f` of the module (BUG-006 member access).
                        # NOTE(review): nesting rebuilt from a flattened diff; the
                        # alias is assumed not to depend on `tgt` — confirm.
                        module_alias.setdefault(f.id, {})[default_name] = key
                    # whole-module import (`import m`): `m` aliases the module, so
                    # `m.f()` resolves to its top-level export `f` (BUG-006).
                    elif not names:
                        module_alias.setdefault(f.id, {})[module] = key
                    # wildcard import (Ruby `require_relative`): a name-less in-repo
                    # import makes all the target file's top-level defs callable.
                    if pack is not None and pack.wildcard_import and not names and not default_name:
                        for nm, tgt in exports.get(key, {}).items():
                            binding.setdefault(nm, tgt)
                else:
                    pid = _external(ps.lang, ps.repo, module)
                    if _is_target(ps.path) and _add_edge(f.id, pid, EdgeKind.IMPORTS):
                        stats.imports_external += 1
                    for nm in names:
                        binding.setdefault(nm, pid)
                    if default_name:
                        binding.setdefault(default_name, pid)
                    if not names and not default_name:
                        binding.setdefault(module.split(".")[-1], pid)

        # --- calls -> CALLS edges (unique match only) ---
        path_to_file = {SymbolID.parse(f.id).path: f.id for f in files}
        node_by_id = {n.id: n for n in all_nodes}
        # BUG-006: lazily resolve `self.f()`/`this.f()` to the *enclosing class's*
        # method — a unique, safe match (ADR-0004). Caches keep it cheap and
        # deterministic (methods sorted by id, like the export map above).
        method_cache: dict[str, dict[str, str]] = {}
        enclosing_cache: dict[str, str | None] = {}

        async def _methods_of(class_id: str) -> dict[str, str]:
            """Name -> id map of the direct CONTAINS children of ``class_id`` (cached)."""
            cached = method_cache.get(class_id)
            if cached is None:
                members = sorted(
                    await store.neighbors(class_id, [EdgeKind.CONTAINS], depth=1),
                    key=lambda m: m.id,
                )
                cached = {m.name: m.id for m in members}
                method_cache[class_id] = cached
            return cached

        async def _enclosing_class(node_id: str) -> str | None:
            """Id of the CLASS node that directly CONTAINS ``node_id``, or None (cached)."""
            if node_id in enclosing_cache:
                return enclosing_cache[node_id]
            cls: str | None = None
            for e in await store.adjacent(node_id, [EdgeKind.CONTAINS], "in"):
                parent = node_by_id.get(e.src)
                if parent is not None and parent.kind is NodeKind.CLASS:
                    cls = e.src
                    break
            enclosing_cache[node_id] = cls
            return cls

        # --- inheritance -> INHERITS edges (subclass -> base; unique match) ---
        # Resolve bases first and keep a superclass map, so the call loop below can
        # walk it for inherited `self.f()` (the method is defined on a base class).
        superclasses: dict[str, list[str]] = {}
        for n in all_nodes:
            bases = n.attrs.get("bases")
            if not bases or n.kind is not NodeKind.CLASS:
                continue
            owner_file = path_to_file.get(SymbolID.parse(n.id).path)
            local = exports.get(file_module.get(owner_file, ""), {}) if owner_file else {}
            binding = bindings.get(owner_file, {}) if owner_file else {}
            aliases = module_alias.get(owner_file, {}) if owner_file else {}
            resolved: list[str] = []
            for base in bases:
                bt = local.get(base) or binding.get(base)
                # qualified base `mod.Base`: resolve `mod` as an imported module
                # alias, then `Base` as that module's top-level export (BUG-006).
                if bt is None and "." in base:
                    recv, _, base_name = base.rpartition(".")
                    mod_key = aliases.get(recv)
                    if mod_key is not None:
                        bt = exports.get(mod_key, {}).get(base_name)
                # only an in-repo class is a valid base (external/by-name-only stays
                # unresolved — never guessed, ADR-0004)
                tnode = node_by_id.get(bt) if bt else None
                if tnode is not None and tnode.kind is NodeKind.CLASS and bt is not None:
                    resolved.append(bt)
            if not resolved:
                continue
            superclasses[n.id] = resolved
            if _is_target(SymbolID.parse(n.id).path):
                for b in resolved:
                    if _add_edge(n.id, b, EdgeKind.INHERITS):
                        stats.inherits_resolved += 1

        async def _inherited_method(class_id: str, name: str) -> str | None:
            """A method ``name`` defined on a *base* of ``class_id`` — resolved only
            when exactly one base in the transitive closure defines it (no MRO
            guessing across multiple definers, ADR-0004)."""
            seen: set[str] = set()
            found: set[str] = set()
            frontier = list(superclasses.get(class_id, []))
            while frontier:
                b = frontier.pop()
                if b in seen:
                    continue
                seen.add(b)
                m = (await _methods_of(b)).get(name)
                if m:
                    found.add(m)
                frontier.extend(superclasses.get(b, []))
            return next(iter(found)) if len(found) == 1 else None

        # Go: methods are package-scoped and attached to a receiver type, not
        # AST-nested in it. Index them by (package, type) so a call on a method's
        # own receiver (`s.f()`) resolves to a method of that type (BUG-006).
        go_methods: dict[tuple[str, str], dict[str, str]] = {}
        for n in sorted(all_nodes, key=lambda z: z.id):
            rtype = n.attrs.get("recv_type")
            if not rtype:
                continue
            owner = path_to_file.get(SymbolID.parse(n.id).path, "")
            go_methods.setdefault((file_module.get(owner, ""), rtype), {})[n.name] = n.id

        for n in all_nodes:
            refs = n.attrs.get("refs")
            if not refs:
                continue
            ps = SymbolID.parse(n.id)
            if not _is_target(ps.path):
                continue
            owner_file = path_to_file.get(ps.path)
            local = exports.get(file_module.get(owner_file, ""), {}) if owner_file else {}
            binding = bindings.get(owner_file, {}) if owner_file else {}
            aliases = module_alias.get(owner_file, {}) if owner_file else {}
            for ref in refs:
                nm = ref.get("name")
                recv = ref.get("recv")
                target: str | None = None
                if not nm:
                    target = None
                elif recv in _SELF_RECV:
                    # an intra-class call: bind to a method of the enclosing class,
                    # or — failing that — a method inherited from a unique base.
                    cls = await _enclosing_class(n.id)
                    if cls is not None:
                        target = (await _methods_of(cls)).get(nm)
                        if target is None:
                            target = await _inherited_method(cls, nm)
                elif recv is not None and recv == n.attrs.get("recv_var"):
                    # Go: a call on the method's own receiver (`s.f()`) → a method
                    # of the receiver's type.
                    key = (file_module.get(owner_file or "", ""), str(n.attrs.get("recv_type", "")))
                    target = go_methods.get(key, {}).get(nm)
                elif recv is not None:
                    # `m.f()` where `m` is an imported module → its export `f`;
                    # any other receiver is not a unique target (never guessed
                    # onto a same-named module-level def, ADR-0004).
                    mod_key = aliases.get(recv)
                    if mod_key is not None:
                        target = exports.get(mod_key, {}).get(nm)
                else:
                    target = local.get(nm) or binding.get(nm)
                if target and target not in packages:  # external pkg isn't a callable target
                    if _add_edge(n.id, target, EdgeKind.CALLS):
                        stats.refs_resolved += 1
                else:
                    stats.refs_unresolved += 1

        if new_nodes or edges:
            await store.add([*new_nodes, *edges])  # nodes first: edge endpoints must exist
        return stats
@@ -0,0 +1,79 @@
1
+ """``RepoSource`` — walk a repository and yield one ``SourceFile`` per
2
+ indexable file. The pipeline's only filesystem boundary.
3
+
4
+ Files with no matching pack (not our languages) or filtered out by the
5
+ include/exclude globs are skipped silently; files over the size limit are
6
+ skipped *and recorded* in ``skipped`` so the count surfaces in the IndexReport.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import hashlib
12
+ from collections.abc import Iterator
13
+ from pathlib import Path, PurePosixPath
14
+
15
+ from agentforge_graph.config import DEFAULT_EXCLUDES
16
+ from agentforge_graph.core import SourceFile
17
+
18
+ from .pack import PackRegistry
19
+
20
+
21
def read_go_module(root: str | Path) -> str:
    """Return the ``module`` path declared in ``<root>/go.mod``.

    For example ``github.com/spf13/cobra``. The resolver strips this prefix to
    map a Go import path to an in-repo package dir, including the *root*
    package (whose dir key is ``""`` and can't be suffix-matched).

    Args:
        root: directory expected to contain ``go.mod``.

    Returns:
        The module path of the first ``module`` directive, with any trailing
        ``//`` comment and surrounding quotes removed, or ``""`` when the file
        cannot be read or declares no module.
    """
    try:
        text = (Path(root) / "go.mod").read_text(encoding="utf-8", errors="replace")
    except OSError:
        return ""
    for line in text.splitlines():
        # go.mod allows `//` comments after a directive; drop them first so
        # they never end up inside the returned module path.
        code = line.split("//", 1)[0]
        # Split on any whitespace so a tab-separated directive is recognised too.
        fields = code.split(None, 1)
        if len(fields) == 2 and fields[0] == "module":
            # The path may be written as a quoted ("...") or raw (`...`) string.
            return fields[1].strip().strip('"`')
    return ""
35
+
36
+
37
+ class RepoSource:
38
+ def __init__(
39
+ self,
40
+ root: str | Path,
41
+ include: list[str] | None = None,
42
+ exclude: list[str] | None = None,
43
+ max_file_kb: int = 512,
44
+ ) -> None:
45
+ self.root = Path(root)
46
+ self.include = include
47
+ self.exclude = list(DEFAULT_EXCLUDES) if exclude is None else exclude
48
+ self.max_file_kb = max_file_kb
49
+ self.skipped: list[str] = []
50
+
51
+ def iter_files(self, registry: PackRegistry) -> Iterator[SourceFile]:
52
+ self.skipped = [] # reset per walk so repeated walks (feat-004) stay accurate
53
+ for path in sorted(self.root.rglob("*")):
54
+ if not path.is_file():
55
+ continue
56
+ rel = path.relative_to(self.root).as_posix()
57
+ if self._is_excluded(rel) or not self._is_included(rel):
58
+ continue
59
+ pack = registry.for_extension(path.suffix)
60
+ if pack is None: # not a language we index
61
+ continue
62
+ if path.stat().st_size > self.max_file_kb * 1024:
63
+ self.skipped.append(f"{rel} (> {self.max_file_kb}KB)")
64
+ continue
65
+ raw = path.read_bytes()
66
+ yield SourceFile(
67
+ path=rel,
68
+ text=raw.decode("utf-8", errors="replace"),
69
+ language=pack.lang_slug,
70
+ content_hash=hashlib.sha256(raw).hexdigest(),
71
+ )
72
+
73
+ def _is_excluded(self, rel: str) -> bool:
74
+ return any(PurePosixPath(rel).full_match(glob) for glob in self.exclude)
75
+
76
+ def _is_included(self, rel: str) -> bool:
77
+ if self.include is None:
78
+ return True
79
+ return any(PurePosixPath(rel).full_match(glob) for glob in self.include)
@@ -0,0 +1,28 @@
1
+ """ADR & docs ingestion (feat-010): connect architecture decisions to the code
2
+ they govern — the gap no surveyed tool fills (research §3.3).
3
+
4
+ MVP: ADR markdown → ``Decision`` nodes (+ body ``DocChunk``s) with **parsed**
5
+ ``GOVERNS``/``SUPERSEDES`` edges, ingested as per-ADR ``FileSubgraph`` upserts
6
+ (so they ride feat-004 incrementality). Retrieval surfaces a governing decision
7
+ when its governed code is retrieved. Zero ``agentforge`` imports (ADR-0001).
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from .adr import ADRParser, ParsedADR
13
+ from .commits import CommitIngestor
14
+ from .ingest import KnowledgeIngestor
15
+ from .mentions import Mentions, extract_mentions, resolve_mentions
16
+ from .report import DecisionInfo, KnowledgeStats
17
+
18
# Public API of this package: each name listed here is imported above from a
# sibling module (adr, commits, ingest, mentions, report).
__all__ = [
    "ADRParser",
    "ParsedADR",
    "CommitIngestor",
    "KnowledgeIngestor",
    "Mentions",
    "extract_mentions",
    "resolve_mentions",
    "DecisionInfo",
    "KnowledgeStats",
]