agentforge-graph 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. agentforge_graph/__init__.py +6 -0
  2. agentforge_graph/chunking/__init__.py +12 -0
  3. agentforge_graph/chunking/cast.py +159 -0
  4. agentforge_graph/chunking/chunk.py +19 -0
  5. agentforge_graph/chunking/tokens.py +15 -0
  6. agentforge_graph/cli.py +607 -0
  7. agentforge_graph/config.py +259 -0
  8. agentforge_graph/core/__init__.py +54 -0
  9. agentforge_graph/core/conformance.py +270 -0
  10. agentforge_graph/core/contracts.py +163 -0
  11. agentforge_graph/core/kinds.py +68 -0
  12. agentforge_graph/core/models.py +134 -0
  13. agentforge_graph/core/provenance.py +62 -0
  14. agentforge_graph/core/symbols.py +116 -0
  15. agentforge_graph/embed/__init__.py +28 -0
  16. agentforge_graph/embed/base.py +22 -0
  17. agentforge_graph/embed/bedrock.py +85 -0
  18. agentforge_graph/embed/fake.py +34 -0
  19. agentforge_graph/embed/openai.py +67 -0
  20. agentforge_graph/embed/pipeline.py +184 -0
  21. agentforge_graph/embed/registry.py +66 -0
  22. agentforge_graph/embed/report.py +15 -0
  23. agentforge_graph/enrich/__init__.py +70 -0
  24. agentforge_graph/enrich/anthropic.py +38 -0
  25. agentforge_graph/enrich/anthropic_client.py +109 -0
  26. agentforge_graph/enrich/bedrock.py +24 -0
  27. agentforge_graph/enrich/bedrock_client.py +115 -0
  28. agentforge_graph/enrich/bedrock_summarizer.py +23 -0
  29. agentforge_graph/enrich/claude.py +172 -0
  30. agentforge_graph/enrich/enricher.py +108 -0
  31. agentforge_graph/enrich/governs.py +173 -0
  32. agentforge_graph/enrich/governs_enricher.py +152 -0
  33. agentforge_graph/enrich/heuristics.py +224 -0
  34. agentforge_graph/enrich/judge.py +63 -0
  35. agentforge_graph/enrich/registry.py +133 -0
  36. agentforge_graph/enrich/report.py +60 -0
  37. agentforge_graph/enrich/summarizer.py +62 -0
  38. agentforge_graph/enrich/summary_enricher.py +211 -0
  39. agentforge_graph/enrich/taxonomy.py +38 -0
  40. agentforge_graph/frameworks/__init__.py +29 -0
  41. agentforge_graph/frameworks/base.py +75 -0
  42. agentforge_graph/frameworks/detect.py +124 -0
  43. agentforge_graph/frameworks/extractor.py +63 -0
  44. agentforge_graph/frameworks/orm.py +93 -0
  45. agentforge_graph/frameworks/packs/_js_ast.py +56 -0
  46. agentforge_graph/frameworks/packs/_python_ast.py +157 -0
  47. agentforge_graph/frameworks/packs/django/__init__.py +240 -0
  48. agentforge_graph/frameworks/packs/django/models.scm +7 -0
  49. agentforge_graph/frameworks/packs/express/__init__.py +133 -0
  50. agentforge_graph/frameworks/packs/express/routes.scm +8 -0
  51. agentforge_graph/frameworks/packs/fastapi/__init__.py +210 -0
  52. agentforge_graph/frameworks/packs/fastapi/depends.scm +6 -0
  53. agentforge_graph/frameworks/packs/fastapi/routes.scm +10 -0
  54. agentforge_graph/frameworks/packs/flask/__init__.py +143 -0
  55. agentforge_graph/frameworks/packs/flask/routes.scm +11 -0
  56. agentforge_graph/frameworks/packs/nestjs/__init__.py +205 -0
  57. agentforge_graph/frameworks/packs/nestjs/routes.scm +6 -0
  58. agentforge_graph/frameworks/packs/spring/__init__.py +267 -0
  59. agentforge_graph/frameworks/packs/spring/routes.scm +6 -0
  60. agentforge_graph/frameworks/packs/sqlalchemy/__init__.py +250 -0
  61. agentforge_graph/frameworks/packs/sqlalchemy/models.scm +7 -0
  62. agentforge_graph/frameworks/registry.py +44 -0
  63. agentforge_graph/ingest/__init__.py +30 -0
  64. agentforge_graph/ingest/codegraph.py +847 -0
  65. agentforge_graph/ingest/extractor.py +353 -0
  66. agentforge_graph/ingest/incremental/__init__.py +25 -0
  67. agentforge_graph/ingest/incremental/detect.py +118 -0
  68. agentforge_graph/ingest/incremental/dirty.py +61 -0
  69. agentforge_graph/ingest/incremental/indexer.py +218 -0
  70. agentforge_graph/ingest/incremental/meta.py +72 -0
  71. agentforge_graph/ingest/incremental/ports.py +39 -0
  72. agentforge_graph/ingest/pack.py +160 -0
  73. agentforge_graph/ingest/packs/__init__.py +34 -0
  74. agentforge_graph/ingest/packs/cpp/__init__.py +35 -0
  75. agentforge_graph/ingest/packs/cpp/references.scm +15 -0
  76. agentforge_graph/ingest/packs/cpp/structure.scm +49 -0
  77. agentforge_graph/ingest/packs/csharp/__init__.py +35 -0
  78. agentforge_graph/ingest/packs/csharp/references.scm +12 -0
  79. agentforge_graph/ingest/packs/csharp/structure.scm +45 -0
  80. agentforge_graph/ingest/packs/go/__init__.py +38 -0
  81. agentforge_graph/ingest/packs/go/references.scm +12 -0
  82. agentforge_graph/ingest/packs/go/structure.scm +64 -0
  83. agentforge_graph/ingest/packs/java/__init__.py +35 -0
  84. agentforge_graph/ingest/packs/java/references.scm +12 -0
  85. agentforge_graph/ingest/packs/java/structure.scm +38 -0
  86. agentforge_graph/ingest/packs/javascript/__init__.py +34 -0
  87. agentforge_graph/ingest/packs/javascript/references.scm +11 -0
  88. agentforge_graph/ingest/packs/javascript/structure.scm +166 -0
  89. agentforge_graph/ingest/packs/php/__init__.py +35 -0
  90. agentforge_graph/ingest/packs/php/references.scm +15 -0
  91. agentforge_graph/ingest/packs/php/structure.scm +44 -0
  92. agentforge_graph/ingest/packs/python/__init__.py +25 -0
  93. agentforge_graph/ingest/packs/python/references.scm +14 -0
  94. agentforge_graph/ingest/packs/python/structure.scm +57 -0
  95. agentforge_graph/ingest/packs/ruby/__init__.py +37 -0
  96. agentforge_graph/ingest/packs/ruby/references.scm +12 -0
  97. agentforge_graph/ingest/packs/ruby/structure.scm +37 -0
  98. agentforge_graph/ingest/packs/rust/__init__.py +39 -0
  99. agentforge_graph/ingest/packs/rust/references.scm +12 -0
  100. agentforge_graph/ingest/packs/rust/structure.scm +46 -0
  101. agentforge_graph/ingest/packs/typescript/__init__.py +31 -0
  102. agentforge_graph/ingest/packs/typescript/references.scm +11 -0
  103. agentforge_graph/ingest/packs/typescript/structure.scm +99 -0
  104. agentforge_graph/ingest/pipeline.py +134 -0
  105. agentforge_graph/ingest/report.py +84 -0
  106. agentforge_graph/ingest/resolver.py +467 -0
  107. agentforge_graph/ingest/source.py +79 -0
  108. agentforge_graph/knowledge/__init__.py +28 -0
  109. agentforge_graph/knowledge/adr.py +136 -0
  110. agentforge_graph/knowledge/commits.py +152 -0
  111. agentforge_graph/knowledge/ingest.py +312 -0
  112. agentforge_graph/knowledge/mentions.py +71 -0
  113. agentforge_graph/knowledge/report.py +32 -0
  114. agentforge_graph/main.py +21 -0
  115. agentforge_graph/providers.py +36 -0
  116. agentforge_graph/repomap/__init__.py +14 -0
  117. agentforge_graph/repomap/rank.py +161 -0
  118. agentforge_graph/repomap/render.py +55 -0
  119. agentforge_graph/repomap/repomap.py +66 -0
  120. agentforge_graph/retrieve/__init__.py +21 -0
  121. agentforge_graph/retrieve/pack.py +76 -0
  122. agentforge_graph/retrieve/rerank.py +251 -0
  123. agentforge_graph/retrieve/retriever.py +286 -0
  124. agentforge_graph/retrieve/scoring.py +36 -0
  125. agentforge_graph/serve/__init__.py +19 -0
  126. agentforge_graph/serve/engine.py +204 -0
  127. agentforge_graph/serve/http_runner.py +133 -0
  128. agentforge_graph/serve/server.py +110 -0
  129. agentforge_graph/serve/tools.py +307 -0
  130. agentforge_graph/store/__init__.py +32 -0
  131. agentforge_graph/store/_rowmap.py +102 -0
  132. agentforge_graph/store/errors.py +22 -0
  133. agentforge_graph/store/facade.py +89 -0
  134. agentforge_graph/store/kuzu_store.py +380 -0
  135. agentforge_graph/store/lance_store.py +146 -0
  136. agentforge_graph/store/neo4j_store.py +294 -0
  137. agentforge_graph/store/pgvector_store.py +170 -0
  138. agentforge_graph/store/registry.py +45 -0
  139. agentforge_graph/temporal/__init__.py +36 -0
  140. agentforge_graph/temporal/backfill.py +338 -0
  141. agentforge_graph/temporal/events.py +82 -0
  142. agentforge_graph/temporal/index.py +190 -0
  143. agentforge_graph/temporal/mining.py +190 -0
  144. agentforge_graph/temporal/recorder.py +114 -0
  145. agentforge_graph/temporal/store.py +282 -0
  146. agentforge_graph-0.3.2.dist-info/METADATA +291 -0
  147. agentforge_graph-0.3.2.dist-info/RECORD +151 -0
  148. agentforge_graph-0.3.2.dist-info/WHEEL +4 -0
  149. agentforge_graph-0.3.2.dist-info/entry_points.txt +3 -0
  150. agentforge_graph-0.3.2.dist-info/licenses/LICENSE +202 -0
  151. agentforge_graph-0.3.2.dist-info/licenses/NOTICE +14 -0
@@ -0,0 +1,37 @@
1
+ """The Ruby language pack (Tier A — structure + require_relative resolution).
2
+
3
+ Ruby modules/classes nest methods; `require_relative "./x"` is path-based and
4
+ resolves in-repo (``module_style="relative"``), while `require "gem"` stays
5
+ external. Ruby also autoloads (Rails) and uses heavy metaprogramming, so the
6
+ import graph is sparser than in static languages — symbol extraction is the
7
+ primary value; receiver-qualified calls stay unresolved (ADR-0004).
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from pathlib import Path
13
+
14
+ from agentforge_graph.core import NodeKind
15
+ from agentforge_graph.ingest.pack import DescriptorRules, LanguagePack
16
+
17
_HERE = Path(__file__).parent

# Declarative description of the Ruby pack. Both query files sit next to this
# module and are read as UTF-8 text when the module is imported.
RUBY_PACK = LanguagePack(
    language="ruby",
    lang_slug="rb",
    grammar="ruby",
    extensions=(".rb",),
    structure_queries=_HERE.joinpath("structure.scm").read_text(encoding="utf-8"),
    reference_queries=_HERE.joinpath("references.scm").read_text(encoding="utf-8"),
    descriptor_rules=DescriptorRules(
        kinds={
            # `class` and `module` both map to Class.
            "def.class": NodeKind.CLASS,
            # plain `def` (promoted to Method when inside a class).
            "def.function": NodeKind.FUNCTION,
            # `def self.x`.
            "def.method": NodeKind.METHOD,
            # constant assignment.
            "def.variable": NodeKind.VARIABLE,
        }
    ),
    # require_relative paths are relative to the requiring file ...
    module_style="relative",
    # ... even when written bare, e.g. `require_relative "thor/x"`.
    relative_bare=True,
    # require_relative brings in all of the target file's top-level defs.
    wildcard_import=True,
)
@@ -0,0 +1,12 @@
1
; Ruby reference queries (feat-002, pack-ruby).
;
; Captures used below:
;   @call         the whole call node
;   @call.callee  the called method name (the call's `method:` field)
;   @call.recv    the receiver expression, when the call has one
;
; A call's `method:` field is the called name, whether or not it has a receiver
; (`foo(...)`, `obj.foo(...)`, `Mod::Klass.foo(...)`), so the first pattern puts
; no constraint on the receiver. The second pattern captures the receiver
; (@call.recv) so `self.foo()` binds to the enclosing class's method
; (BUG-006); other receivers stay unresolved (member access, ADR-0004).

; Any call whose method name is a plain identifier.
(call
  method: (identifier) @call.callee) @call

; Calls with an explicit receiver; the receiver may be any node.
(call
  receiver: (_) @call.recv
  method: (identifier) @call.callee) @call
@@ -0,0 +1,37 @@
1
; Ruby structure queries (feat-002, pack-ruby).
; Shares the capture vocabulary so edge kinds mean the same as other packs.

; --- definitions ---
; module + class are named containers -> Class; nested defs become methods.
; Both patterns require the name to be a plain `constant` node.
(module
  name: (constant) @name) @def.class

(class
  name: (constant) @name) @def.class

; --- inheritance (INHERITS): `class B < A` ---
; @base.def is the inheriting class node; @base.name is the superclass constant.
(class
  superclass: (superclass (constant) @base.name)) @base.def

; `def foo` -> Function (promoted to Method when nested in a class/module body).
(method
  name: (identifier) @name) @def.function

; `def self.foo` (class method) -> Method.
(singleton_method
  name: (identifier) @name) @def.method

; constant assignment (`PI = 3.14`) -> Variable. Only `constant` (Uppercase) lefts
; match, so local variables (lowercase identifiers) are not captured.
(assignment
  left: (constant) @name) @def.variable

; --- imports ---
; `require_relative "thor/command"` is always file-relative (bare or `./`),
; resolved in-repo via relative_bare. Plain `require "gem"` is load-path based
; (lib-root relative) and is left to a follow-up — capturing it here would
; mis-resolve against the importer's dir, so only require_relative is taken.
; @_req exists only so the #eq? predicate can test the method name; the string
; argument's content is what becomes @import.module.
(call
  method: (identifier) @_req
  arguments: (argument_list (string (string_content) @import.module))
  (#eq? @_req "require_relative")) @import
@@ -0,0 +1,39 @@
1
+ """The Rust language pack (Tier A — structure + path-derived module resolution).
2
+
3
+ Rust's module path is implicit in the file layout (`src/a/b.rs` is module `a::b`),
4
+ so the pack derives each file's module from its path (`namespace_from_path`) and
5
+ resolves `use crate::a::b::Item` to the file declaring `Item` (FQN-style, sep
6
+ `::`, with `crate::` stripped). Extracts struct/enum/union/impl (→Class), trait
7
+ (→Interface), functions + methods, const/static (→Variable), type aliases. `impl`
8
+ blocks attach their methods to the type. Method/path calls stay unresolved
9
+ (ADR-0004); grouped/glob `use` is a follow-up.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from pathlib import Path
15
+
16
+ from agentforge_graph.core import NodeKind
17
+ from agentforge_graph.ingest.pack import DescriptorRules, LanguagePack
18
+
19
_HERE = Path(__file__).parent

# Declarative description of the Rust pack. Both query files sit next to this
# module and are read as UTF-8 text when the module is imported.
RUST_PACK = LanguagePack(
    language="rust",
    lang_slug="rs",
    grammar="rust",
    extensions=(".rs",),
    structure_queries=_HERE.joinpath("structure.scm").read_text(encoding="utf-8"),
    reference_queries=_HERE.joinpath("references.scm").read_text(encoding="utf-8"),
    descriptor_rules=DescriptorRules(
        kinds={
            # struct, enum, union and impl all map to Class.
            "def.class": NodeKind.CLASS,
            # trait.
            "def.interface": NodeKind.INTERFACE,
            # fn and trait method signatures (promoted).
            "def.function": NodeKind.FUNCTION,
            # const and static.
            "def.variable": NodeKind.VARIABLE,
            "def.type": NodeKind.TYPE_ALIAS,
        }
    ),
    namespace_sep="::",
    # The module path comes from the file path, not from a declaration.
    namespace_from_path=True,
)
@@ -0,0 +1,12 @@
1
; Rust reference queries (feat-002, pack-rust).
;
; Captures used below:
;   @call         the whole call_expression node
;   @call.callee  the called function / method name
;   @call.recv    the receiver (the field_expression's `value:`), method calls only
;
; Plain call `f(...)` and method call `x.f(...)`. @call.recv captures the
; receiver so `self.f()` binds to the enclosing impl's method (BUG-006); other
; receivers stay unresolved (member access, ADR-0004).

; Plain call: the function position is a bare identifier.
(call_expression
  function: (identifier) @call.callee) @call

; Method call: the function position is a field access `value.field`.
(call_expression
  function: (field_expression
    value: (_) @call.recv
    field: (field_identifier) @call.callee)) @call
@@ -0,0 +1,46 @@
1
; Rust structure queries (feat-002, pack-rust).
; Module path is derived from the file path (namespace_from_path), so a
; `use crate::a::b::Item` resolves to the file declaring Item. `mod` blocks are
; scopes, not def nodes (so items in them keep their correct kind).

; --- definitions ---
; struct / enum / union are all captured as @def.class.
(struct_item
  name: (type_identifier) @name) @def.class

(enum_item
  name: (type_identifier) @name) @def.class

(union_item
  name: (type_identifier) @name) @def.class

; a trait is an interface (named method/contract container).
(trait_item
  name: (type_identifier) @name) @def.interface

; `impl Type { … }` / `impl Trait for Type { … }` -> attach methods to the Type
; (merges with the struct/enum node of the same name; methods nest -> Method).
; The name is taken from the impl's `type:` field, which must be a plain
; type_identifier for this pattern to match.
(impl_item
  type: (type_identifier) @name) @def.class

; free functions; in an impl/trait body they promote to Method by nesting.
(function_item
  name: (identifier) @name) @def.function

; trait method signatures (`fn draw(&self);`).
(function_signature_item
  name: (identifier) @name) @def.function

; `const` and `static` items -> Variable.
(const_item
  name: (identifier) @name) @def.variable

(static_item
  name: (identifier) @name) @def.variable

; `type` items -> @def.type.
(type_item
  name: (type_identifier) @name) @def.type

; --- imports ---
; `use crate::shapes::Shape;` -> path naming an item (FQN-style resolution).
; Grouped/glob uses (`use a::{B, C}`, `use a::*`) are a follow-up.
(use_declaration
  (scoped_identifier) @import.module) @import
@@ -0,0 +1,31 @@
1
+ """The TypeScript language pack (Tier A — structure + import resolution)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ from agentforge_graph.core import NodeKind
8
+ from agentforge_graph.ingest.pack import DescriptorRules, LanguagePack
9
+
10
_HERE = Path(__file__).parent

# Declarative description of the TypeScript pack. Both query files sit next to
# this module and are read as UTF-8 text when the module is imported.
TYPESCRIPT_PACK = LanguagePack(
    language="typescript",
    lang_slug="ts",
    grammar="typescript",
    extensions=(".ts",),
    structure_queries=_HERE.joinpath("structure.scm").read_text(encoding="utf-8"),
    reference_queries=_HERE.joinpath("references.scm").read_text(encoding="utf-8"),
    descriptor_rules=DescriptorRules(
        kinds={
            "def.class": NodeKind.CLASS,
            # Promoted to METHOD inside a class.
            "def.function": NodeKind.FUNCTION,
            # ENH-008: broaden the TS surface beyond class/function/method.
            "def.interface": NodeKind.INTERFACE,
            # There is no dedicated Enum kind; an enum is a nominal type.
            "def.enum": NodeKind.CLASS,
            "def.type": NodeKind.TYPE_ALIAS,
            # Module-level const tables/enums.
            "def.variable": NodeKind.VARIABLE,
        }
    ),
    # TS imports are path specifiers (./util), not dotted names.
    module_style="relative",
)
@@ -0,0 +1,11 @@
1
; TypeScript reference queries (feat-002, pack-ts).
;
; Captures used below:
;   @call         the whole call_expression node
;   @call.callee  the called function / property name
;   @call.recv    the receiver (the member_expression's `object:`), member calls only
;
; Plain call `f(...)` and method/attribute call `recv.f(...)`. @call.recv is the
; receiver (BUG-006), so `this.f()` binds to the enclosing class's method.

; Plain call: the function position is a bare identifier.
(call_expression
  function: (identifier) @call.callee) @call

; Member call: the function position is `object.property`.
(call_expression
  function: (member_expression
    object: (_) @call.recv
    property: (property_identifier) @call.callee)) @call
@@ -0,0 +1,99 @@
1
; TypeScript structure queries (feat-002, pack-ts).
; Mirrors the Python pack's capture vocabulary so edge kinds mean the same.
; Definitions may be wrapped in `export_statement`; queries match nested.

; --- definitions ---
(class_declaration
  name: (type_identifier) @name) @def.class

; `abstract class Foo {}` is a distinct node from class_declaration; capture it
; as the same Class kind so abstract base classes + their methods are extracted
; (BUG-005). JS has no `abstract`, so this is TS-only.
(abstract_class_declaration
  name: (type_identifier) @name) @def.class

; --- inheritance (INHERITS): `class B extends A` (interfaces via `implements`
; are a separate relation, not captured here). ---
; @base.def is the inheriting class node; @base.name is the base expression.
(class_declaration
  (class_heritage (extends_clause value: (identifier) @base.name))) @base.def
(abstract_class_declaration
  (class_heritage (extends_clause value: (identifier) @base.name))) @base.def

; qualified base `class B extends mod.Base` -> base `mod.Base`; the resolver splits
; the receiver and binds it via the importing module alias.
(class_declaration
  (class_heritage (extends_clause value: (member_expression) @base.name))) @base.def
(abstract_class_declaration
  (class_heritage (extends_clause value: (member_expression) @base.name))) @base.def

(function_declaration
  name: (identifier) @name) @def.function

; methods live in a class_body -> promoted to METHOD by the extractor
(method_definition
  name: (property_identifier) @name) @def.function

; --- JSDoc/TSDoc docstrings (DESCRIBES) ---
; a `/** … */` block comment immediately before a function/class/method becomes a
; DocChunk that DESCRIBES the symbol (feat-010); `#match?` keeps only `/**` blocks.
; The `.` anchor between the two nodes requires them to be adjacent siblings.
((comment) @docstring . (function_declaration) @doc.owner
  (#match? @docstring "^/[*][*]"))
((comment) @docstring . (class_declaration) @doc.owner
  (#match? @docstring "^/[*][*]"))
((comment) @docstring . (abstract_class_declaration) @doc.owner
  (#match? @docstring "^/[*][*]"))
(class_body
  (comment) @docstring . (method_definition) @doc.owner
  (#match? @docstring "^/[*][*]"))

; --- TS type surface (ENH-008) ---
; `interface Foo {}` -> Interface (the dominant way TS describes contracts).
(interface_declaration
  name: (type_identifier) @name) @def.interface

; `enum E {}` and `const enum E {}` -> Class (no dedicated Enum kind; a named
; nominal type with members). Both parse as enum_declaration with an identifier.
(enum_declaration
  name: (identifier) @name) @def.enum

; `type ID = ...` -> TypeAlias.
(type_alias_declaration
  name: (type_identifier) @name) @def.type

; --- value bindings (ENH-008, shared with JS) ---
; `const f = (…) => …` / `const f = function () {}` -> Function (named from the
; binding). Captured at any depth — these are genuine functions.
(lexical_declaration
  (variable_declarator
    name: (identifier) @name
    value: [(arrow_function) (function_expression)])) @def.function

; module-level const data tables / const-object enums -> Variable. Scoped to the
; top level (program / export) so locals inside function bodies don't inflate the
; graph. Only object/array initializers — NOT call results: `const x =
; require(...)` is an import binding (BUG-006), and call-bound public constants
; (e.g. zod's `ZodIssueCode = arrayToEnum([...])`) stay findable via their
; companion `type X = ...` alias, captured above as a TypeAlias.
; First pattern: declaration directly under `program`.
(program
  (lexical_declaration
    (variable_declarator
      name: (identifier) @name
      value: [(object) (array)])) @def.variable)
; Second pattern: the same declaration wrapped in a top-level `export_statement`.
(program
  (export_statement
    (lexical_declaration
      (variable_declarator
        name: (identifier) @name
        value: [(object) (array)])) @def.variable))

; --- imports ---
; `import { a, b } from "./mod"` -> module (relative path) + bound names
(import_statement
  (import_clause (named_imports (import_specifier name: (identifier) @import.name)))
  source: (string (string_fragment) @import.module)) @import

; `import * as ns from "./mod"` -> the namespace alias binds the whole module, so
; `ns.foo()` and a qualified base `extends ns.Base` resolve to its exports (BUG-006).
(import_statement
  (import_clause (namespace_import (identifier) @import.default))
  source: (string (string_fragment) @import.module)) @import
@@ -0,0 +1,134 @@
1
+ """``IngestPipeline`` — drives the two passes over a whole repo.
2
+
3
+ Extraction is CPU-bound and file-isolated, so files are parsed on a thread
4
+ pool with bounded concurrency; the store serializes its own writes. A fresh
5
+ ``TreeSitterExtractor`` is built inside each worker thread because a
6
+ tree-sitter ``Parser`` is not safe to share across threads (the grammar
7
+ itself is cached, so only the lightweight parser/query objects are rebuilt).
8
+ After all files are upserted, the resolver runs once.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import asyncio
14
+
15
+ from agentforge_graph.core import FileSubgraph, GraphStore, SourceFile
16
+ from agentforge_graph.frameworks import FrameworkExtractor
17
+
18
+ from .extractor import TreeSitterExtractor
19
+ from .pack import LanguagePack, PackRegistry
20
+ from .report import IndexReport
21
+ from .resolver import ImportResolver
22
+ from .source import RepoSource, read_go_module
23
+
24
+
25
def _extract_one(
    pack: LanguagePack,
    repo: str,
    commit: str,
    sf: SourceFile,
    frameworks: FrameworkExtractor | None,
) -> tuple[FileSubgraph, int]:
    """Extract one file's subgraph and merge in any framework facts.

    Runs entirely inside a worker thread: the extractor is created here
    because a tree-sitter parser must not be shared across threads.

    Returns the (possibly merged) subgraph and the number of framework
    registrations that were seen but could not be extracted.
    """
    subgraph = TreeSitterExtractor(pack, repo, commit).extract(sf)

    # No framework extractor, or none active: the plain subgraph is the result.
    if frameworks is None or not frameworks.active:
        return subgraph, 0

    facts = frameworks.extract(sf, repo, commit)  # feat-011: routes/etc.
    if not (facts.nodes or facts.edges):
        return subgraph, facts.unresolved

    merged = subgraph.model_copy(
        update={
            "nodes": [*subgraph.nodes, *facts.nodes],
            "edges": [*subgraph.edges, *facts.edges],
        }
    )
    return merged, facts.unresolved
43
+
44
+
45
class IngestPipeline:
    """Drive extraction (pass 1) and resolution (pass 2) over a repository.

    Attributes:
        repo: Repository identifier handed to the extractors.
        commit: Commit identifier handed to the extractors and resolvers.
        concurrency: Upper bound on files being extracted at the same time.
            Values below 1 are treated as 1 when the pipeline runs.
        frameworks: Optional framework extractor; when present and active its
            facts are merged into each file's subgraph.
    """

    def __init__(
        self,
        repo: str,
        commit: str = "",
        concurrency: int = 8,
        frameworks: FrameworkExtractor | None = None,
    ) -> None:
        self.repo = repo
        self.commit = commit
        self.concurrency = concurrency
        self.frameworks = frameworks

    async def run(
        self,
        source: RepoSource,
        store: GraphStore,
        registry: PackRegistry,
        paths: set[str] | None = None,
    ) -> IndexReport:
        """Extract + upsert each file, then resolve.

        When ``paths`` is given, only those files are (re)extracted (feat-004
        incremental scope); the resolver is **not** run here — incremental
        refresh owns scoped re-resolution. ``paths is None`` is the full-index
        path (resolve runs). Active framework packs (feat-011) emit extra
        nodes/edges merged into each file's subgraph, so they ride the same
        upsert + incrementality.

        Args:
            source: Yields the repository's files and records skipped ones.
            store: Graph store receiving one upsert per extracted file.
            registry: Maps a file's language slug to its language pack.
            paths: Optional set of file paths restricting extraction.

        Returns:
            The ``IndexReport`` with per-kind tallies and, on a full index,
            the resolver statistics.
        """
        report = IndexReport()
        # asyncio.Semaphore(0) would block every worker forever and a negative
        # value raises ValueError, so always allow at least one extraction.
        sem = asyncio.Semaphore(max(1, self.concurrency))

        async def _do(sf: SourceFile) -> tuple[FileSubgraph, int] | None:
            pack = registry.for_slug(sf.language)
            if pack is None:
                # No language pack for this file: nothing to extract.
                return None
            async with sem:
                result = await asyncio.to_thread(
                    _extract_one, pack, self.repo, self.commit, sf, self.frameworks
                )
            await store.upsert(result[0])
            return result

        files = (sf for sf in source.iter_files(registry) if paths is None or sf.path in paths)
        results = await asyncio.gather(*[_do(sf) for sf in files])

        for result in results:
            if result is None:
                continue
            sg, unresolved = result
            report.files_indexed += 1
            report.nodes += len(sg.nodes)
            report.edges += len(sg.edges)
            report.framework_unresolved += unresolved
            for n in sg.nodes:
                report.by_node_kind[n.kind.value] = report.by_node_kind.get(n.kind.value, 0) + 1
            for e in sg.edges:
                report.by_edge_kind[e.kind.value] = report.by_edge_kind.get(e.kind.value, 0) + 1
        report.skipped = list(source.skipped)
        report.routes_extracted = report.by_node_kind.get("Route", 0)
        report.models_extracted = report.by_node_kind.get("DataModel", 0)
        report.services_extracted = report.by_node_kind.get("Service", 0)

        if paths is not None:
            # Scoped (incremental) extract: the caller re-resolves with the
            # right import-graph scope. Edge tallies come from that pass.
            return report

        stats = await ImportResolver(
            registry, self.commit, go_module=read_go_module(source.root)
        ).resolve(store)
        report.resolve = stats
        # Edges created by the resolver are added on top of the pass-1 tallies.
        imports = stats.imports_resolved + stats.imports_external
        report.by_edge_kind["IMPORTS"] = report.by_edge_kind.get("IMPORTS", 0) + imports
        report.by_edge_kind["CALLS"] = report.by_edge_kind.get("CALLS", 0) + stats.refs_resolved
        if stats.inherits_resolved:
            report.by_edge_kind["INHERITS"] = (
                report.by_edge_kind.get("INHERITS", 0) + stats.inherits_resolved
            )
        report.edges += imports + stats.refs_resolved + stats.inherits_resolved

        # feat-011 pass-2: stitch ORM relationship/FK string targets into
        # RELATES_TO edges (and future router-prefix composition).
        if self.frameworks is not None and self.frameworks.active:
            resolved, unresolved = await self.frameworks.resolve(store, self.commit)
            if resolved:
                report.relations_resolved = resolved
                report.by_edge_kind["RELATES_TO"] = (
                    report.by_edge_kind.get("RELATES_TO", 0) + resolved
                )
                report.edges += resolved
            report.framework_unresolved += unresolved
        return report
@@ -0,0 +1,84 @@
1
+ """Result types for an indexing run."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pydantic import BaseModel, Field
6
+
7
+
8
class ResolveStats(BaseModel):
    """Outcome of the pass-2 resolver.

    Every counter defaults to 0.
    """

    imports_resolved: int = 0  # IMPORTS edges to in-repo files
    imports_external: int = 0  # IMPORTS edges to external (stdlib/third-party) packages
    refs_resolved: int = 0  # CALLS edges created (unique match)
    refs_unresolved: int = 0  # call sites with zero/ambiguous targets (recorded, not guessed)
    inherits_resolved: int = 0  # INHERITS edges created (base class -> in-repo class)
16
+
17
+
18
class RouteInfo(BaseModel):
    """One extracted endpoint (feat-011), for ``CodeGraph.routes`` / ``ckg
    routes`` / the ``ckg_routes`` tool."""

    method: str
    path: str
    framework: str
    handler: str  # handler symbol id (HANDLED_BY target)
    file: str
    line: int

    def to_dict(self) -> dict[str, object]:
        """Return the model's fields as a plain dict (via ``model_dump``)."""
        return self.model_dump()
31
+
32
+
33
class ModelInfo(BaseModel):
    """One extracted ORM data model (feat-011), for ``CodeGraph.models`` /
    ``ckg models``."""

    name: str  # table name when known, else the class name
    table: str  # mapped table name ("" when not statically known)
    framework: str
    fields: list[str]  # mapped column/field names
    relations: list[dict[str, str]]  # RELATES_TO out: {to, kind, via}
    cls: str  # the underlying class symbol id
    file: str
    line: int

    def to_dict(self) -> dict[str, object]:
        """Return the model's fields as a plain dict (via ``model_dump``)."""
        return self.model_dump()
48
+
49
+
50
class ServiceInfo(BaseModel):
    """One DI-provided service (feat-011), for ``CodeGraph.services`` /
    ``ckg services``."""

    name: str  # the provider (dependency) name
    framework: str
    injected_into: list[str]  # consumer symbol ids the service is INJECTED_INTO
    file: str
    line: int

    def to_dict(self) -> dict[str, object]:
        """Return the model's fields as a plain dict (via ``model_dump``)."""
        return self.model_dump()
62
+
63
+
64
class IndexReport(BaseModel):
    """Summary of a full ``IngestPipeline.run`` / ``CodeGraph.index``.

    All counters default to 0 and all collections default to empty.
    """

    files_indexed: int = 0
    nodes: int = 0
    edges: int = 0
    # Per-kind tallies, keyed by the kind's string value.
    by_node_kind: dict[str, int] = Field(default_factory=dict)
    by_edge_kind: dict[str, int] = Field(default_factory=dict)
    skipped: list[str] = Field(default_factory=list)
    # Pass-2 resolver statistics.
    resolve: ResolveStats = Field(default_factory=ResolveStats)
    routes_extracted: int = 0  # feat-011: framework Route nodes emitted
    models_extracted: int = 0  # feat-011: ORM DataModel nodes emitted
    services_extracted: int = 0  # feat-011: DI Service nodes emitted
    relations_resolved: int = 0  # feat-011: RELATES_TO edges from ORM relationship/FK targets
    framework_unresolved: int = 0  # framework registrations seen but not extractable
    decisions_indexed: int = 0  # feat-010: ADR Decision nodes
    governs_resolved: int = 0  # GOVERNS edges from unambiguous ADR mentions
    mentions_unresolved: int = 0  # ADR mentions seen but not linked
    docs_indexed: int = 0  # feat-010: general doc files (doc_globs) ingested
    describes_resolved: int = 0  # DESCRIBES edges from unambiguous doc mentions
    commits_indexed: int = 0  # feat-010: commit messages ingested as DocChunks