agentforge-graph 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentforge_graph/__init__.py +6 -0
- agentforge_graph/chunking/__init__.py +12 -0
- agentforge_graph/chunking/cast.py +159 -0
- agentforge_graph/chunking/chunk.py +19 -0
- agentforge_graph/chunking/tokens.py +15 -0
- agentforge_graph/cli.py +607 -0
- agentforge_graph/config.py +259 -0
- agentforge_graph/core/__init__.py +54 -0
- agentforge_graph/core/conformance.py +270 -0
- agentforge_graph/core/contracts.py +163 -0
- agentforge_graph/core/kinds.py +68 -0
- agentforge_graph/core/models.py +134 -0
- agentforge_graph/core/provenance.py +62 -0
- agentforge_graph/core/symbols.py +116 -0
- agentforge_graph/embed/__init__.py +28 -0
- agentforge_graph/embed/base.py +22 -0
- agentforge_graph/embed/bedrock.py +85 -0
- agentforge_graph/embed/fake.py +34 -0
- agentforge_graph/embed/openai.py +67 -0
- agentforge_graph/embed/pipeline.py +184 -0
- agentforge_graph/embed/registry.py +66 -0
- agentforge_graph/embed/report.py +15 -0
- agentforge_graph/enrich/__init__.py +70 -0
- agentforge_graph/enrich/anthropic.py +38 -0
- agentforge_graph/enrich/anthropic_client.py +109 -0
- agentforge_graph/enrich/bedrock.py +24 -0
- agentforge_graph/enrich/bedrock_client.py +115 -0
- agentforge_graph/enrich/bedrock_summarizer.py +23 -0
- agentforge_graph/enrich/claude.py +172 -0
- agentforge_graph/enrich/enricher.py +108 -0
- agentforge_graph/enrich/governs.py +173 -0
- agentforge_graph/enrich/governs_enricher.py +152 -0
- agentforge_graph/enrich/heuristics.py +224 -0
- agentforge_graph/enrich/judge.py +63 -0
- agentforge_graph/enrich/registry.py +133 -0
- agentforge_graph/enrich/report.py +60 -0
- agentforge_graph/enrich/summarizer.py +62 -0
- agentforge_graph/enrich/summary_enricher.py +211 -0
- agentforge_graph/enrich/taxonomy.py +38 -0
- agentforge_graph/frameworks/__init__.py +29 -0
- agentforge_graph/frameworks/base.py +75 -0
- agentforge_graph/frameworks/detect.py +124 -0
- agentforge_graph/frameworks/extractor.py +63 -0
- agentforge_graph/frameworks/orm.py +93 -0
- agentforge_graph/frameworks/packs/_js_ast.py +56 -0
- agentforge_graph/frameworks/packs/_python_ast.py +157 -0
- agentforge_graph/frameworks/packs/django/__init__.py +240 -0
- agentforge_graph/frameworks/packs/django/models.scm +7 -0
- agentforge_graph/frameworks/packs/express/__init__.py +133 -0
- agentforge_graph/frameworks/packs/express/routes.scm +8 -0
- agentforge_graph/frameworks/packs/fastapi/__init__.py +210 -0
- agentforge_graph/frameworks/packs/fastapi/depends.scm +6 -0
- agentforge_graph/frameworks/packs/fastapi/routes.scm +10 -0
- agentforge_graph/frameworks/packs/flask/__init__.py +143 -0
- agentforge_graph/frameworks/packs/flask/routes.scm +11 -0
- agentforge_graph/frameworks/packs/nestjs/__init__.py +205 -0
- agentforge_graph/frameworks/packs/nestjs/routes.scm +6 -0
- agentforge_graph/frameworks/packs/spring/__init__.py +267 -0
- agentforge_graph/frameworks/packs/spring/routes.scm +6 -0
- agentforge_graph/frameworks/packs/sqlalchemy/__init__.py +250 -0
- agentforge_graph/frameworks/packs/sqlalchemy/models.scm +7 -0
- agentforge_graph/frameworks/registry.py +44 -0
- agentforge_graph/ingest/__init__.py +30 -0
- agentforge_graph/ingest/codegraph.py +847 -0
- agentforge_graph/ingest/extractor.py +353 -0
- agentforge_graph/ingest/incremental/__init__.py +25 -0
- agentforge_graph/ingest/incremental/detect.py +118 -0
- agentforge_graph/ingest/incremental/dirty.py +61 -0
- agentforge_graph/ingest/incremental/indexer.py +218 -0
- agentforge_graph/ingest/incremental/meta.py +72 -0
- agentforge_graph/ingest/incremental/ports.py +39 -0
- agentforge_graph/ingest/pack.py +160 -0
- agentforge_graph/ingest/packs/__init__.py +34 -0
- agentforge_graph/ingest/packs/cpp/__init__.py +35 -0
- agentforge_graph/ingest/packs/cpp/references.scm +15 -0
- agentforge_graph/ingest/packs/cpp/structure.scm +49 -0
- agentforge_graph/ingest/packs/csharp/__init__.py +35 -0
- agentforge_graph/ingest/packs/csharp/references.scm +12 -0
- agentforge_graph/ingest/packs/csharp/structure.scm +45 -0
- agentforge_graph/ingest/packs/go/__init__.py +38 -0
- agentforge_graph/ingest/packs/go/references.scm +12 -0
- agentforge_graph/ingest/packs/go/structure.scm +64 -0
- agentforge_graph/ingest/packs/java/__init__.py +35 -0
- agentforge_graph/ingest/packs/java/references.scm +12 -0
- agentforge_graph/ingest/packs/java/structure.scm +38 -0
- agentforge_graph/ingest/packs/javascript/__init__.py +34 -0
- agentforge_graph/ingest/packs/javascript/references.scm +11 -0
- agentforge_graph/ingest/packs/javascript/structure.scm +166 -0
- agentforge_graph/ingest/packs/php/__init__.py +35 -0
- agentforge_graph/ingest/packs/php/references.scm +15 -0
- agentforge_graph/ingest/packs/php/structure.scm +44 -0
- agentforge_graph/ingest/packs/python/__init__.py +25 -0
- agentforge_graph/ingest/packs/python/references.scm +14 -0
- agentforge_graph/ingest/packs/python/structure.scm +57 -0
- agentforge_graph/ingest/packs/ruby/__init__.py +37 -0
- agentforge_graph/ingest/packs/ruby/references.scm +12 -0
- agentforge_graph/ingest/packs/ruby/structure.scm +37 -0
- agentforge_graph/ingest/packs/rust/__init__.py +39 -0
- agentforge_graph/ingest/packs/rust/references.scm +12 -0
- agentforge_graph/ingest/packs/rust/structure.scm +46 -0
- agentforge_graph/ingest/packs/typescript/__init__.py +31 -0
- agentforge_graph/ingest/packs/typescript/references.scm +11 -0
- agentforge_graph/ingest/packs/typescript/structure.scm +99 -0
- agentforge_graph/ingest/pipeline.py +134 -0
- agentforge_graph/ingest/report.py +84 -0
- agentforge_graph/ingest/resolver.py +467 -0
- agentforge_graph/ingest/source.py +79 -0
- agentforge_graph/knowledge/__init__.py +28 -0
- agentforge_graph/knowledge/adr.py +136 -0
- agentforge_graph/knowledge/commits.py +152 -0
- agentforge_graph/knowledge/ingest.py +312 -0
- agentforge_graph/knowledge/mentions.py +71 -0
- agentforge_graph/knowledge/report.py +32 -0
- agentforge_graph/main.py +21 -0
- agentforge_graph/providers.py +36 -0
- agentforge_graph/repomap/__init__.py +14 -0
- agentforge_graph/repomap/rank.py +161 -0
- agentforge_graph/repomap/render.py +55 -0
- agentforge_graph/repomap/repomap.py +66 -0
- agentforge_graph/retrieve/__init__.py +21 -0
- agentforge_graph/retrieve/pack.py +76 -0
- agentforge_graph/retrieve/rerank.py +251 -0
- agentforge_graph/retrieve/retriever.py +286 -0
- agentforge_graph/retrieve/scoring.py +36 -0
- agentforge_graph/serve/__init__.py +19 -0
- agentforge_graph/serve/engine.py +204 -0
- agentforge_graph/serve/http_runner.py +133 -0
- agentforge_graph/serve/server.py +110 -0
- agentforge_graph/serve/tools.py +307 -0
- agentforge_graph/store/__init__.py +32 -0
- agentforge_graph/store/_rowmap.py +102 -0
- agentforge_graph/store/errors.py +22 -0
- agentforge_graph/store/facade.py +89 -0
- agentforge_graph/store/kuzu_store.py +380 -0
- agentforge_graph/store/lance_store.py +146 -0
- agentforge_graph/store/neo4j_store.py +294 -0
- agentforge_graph/store/pgvector_store.py +170 -0
- agentforge_graph/store/registry.py +45 -0
- agentforge_graph/temporal/__init__.py +36 -0
- agentforge_graph/temporal/backfill.py +338 -0
- agentforge_graph/temporal/events.py +82 -0
- agentforge_graph/temporal/index.py +190 -0
- agentforge_graph/temporal/mining.py +190 -0
- agentforge_graph/temporal/recorder.py +114 -0
- agentforge_graph/temporal/store.py +282 -0
- agentforge_graph-0.3.2.dist-info/METADATA +291 -0
- agentforge_graph-0.3.2.dist-info/RECORD +151 -0
- agentforge_graph-0.3.2.dist-info/WHEEL +4 -0
- agentforge_graph-0.3.2.dist-info/entry_points.txt +3 -0
- agentforge_graph-0.3.2.dist-info/licenses/LICENSE +202 -0
- agentforge_graph-0.3.2.dist-info/licenses/NOTICE +14 -0
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""The Ruby language pack (Tier A — structure + require_relative resolution).
|
|
2
|
+
|
|
3
|
+
Ruby modules/classes nest methods; `require_relative "./x"` is path-based and
|
|
4
|
+
resolves in-repo (``module_style="relative"``), while `require "gem"` stays
|
|
5
|
+
external. Ruby also autoloads (Rails) and uses heavy metaprogramming, so the
|
|
6
|
+
import graph is sparser than in static languages — symbol extraction is the
|
|
7
|
+
primary value; receiver-qualified calls stay unresolved (ADR-0004).
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
from agentforge_graph.core import NodeKind
|
|
15
|
+
from agentforge_graph.ingest.pack import DescriptorRules, LanguagePack
|
|
16
|
+
|
|
17
|
+
_HERE = Path(__file__).parent

# Capture name (as used in structure.scm) -> node kind emitted for it.
_KINDS = {
    "def.class": NodeKind.CLASS,  # class + module
    "def.function": NodeKind.FUNCTION,  # def (promoted to Method in a class)
    "def.method": NodeKind.METHOD,  # def self.x
    "def.variable": NodeKind.VARIABLE,  # constant assignment
}

# The Ruby pack: query text is read from the .scm files that sit next to this
# module, at import time.
RUBY_PACK = LanguagePack(
    language="ruby",
    lang_slug="rb",
    grammar="ruby",
    extensions=(".rb",),
    structure_queries=_HERE.joinpath("structure.scm").read_text(encoding="utf-8"),
    reference_queries=_HERE.joinpath("references.scm").read_text(encoding="utf-8"),
    descriptor_rules=DescriptorRules(kinds=_KINDS),
    # require_relative paths are relative to the importing file
    module_style="relative",
    # a bare `require_relative "thor/x"` is still file-relative
    relative_bare=True,
    # require_relative brings in all of the file's top-level defs
    wildcard_import=True,
)
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
; Ruby reference queries (feat-002, pack-ruby).
;
; The called name is always the call's `method:` field, with or without a
; receiver (`foo(...)`, `obj.foo(...)`, `Mod::Klass.foo(...)`).
; The second pattern additionally captures the receiver as @call.recv so that
; `self.foo()` binds to the enclosing class's method (BUG-006); any other
; receiver stays unresolved (member access, ADR-0004).

(call method: (identifier) @call.callee) @call

(call
  receiver: (_) @call.recv
  method: (identifier) @call.callee) @call
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
; Ruby structure queries (feat-002, pack-ruby).
; Uses the shared capture vocabulary so edge kinds mean the same as in other packs.

; --- definitions ---
; Both `module` and `class` are named containers -> Class; defs nested inside
; them become methods.
(module name: (constant) @name) @def.class

(class name: (constant) @name) @def.class

; --- inheritance (INHERITS): `class B < A` ---
(class superclass: (superclass (constant) @base.name)) @base.def

; `def foo` -> Function (promoted to Method when nested in a class/module body).
(method name: (identifier) @name) @def.function

; `def self.foo` (class method) -> Method.
(singleton_method name: (identifier) @name) @def.method

; Constant assignment (`PI = 3.14`) -> Variable. The left side must be a
; `constant` (Uppercase) node, so local variables (lowercase identifiers) are
; not captured.
(assignment left: (constant) @name) @def.variable

; --- imports ---
; `require_relative "thor/command"` is always file-relative (bare or `./`) and
; is resolved in-repo via relative_bare. Plain `require "gem"` is load-path
; based (lib-root relative) and is left to a follow-up: capturing it here would
; mis-resolve against the importer's directory, so only require_relative is taken.
(call
  method: (identifier) @_req
  arguments: (argument_list
    (string (string_content) @import.module))
  (#eq? @_req "require_relative")) @import
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""The Rust language pack (Tier A — structure + path-derived module resolution).
|
|
2
|
+
|
|
3
|
+
Rust's module path is implicit in the file layout (`src/a/b.rs` is module `a::b`),
|
|
4
|
+
so the pack derives each file's module from its path (`namespace_from_path`) and
|
|
5
|
+
resolves `use crate::a::b::Item` to the file declaring `Item` (FQN-style, sep
|
|
6
|
+
`::`, with `crate::` stripped). Extracts struct/enum/union/impl (→Class), trait
|
|
7
|
+
(→Interface), functions + methods, const/static (→Variable), type aliases. `impl`
|
|
8
|
+
blocks attach their methods to the type. Method/path calls stay unresolved
|
|
9
|
+
(ADR-0004); grouped/glob `use` is a follow-up.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
from agentforge_graph.core import NodeKind
|
|
17
|
+
from agentforge_graph.ingest.pack import DescriptorRules, LanguagePack
|
|
18
|
+
|
|
19
|
+
_HERE = Path(__file__).parent

# Capture name (as used in structure.scm) -> node kind emitted for it.
_KINDS = {
    "def.class": NodeKind.CLASS,  # struct + enum + union + impl
    "def.interface": NodeKind.INTERFACE,  # trait
    "def.function": NodeKind.FUNCTION,  # fn + trait method sig (promoted)
    "def.variable": NodeKind.VARIABLE,  # const + static
    "def.type": NodeKind.TYPE_ALIAS,
}

# The Rust pack: query text is read from the .scm files that sit next to this
# module, at import time.
RUST_PACK = LanguagePack(
    language="rust",
    lang_slug="rs",
    grammar="rust",
    extensions=(".rs",),
    structure_queries=_HERE.joinpath("structure.scm").read_text(encoding="utf-8"),
    reference_queries=_HERE.joinpath("references.scm").read_text(encoding="utf-8"),
    descriptor_rules=DescriptorRules(kinds=_KINDS),
    namespace_sep="::",
    # the module path is the file path, not a declaration in the source
    namespace_from_path=True,
)
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
; Rust reference queries (feat-002, pack-rust).
;
; Two call shapes: a plain call `f(...)` and a method call `x.f(...)`.
; For the method call, @call.recv captures the receiver so that `self.f()`
; binds to the enclosing impl's method (BUG-006); any other receiver stays
; unresolved (member access, ADR-0004).

(call_expression function: (identifier) @call.callee) @call

(call_expression
  function: (field_expression
    value: (_) @call.recv
    field: (field_identifier) @call.callee)) @call
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
; Rust structure queries (feat-002, pack-rust).
; The module path comes from the file path (namespace_from_path), so
; `use crate::a::b::Item` resolves to the file declaring Item. `mod` blocks are
; treated as scopes, not def nodes, so the items inside keep their correct kind.

; --- definitions ---
(struct_item name: (type_identifier) @name) @def.class

(enum_item name: (type_identifier) @name) @def.class

(union_item name: (type_identifier) @name) @def.class

; A trait is an interface (a named method/contract container).
(trait_item name: (type_identifier) @name) @def.interface

; `impl Type { … }` / `impl Trait for Type { … }` -> attach methods to the Type
; (merges with the struct/enum node of the same name; methods nest -> Method).
(impl_item type: (type_identifier) @name) @def.class

; Free functions; inside an impl/trait body they promote to Method by nesting.
(function_item name: (identifier) @name) @def.function

; Trait method signatures (`fn draw(&self);`).
(function_signature_item name: (identifier) @name) @def.function

(const_item name: (identifier) @name) @def.variable

(static_item name: (identifier) @name) @def.variable

(type_item name: (type_identifier) @name) @def.type

; --- imports ---
; `use crate::shapes::Shape;` -> a path naming an item (FQN-style resolution).
; Grouped/glob uses (`use a::{B, C}`, `use a::*`) are a follow-up.
(use_declaration
  (scoped_identifier) @import.module) @import
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""The TypeScript language pack (Tier A — structure + import resolution)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from agentforge_graph.core import NodeKind
|
|
8
|
+
from agentforge_graph.ingest.pack import DescriptorRules, LanguagePack
|
|
9
|
+
|
|
10
|
+
_HERE = Path(__file__).parent

# Capture name (as used in structure.scm) -> node kind emitted for it.
_KINDS = {
    "def.class": NodeKind.CLASS,
    "def.function": NodeKind.FUNCTION,  # promoted to METHOD inside a class
    # ENH-008: broaden the TS surface beyond class/function/method.
    "def.interface": NodeKind.INTERFACE,
    "def.enum": NodeKind.CLASS,  # no dedicated Enum kind; a nominal type
    "def.type": NodeKind.TYPE_ALIAS,
    "def.variable": NodeKind.VARIABLE,  # module-level const tables/enums
}

# The TypeScript pack: query text is read from the .scm files that sit next to
# this module, at import time.
TYPESCRIPT_PACK = LanguagePack(
    language="typescript",
    lang_slug="ts",
    grammar="typescript",
    extensions=(".ts",),
    structure_queries=_HERE.joinpath("structure.scm").read_text(encoding="utf-8"),
    reference_queries=_HERE.joinpath("references.scm").read_text(encoding="utf-8"),
    descriptor_rules=DescriptorRules(kinds=_KINDS),
    # TS imports are path specifiers (./util), not dotted module names
    module_style="relative",
)
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
; TypeScript reference queries (feat-002, pack-ts).
;
; Two call shapes: a plain call `f(...)` and a method/attribute call
; `recv.f(...)`. @call.recv is the receiver (BUG-006), which lets `this.f()`
; bind to the enclosing class's method.

(call_expression function: (identifier) @call.callee) @call

(call_expression
  function: (member_expression
    object: (_) @call.recv
    property: (property_identifier) @call.callee)) @call
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
; TypeScript structure queries (feat-002, pack-ts).
; Uses the same capture vocabulary as the Python pack so edge kinds mean the same.
; Definitions may be wrapped in `export_statement`; queries match nested nodes.

; --- definitions ---
(class_declaration name: (type_identifier) @name) @def.class

; `abstract class Foo {}` is a distinct node from class_declaration. It is
; captured as the same Class kind so abstract base classes and their methods
; are extracted (BUG-005). JS has no `abstract`, so this is TS-only.
(abstract_class_declaration name: (type_identifier) @name) @def.class

; --- inheritance (INHERITS): `class B extends A` ---
; Interfaces via `implements` are a separate relation and are not captured here.
(class_declaration
  (class_heritage
    (extends_clause value: (identifier) @base.name))) @base.def
(abstract_class_declaration
  (class_heritage
    (extends_clause value: (identifier) @base.name))) @base.def

; Qualified base `class B extends mod.Base` -> base `mod.Base`; the resolver
; splits off the receiver and binds it via the importing module alias.
(class_declaration
  (class_heritage
    (extends_clause value: (member_expression) @base.name))) @base.def
(abstract_class_declaration
  (class_heritage
    (extends_clause value: (member_expression) @base.name))) @base.def

(function_declaration name: (identifier) @name) @def.function

; Methods live in a class_body -> promoted to METHOD by the extractor.
(method_definition name: (property_identifier) @name) @def.function

; --- JSDoc/TSDoc docstrings (DESCRIBES) ---
; A `/** … */` block comment immediately before a function/class/method becomes
; a DocChunk that DESCRIBES the symbol (feat-010); the `#match?` predicate keeps
; only comments that start with `/**`.
((comment) @docstring
  .
  (function_declaration) @doc.owner
  (#match? @docstring "^/[*][*]"))
((comment) @docstring
  .
  (class_declaration) @doc.owner
  (#match? @docstring "^/[*][*]"))
((comment) @docstring
  .
  (abstract_class_declaration) @doc.owner
  (#match? @docstring "^/[*][*]"))
(class_body
  (comment) @docstring
  .
  (method_definition) @doc.owner
  (#match? @docstring "^/[*][*]"))

; --- TS type surface (ENH-008) ---
; `interface Foo {}` -> Interface (the dominant way TS describes contracts).
(interface_declaration name: (type_identifier) @name) @def.interface

; `enum E {}` and `const enum E {}` -> Class (there is no dedicated Enum kind;
; it is a named nominal type with members). Both parse as enum_declaration
; with an identifier name.
(enum_declaration name: (identifier) @name) @def.enum

; `type ID = ...` -> TypeAlias.
(type_alias_declaration name: (type_identifier) @name) @def.type

; --- value bindings (ENH-008, shared with JS) ---
; `const f = (…) => …` / `const f = function () {}` -> Function, named from the
; binding. Captured at any depth, because these are genuine functions.
(lexical_declaration
  (variable_declarator
    name: (identifier) @name
    value: [(arrow_function) (function_expression)])) @def.function

; Module-level const data tables / const-object enums -> Variable. Scoped to the
; top level (program / export) so locals inside function bodies don't inflate
; the graph. Only object/array initializers are taken, NOT call results:
; `const x = require(...)` is an import binding (BUG-006), and call-bound public
; constants (e.g. zod's `ZodIssueCode = arrayToEnum([...])`) stay findable via
; their companion `type X = ...` alias, captured above as a TypeAlias.
(program
  (lexical_declaration
    (variable_declarator
      name: (identifier) @name
      value: [(object) (array)])) @def.variable)
(program
  (export_statement
    (lexical_declaration
      (variable_declarator
        name: (identifier) @name
        value: [(object) (array)])) @def.variable))

; --- imports ---
; `import { a, b } from "./mod"` -> module (relative path) + bound names.
(import_statement
  (import_clause
    (named_imports
      (import_specifier name: (identifier) @import.name)))
  source: (string (string_fragment) @import.module)) @import

; `import * as ns from "./mod"` -> the namespace alias binds the whole module,
; so `ns.foo()` and a qualified base `extends ns.Base` resolve to its exports
; (BUG-006).
(import_statement
  (import_clause
    (namespace_import (identifier) @import.default))
  source: (string (string_fragment) @import.module)) @import
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
"""``IngestPipeline`` — drives the two passes over a whole repo.
|
|
2
|
+
|
|
3
|
+
Extraction is CPU-bound and file-isolated, so files are parsed on a thread
|
|
4
|
+
pool with bounded concurrency; the store serializes its own writes. A fresh
|
|
5
|
+
``TreeSitterExtractor`` is built inside each worker thread because a
|
|
6
|
+
tree-sitter ``Parser`` is not safe to share across threads (the grammar
|
|
7
|
+
itself is cached, so only the lightweight parser/query objects are rebuilt).
|
|
8
|
+
After all files are upserted, the resolver runs once.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import asyncio
|
|
14
|
+
|
|
15
|
+
from agentforge_graph.core import FileSubgraph, GraphStore, SourceFile
|
|
16
|
+
from agentforge_graph.frameworks import FrameworkExtractor
|
|
17
|
+
|
|
18
|
+
from .extractor import TreeSitterExtractor
|
|
19
|
+
from .pack import LanguagePack, PackRegistry
|
|
20
|
+
from .report import IndexReport
|
|
21
|
+
from .resolver import ImportResolver
|
|
22
|
+
from .source import RepoSource, read_go_module
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _extract_one(
    pack: LanguagePack,
    repo: str,
    commit: str,
    sf: SourceFile,
    frameworks: FrameworkExtractor | None,
) -> tuple[FileSubgraph, int]:
    """Extract one file's subgraph, merging in framework facts when active.

    Returns ``(subgraph, unresolved)`` where ``unresolved`` is the framework
    extractor's ``unresolved`` count for this file, or ``0`` when no framework
    extractor is supplied or it is not active.
    """
    # The extractor is created and consumed here, inside the worker thread,
    # because the parser it owns must not be shared between threads.
    extractor = TreeSitterExtractor(pack, repo, commit)
    subgraph = extractor.extract(sf)

    if frameworks is None or not frameworks.active:
        return subgraph, 0

    facts = frameworks.extract(sf, repo, commit)  # feat-011: routes/etc.
    if not (facts.nodes or facts.edges):
        # Nothing to merge; still report what the framework pass left unresolved.
        return subgraph, facts.unresolved

    merged = subgraph.model_copy(
        update={
            "nodes": [*subgraph.nodes, *facts.nodes],
            "edges": [*subgraph.edges, *facts.edges],
        }
    )
    return merged, facts.unresolved
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class IngestPipeline:
    """Drive extraction + upsert over a repo, then (for a full index) resolve.

    Each file is extracted in a worker thread via ``asyncio.to_thread``; the
    number of extractions in flight is bounded by ``concurrency``.
    """

    def __init__(
        self,
        repo: str,
        commit: str = "",
        concurrency: int = 8,
        frameworks: FrameworkExtractor | None = None,
    ) -> None:
        """Store the run configuration.

        Args:
            repo: Repository identifier passed through to the extractors.
            commit: Commit identifier passed to the extractors and resolvers.
            concurrency: Upper bound on simultaneous file extractions. Values
                below 1 are treated as 1 when ``run`` builds its semaphore.
            frameworks: Optional framework extractor; used only when its
                ``active`` attribute is truthy.
        """
        self.repo = repo
        self.commit = commit
        self.concurrency = concurrency
        self.frameworks = frameworks

    async def run(
        self,
        source: RepoSource,
        store: GraphStore,
        registry: PackRegistry,
        paths: set[str] | None = None,
    ) -> IndexReport:
        """Extract + upsert each file, then resolve. When ``paths`` is given,
        only those files are (re)extracted (feat-004 incremental scope); the
        resolver is **not** run here — incremental refresh owns scoped
        re-resolution. ``paths is None`` is the full-index path (resolve runs).
        Active framework packs (feat-011) emit extra nodes/edges merged into
        each file's subgraph, so they ride the same upsert + incrementality."""
        report = IndexReport()
        # Semaphore(0) would never admit a worker, so run() would hang forever,
        # and a negative value raises ValueError. Clamp to at least one worker.
        sem = asyncio.Semaphore(max(1, self.concurrency))

        async def _do(sf: SourceFile) -> tuple[FileSubgraph, int] | None:
            pack = registry.for_slug(sf.language)
            if pack is None:
                # No language pack for this file: nothing to extract.
                return None
            async with sem:
                result = await asyncio.to_thread(
                    _extract_one, pack, self.repo, self.commit, sf, self.frameworks
                )
            # The upsert is awaited after the semaphore is released, so the
            # bound applies to extraction only.
            await store.upsert(result[0])
            return result

        files = (sf for sf in source.iter_files(registry) if paths is None or sf.path in paths)
        results = await asyncio.gather(*[_do(sf) for sf in files])

        for result in results:
            if result is None:
                continue
            sg, unresolved = result
            report.files_indexed += 1
            report.nodes += len(sg.nodes)
            report.edges += len(sg.edges)
            report.framework_unresolved += unresolved
            for n in sg.nodes:
                report.by_node_kind[n.kind.value] = report.by_node_kind.get(n.kind.value, 0) + 1
            for e in sg.edges:
                report.by_edge_kind[e.kind.value] = report.by_edge_kind.get(e.kind.value, 0) + 1
        report.skipped = list(source.skipped)
        report.routes_extracted = report.by_node_kind.get("Route", 0)
        report.models_extracted = report.by_node_kind.get("DataModel", 0)
        report.services_extracted = report.by_node_kind.get("Service", 0)

        if paths is not None:
            # Scoped (incremental) extract: the caller re-resolves with the
            # right import-graph scope. Edge tallies come from that pass.
            return report

        stats = await ImportResolver(
            registry, self.commit, go_module=read_go_module(source.root)
        ).resolve(store)
        report.resolve = stats
        # Fold the resolver's edge counts into the per-kind and total tallies.
        imports = stats.imports_resolved + stats.imports_external
        report.by_edge_kind["IMPORTS"] = report.by_edge_kind.get("IMPORTS", 0) + imports
        report.by_edge_kind["CALLS"] = report.by_edge_kind.get("CALLS", 0) + stats.refs_resolved
        if stats.inherits_resolved:
            report.by_edge_kind["INHERITS"] = (
                report.by_edge_kind.get("INHERITS", 0) + stats.inherits_resolved
            )
        report.edges += imports + stats.refs_resolved + stats.inherits_resolved

        # feat-011 pass-2: stitch ORM relationship/FK string targets into
        # RELATES_TO edges (and future router-prefix composition).
        if self.frameworks is not None and self.frameworks.active:
            resolved, unresolved = await self.frameworks.resolve(store, self.commit)
            if resolved:
                report.relations_resolved = resolved
                report.by_edge_kind["RELATES_TO"] = (
                    report.by_edge_kind.get("RELATES_TO", 0) + resolved
                )
                report.edges += resolved
            report.framework_unresolved += unresolved
        return report
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""Result types for an indexing run."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel, Field
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ResolveStats(BaseModel):
    """Counters reported by the pass-2 resolver."""

    # IMPORTS edges to in-repo files
    imports_resolved: int = 0
    # IMPORTS edges to external (stdlib/third-party) packages
    imports_external: int = 0
    # CALLS edges created (unique match)
    refs_resolved: int = 0
    # call sites with zero/ambiguous targets (recorded, not guessed)
    refs_unresolved: int = 0
    # INHERITS edges created (base class -> in-repo class)
    inherits_resolved: int = 0
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class RouteInfo(BaseModel):
    """A single extracted endpoint (feat-011).

    Surfaced through ``CodeGraph.routes``, ``ckg routes`` and the
    ``ckg_routes`` tool.
    """

    method: str
    path: str
    framework: str
    # handler symbol id (HANDLED_BY target)
    handler: str
    file: str
    line: int

    def to_dict(self) -> dict[str, object]:
        """Return the fields as a plain dict via ``model_dump``."""
        return self.model_dump()
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class ModelInfo(BaseModel):
    """A single extracted ORM data model (feat-011).

    Surfaced through ``CodeGraph.models`` and ``ckg models``.
    """

    # table name when known, else the class name
    name: str
    # mapped table name ("" when not statically known)
    table: str
    framework: str
    # mapped column/field names
    fields: list[str]
    # RELATES_TO out: {to, kind, via}
    relations: list[dict[str, str]]
    # the underlying class symbol id
    cls: str
    file: str
    line: int

    def to_dict(self) -> dict[str, object]:
        """Return the fields as a plain dict via ``model_dump``."""
        return self.model_dump()
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class ServiceInfo(BaseModel):
    """A single DI-provided service (feat-011).

    Surfaced through ``CodeGraph.services`` and ``ckg services``.
    """

    # the provider (dependency) name
    name: str
    framework: str
    # consumer symbol ids the service is INJECTED_INTO
    injected_into: list[str]
    file: str
    line: int

    def to_dict(self) -> dict[str, object]:
        """Return the fields as a plain dict via ``model_dump``."""
        return self.model_dump()
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class IndexReport(BaseModel):
    """Totals for a full ``IngestPipeline.run`` / ``CodeGraph.index``."""

    files_indexed: int = 0
    nodes: int = 0
    edges: int = 0
    by_node_kind: dict[str, int] = Field(default_factory=dict)
    by_edge_kind: dict[str, int] = Field(default_factory=dict)
    skipped: list[str] = Field(default_factory=list)
    resolve: ResolveStats = Field(default_factory=ResolveStats)

    # --- feat-011: framework extraction ---
    # framework Route nodes emitted
    routes_extracted: int = 0
    # ORM DataModel nodes emitted
    models_extracted: int = 0
    # DI Service nodes emitted
    services_extracted: int = 0
    # RELATES_TO edges from ORM relationship/FK targets
    relations_resolved: int = 0
    # framework registrations seen but not extractable
    framework_unresolved: int = 0

    # --- feat-010: knowledge ingestion ---
    # ADR Decision nodes
    decisions_indexed: int = 0
    # GOVERNS edges from unambiguous ADR mentions
    governs_resolved: int = 0
    # ADR mentions seen but not linked
    mentions_unresolved: int = 0
    # general doc files (doc_globs) ingested
    docs_indexed: int = 0
    # DESCRIBES edges from unambiguous doc mentions
    describes_resolved: int = 0
    # commit messages ingested as DocChunks
    commits_indexed: int = 0
|