gitnexus 1.6.1 → 1.6.2-rc.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +73 -0
- package/dist/cli/analyze.js +23 -1
- package/dist/core/embeddings/embedder.js +5 -0
- package/dist/core/embeddings/embedding-pipeline.d.ts +12 -3
- package/dist/core/embeddings/embedding-pipeline.js +79 -29
- package/dist/core/group/extractors/grpc-extractor.d.ts +1 -1
- package/dist/core/group/extractors/grpc-extractor.js +28 -13
- package/dist/core/group/extractors/http-route-extractor.js +35 -5
- package/dist/core/group/extractors/manifest-extractor.js +66 -9
- package/dist/core/group/sync.js +49 -1
- package/dist/core/ingestion/language-provider.d.ts +24 -5
- package/dist/core/ingestion/languages/c-cpp.js +2 -2
- package/dist/core/ingestion/languages/dart.d.ts +1 -1
- package/dist/core/ingestion/languages/dart.js +2 -2
- package/dist/core/ingestion/languages/go.d.ts +1 -1
- package/dist/core/ingestion/languages/go.js +2 -2
- package/dist/core/ingestion/languages/ruby.js +1 -1
- package/dist/core/ingestion/languages/swift.d.ts +1 -1
- package/dist/core/ingestion/languages/swift.js +2 -2
- package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.d.ts +36 -1
- package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.js +143 -5
- package/dist/core/lbug/csv-generator.js +7 -4
- package/dist/core/lbug/lbug-adapter.d.ts +38 -0
- package/dist/core/lbug/lbug-adapter.js +189 -65
- package/dist/core/lbug/schema.d.ts +7 -0
- package/dist/core/lbug/schema.js +9 -1
- package/dist/core/run-analyze.js +18 -4
- package/dist/mcp/core/embedder.js +5 -0
- package/dist/server/api.js +9 -1
- package/package.json +6 -4
- package/scripts/build-tree-sitter-proto.cjs +82 -0
- package/vendor/node_modules/node-addon-api/node_addon_api.Makefile +6 -0
- package/vendor/node_modules/node-addon-api/node_addon_api.target.mk +104 -0
- package/vendor/node_modules/node-addon-api/node_addon_api_except.target.mk +108 -0
- package/vendor/node_modules/node-addon-api/node_addon_api_except_all.target.mk +104 -0
- package/vendor/node_modules/node-addon-api/node_addon_api_maybe.target.mk +104 -0
- package/vendor/tree-sitter-proto/package.json +1 -7
|
@@ -21,8 +21,25 @@ import type { SyntaxNode } from './utils/ast-helpers.js';
|
|
|
21
21
|
import type { NodeLabel } from '../../_shared/index.js';
|
|
22
22
|
/** Tree-sitter query captures: capture name → AST node (or undefined if not captured). */
|
|
23
23
|
export type CaptureMap = Record<string, SyntaxNode | undefined>;
|
|
24
|
-
/**
|
|
25
|
-
|
|
24
|
+
/**
|
|
25
|
+
* How a language handles imports — determines wildcard synthesis behavior.
|
|
26
|
+
*
|
|
27
|
+
* Import resolution is a graph-traversal policy with multiple distinct strategies,
|
|
28
|
+
* analogous to MRO for method resolution. Each tag picks a strategy:
|
|
29
|
+
*
|
|
30
|
+
* | Tag | Mechanism | Traversal | Languages |
|
|
31
|
+
* |-----------------------|------------------------------------------------|---------------------|--------------------------------------------|
|
|
32
|
+
* | `named` | Per-symbol imports | None (use-site) | JS/TS, Java, C#, Rust, PHP, Kotlin, Vue |
|
|
33
|
+
* | `wildcard-transitive` | Textual paste, symbols chain through files | BFS closure | C, C++ (future: Obj-C, Fortran, Nim) |
|
|
34
|
+
* | `wildcard-leaf` | Whole public API, single hop | None (direct only) | Go, Ruby, Swift, Dart |
|
|
35
|
+
* | `namespace` | Qualified handle; symbols resolved at call site| None at import | Python |
|
|
36
|
+
* | `explicit-reexport` | Opt-in per-symbol re-export (SCAFFOLD) | Topological DAG | (future: TS `export *`, Rust `pub use`) |
|
|
37
|
+
*
|
|
38
|
+
* The `explicit-reexport` tag is a compile-time scaffold; no provider claims it yet.
|
|
39
|
+
* It falls through to `wildcard-leaf` behavior in synthesis so today's TS/Rust
|
|
40
|
+
* handling is unchanged. A future PR will implement the DAG walk for `export *`.
|
|
41
|
+
*/
|
|
42
|
+
export type ImportSemantics = 'named' | 'wildcard-transitive' | 'wildcard-leaf' | 'namespace' | 'explicit-reexport';
|
|
26
43
|
/**
|
|
27
44
|
* Everything a language needs to provide.
|
|
28
45
|
* Required fields must be explicitly set; optional fields have defaults
|
|
@@ -51,10 +68,12 @@ interface LanguageProviderConfig {
|
|
|
51
68
|
/** Named binding extraction from import statements.
|
|
52
69
|
* Default: undefined (language uses wildcard/whole-module imports). */
|
|
53
70
|
readonly namedBindingExtractor?: NamedBindingExtractorFn;
|
|
54
|
-
/** How this language handles imports.
|
|
71
|
+
/** How this language handles imports. See `ImportSemantics` for the full taxonomy.
|
|
55
72
|
* - 'named': per-symbol imports (JS/TS, Java, C#, Rust, PHP, Kotlin, Vue)
|
|
56
|
-
* - 'wildcard':
|
|
57
|
-
* - '
|
|
73
|
+
* - 'wildcard-transitive': textual-include closure; imports chain through files (C, C++)
|
|
74
|
+
* - 'wildcard-leaf': whole-module single-hop imports; no transitive chaining (Go, Ruby, Swift, Dart)
|
|
75
|
+
* - 'namespace': qualified namespace imports, needs moduleAliasMap (Python)
|
|
76
|
+
* - 'explicit-reexport': opt-in per-symbol re-export (scaffold; no provider uses yet)
|
|
58
77
|
* Default: 'named'. */
|
|
59
78
|
readonly importSemantics?: ImportSemantics;
|
|
60
79
|
/** Language-specific transformation of raw import path text before resolution.
|
|
@@ -293,7 +293,7 @@ export const cProvider = defineLanguage({
|
|
|
293
293
|
typeConfig: cCppConfig,
|
|
294
294
|
exportChecker: cCppExportChecker,
|
|
295
295
|
importResolver: resolveCImport,
|
|
296
|
-
importSemantics: 'wildcard',
|
|
296
|
+
importSemantics: 'wildcard-transitive',
|
|
297
297
|
fieldExtractor: createFieldExtractor(cFieldConfig),
|
|
298
298
|
methodExtractor: createMethodExtractor({
|
|
299
299
|
...cMethodConfig,
|
|
@@ -310,7 +310,7 @@ export const cppProvider = defineLanguage({
|
|
|
310
310
|
typeConfig: cCppConfig,
|
|
311
311
|
exportChecker: cCppExportChecker,
|
|
312
312
|
importResolver: resolveCppImport,
|
|
313
|
-
importSemantics: 'wildcard',
|
|
313
|
+
importSemantics: 'wildcard-transitive',
|
|
314
314
|
mroStrategy: 'leftmost-base',
|
|
315
315
|
fieldExtractor: createFieldExtractor(cppFieldConfig),
|
|
316
316
|
methodExtractor: createMethodExtractor({
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Dart Language Provider
|
|
3
3
|
*
|
|
4
4
|
* Dart traits:
|
|
5
|
-
* - importSemantics: 'wildcard' (Dart imports bring everything public into scope)
|
|
5
|
+
* - importSemantics: 'wildcard-leaf' (Dart imports bring everything public into scope)
|
|
6
6
|
* - exportChecker: public if no leading underscore
|
|
7
7
|
* - Dart SDK imports (dart:*) and external packages are skipped
|
|
8
8
|
* - enclosingFunctionFinder: Dart's tree-sitter grammar places function_body
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Dart Language Provider
|
|
3
3
|
*
|
|
4
4
|
* Dart traits:
|
|
5
|
-
* - importSemantics: 'wildcard' (Dart imports bring everything public into scope)
|
|
5
|
+
* - importSemantics: 'wildcard-leaf' (Dart imports bring everything public into scope)
|
|
6
6
|
* - exportChecker: public if no leading underscore
|
|
7
7
|
* - Dart SDK imports (dart:*) and external packages are skipped
|
|
8
8
|
* - enclosingFunctionFinder: Dart's tree-sitter grammar places function_body
|
|
@@ -83,7 +83,7 @@ export const dartProvider = defineLanguage({
|
|
|
83
83
|
typeConfig: dartConfig,
|
|
84
84
|
exportChecker: dartExportChecker,
|
|
85
85
|
importResolver: resolveDartImport,
|
|
86
|
-
importSemantics: 'wildcard',
|
|
86
|
+
importSemantics: 'wildcard-leaf',
|
|
87
87
|
fieldExtractor: createFieldExtractor(dartFieldConfig),
|
|
88
88
|
methodExtractor: createMethodExtractor(dartMethodConfig),
|
|
89
89
|
classExtractor: createClassExtractor({
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* LanguageProvider, following the Strategy pattern used by the pipeline.
|
|
6
6
|
*
|
|
7
7
|
* Key Go traits:
|
|
8
|
-
* - importSemantics: 'wildcard' (Go imports entire packages)
|
|
8
|
+
* - importSemantics: 'wildcard-leaf' (Go imports entire packages)
|
|
9
9
|
* - callRouter: present (Go method calls may need routing)
|
|
10
10
|
*/
|
|
11
11
|
export declare const goProvider: import("../language-provider.js").LanguageProvider;
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* LanguageProvider, following the Strategy pattern used by the pipeline.
|
|
6
6
|
*
|
|
7
7
|
* Key Go traits:
|
|
8
|
-
* - importSemantics: 'wildcard' (Go imports entire packages)
|
|
8
|
+
* - importSemantics: 'wildcard-leaf' (Go imports entire packages)
|
|
9
9
|
* - callRouter: present (Go method calls may need routing)
|
|
10
10
|
*/
|
|
11
11
|
import { SupportedLanguages } from '../../../_shared/index.js';
|
|
@@ -26,7 +26,7 @@ export const goProvider = defineLanguage({
|
|
|
26
26
|
typeConfig: goConfig,
|
|
27
27
|
exportChecker: goExportChecker,
|
|
28
28
|
importResolver: resolveGoImport,
|
|
29
|
-
importSemantics: 'wildcard',
|
|
29
|
+
importSemantics: 'wildcard-leaf',
|
|
30
30
|
fieldExtractor: createFieldExtractor(goFieldConfig),
|
|
31
31
|
methodExtractor: createMethodExtractor(goMethodConfig),
|
|
32
32
|
classExtractor: createClassExtractor({
|
|
@@ -99,7 +99,7 @@ export const rubyProvider = defineLanguage({
|
|
|
99
99
|
exportChecker: rubyExportChecker,
|
|
100
100
|
importResolver: resolveRubyImport,
|
|
101
101
|
callRouter: routeRubyCall,
|
|
102
|
-
importSemantics: 'wildcard',
|
|
102
|
+
importSemantics: 'wildcard-leaf',
|
|
103
103
|
resolveEnclosingOwner(node) {
|
|
104
104
|
// Ruby singleton_class (class << self) should resolve to the enclosing
|
|
105
105
|
// class or module for owner/container resolution (HAS_METHOD edges, class IDs).
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* LanguageProvider, following the Strategy pattern used by the pipeline.
|
|
6
6
|
*
|
|
7
7
|
* Key Swift traits:
|
|
8
|
-
* - importSemantics: 'wildcard' (Swift imports entire modules)
|
|
8
|
+
* - importSemantics: 'wildcard-leaf' (Swift imports entire modules)
|
|
9
9
|
* - heritageDefaultEdge: 'IMPLEMENTS' (protocols are more common than class inheritance)
|
|
10
10
|
* - implicitImportWirer: all files in the same SPM target see each other
|
|
11
11
|
*/
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* LanguageProvider, following the Strategy pattern used by the pipeline.
|
|
6
6
|
*
|
|
7
7
|
* Key Swift traits:
|
|
8
|
-
* - importSemantics: 'wildcard' (Swift imports entire modules)
|
|
8
|
+
* - importSemantics: 'wildcard-leaf' (Swift imports entire modules)
|
|
9
9
|
* - heritageDefaultEdge: 'IMPLEMENTS' (protocols are more common than class inheritance)
|
|
10
10
|
* - implicitImportWirer: all files in the same SPM target see each other
|
|
11
11
|
*/
|
|
@@ -221,7 +221,7 @@ export const swiftProvider = defineLanguage({
|
|
|
221
221
|
typeConfig: swiftConfig,
|
|
222
222
|
exportChecker: swiftExportChecker,
|
|
223
223
|
importResolver: resolveSwiftImport,
|
|
224
|
-
importSemantics: 'wildcard',
|
|
224
|
+
importSemantics: 'wildcard-leaf',
|
|
225
225
|
heritageDefaultEdge: 'IMPLEMENTS',
|
|
226
226
|
fieldExtractor: createFieldExtractor(swiftFieldConfig),
|
|
227
227
|
methodExtractor: createMethodExtractor({
|
|
@@ -14,12 +14,47 @@
|
|
|
14
14
|
*/
|
|
15
15
|
import type { KnowledgeGraph } from '../../graph/types.js';
|
|
16
16
|
import type { createResolutionContext } from '../model/resolution-context.js';
|
|
17
|
-
import { SupportedLanguages } from '../../../_shared/index.js';
|
|
17
|
+
import type { SupportedLanguages } from '../../../_shared/index.js';
|
|
18
18
|
/** Check if a language uses wildcard (whole-module) import semantics. */
|
|
19
19
|
export declare function isWildcardImportLanguage(lang: SupportedLanguages): boolean;
|
|
20
20
|
/** Check if a language needs synthesis before call resolution.
|
|
21
21
|
* True for wildcard-import languages AND namespace-import languages (Python). */
|
|
22
22
|
export declare function needsSynthesis(lang: SupportedLanguages): boolean;
|
|
23
|
+
/**
|
|
24
|
+
* Strategy implementation for `importSemantics: 'wildcard-transitive'` (C, C++).
|
|
25
|
+
*
|
|
26
|
+
* Textual-include languages chain symbols through files: if `dict.c` includes
|
|
27
|
+
* `server.h` and `server.h` includes `dict.h`, then `dict.c` sees symbols from
|
|
28
|
+
* all three files. This helper walks the include graph (combining both the
|
|
29
|
+
* ingestion-context `importMap` and the graph-level IMPORTS edges) until the
|
|
30
|
+
* closure is stable.
|
|
31
|
+
*
|
|
32
|
+
* **Order matters.** The returned `Set` preserves iteration order (insertion
|
|
33
|
+
* order). `synthesizeWildcardImportBindings` dedupes bindings by symbol name
|
|
34
|
+
* on a first-seen-wins basis, so this closure's ordering determines which
|
|
35
|
+
* declaration wins when multiple headers export the same name (e.g. overloaded
|
|
36
|
+
* free functions like `write_audit()` vs `write_audit(const char*)` in
|
|
37
|
+
* different headers). We therefore:
|
|
38
|
+
* 1. Seed the closure with direct imports in declaration order (matches the
|
|
39
|
+
* order of `#include` directives in the source file).
|
|
40
|
+
* 2. Use FIFO / true BFS (head-index dequeue, not `queue.shift()`) for transitive expansion, so
|
|
41
|
+
* closer headers are seen before deeper ones.
|
|
42
|
+
*
|
|
43
|
+
* Cycle-safe: the `closure.has(file)` guard prevents infinite loops on circular
|
|
44
|
+
* header includes, which are valid C/C++ when paired with `#pragma once` or
|
|
45
|
+
* include guards.
|
|
46
|
+
*
|
|
47
|
+
* Size-bounded: the closure is capped at `MAX_TRANSITIVE_CLOSURE_SIZE` files to
|
|
48
|
+
* prevent OOM on pathological codebases (e.g. boost, monoheader kernel code)
|
|
49
|
+
* where one translation unit can transitively reach tens of thousands of
|
|
50
|
+
* headers. Partial closures still yield useful bindings for the cluster of
|
|
51
|
+
* headers closest to the importer, which is what overload resolution and
|
|
52
|
+
* cross-file call resolution care about.
|
|
53
|
+
*
|
|
54
|
+
* Queue implementation: uses a head-index over a growing array (O(1) dequeue)
|
|
55
|
+
* instead of `Array.prototype.shift()` (O(n)) so deep chains stay linear.
|
|
56
|
+
*/
|
|
57
|
+
export declare function expandTransitiveIncludeClosure(directImports: Iterable<string>, importMap: ReadonlyMap<string, ReadonlySet<string>>, graphImports: ReadonlyMap<string, ReadonlySet<string>>): Set<string>;
|
|
23
58
|
/**
|
|
24
59
|
* Synthesize namedImportMap entries for languages with whole-module imports.
|
|
25
60
|
*
|
|
@@ -34,9 +34,26 @@ const IMPORTABLE_SYMBOL_LABELS = new Set([
|
|
|
34
34
|
/** Max synthetic bindings per importing file — prevents memory bloat
|
|
35
35
|
* for C/C++ files that include many large headers. */
|
|
36
36
|
const MAX_SYNTHETIC_BINDINGS_PER_FILE = 1000;
|
|
37
|
+
/** Max files allowed in a single transitive include closure. Guards against
|
|
38
|
+
* OOM on pathological C/C++ codebases (boost, Linux kernel-style monoheaders)
|
|
39
|
+
* where a single translation unit can transitively reach many thousands of
|
|
40
|
+
* headers. When the cap is hit, BFS expansion stops early — the file still
|
|
41
|
+
* synthesizes bindings from the partial closure rather than failing. */
|
|
42
|
+
const MAX_TRANSITIVE_CLOSURE_SIZE = 5000;
|
|
43
|
+
/** Import semantics tags whose languages need synthesis of whole-module imports.
|
|
44
|
+
* `wildcard-transitive` (C/C++) and `wildcard-leaf` (Go, Ruby, Swift, Dart) are
|
|
45
|
+
* the file-based wildcard strategies. `explicit-reexport` is a scaffold tag —
|
|
46
|
+
* no provider uses it yet, but it goes through the same leaf-style synthesis
|
|
47
|
+
* path today because a re-exporter is still an importer; only the extra DAG
|
|
48
|
+
* walk to surface re-exported symbols is missing (future work). */
|
|
49
|
+
const WILDCARD_SEMANTICS = new Set([
|
|
50
|
+
'wildcard-transitive',
|
|
51
|
+
'wildcard-leaf',
|
|
52
|
+
'explicit-reexport',
|
|
53
|
+
]);
|
|
37
54
|
/** Languages with whole-module import semantics (derived from providers at module load). */
|
|
38
55
|
const WILDCARD_LANGUAGES = new Set(Object.values(providers)
|
|
39
|
-
.filter((p) => p.importSemantics
|
|
56
|
+
.filter((p) => WILDCARD_SEMANTICS.has(p.importSemantics))
|
|
40
57
|
.map((p) => p.id));
|
|
41
58
|
/** Languages that need binding synthesis before call resolution. */
|
|
42
59
|
const SYNTHESIS_LANGUAGES = new Set(Object.values(providers)
|
|
@@ -51,6 +68,82 @@ export function isWildcardImportLanguage(lang) {
|
|
|
51
68
|
export function needsSynthesis(lang) {
|
|
52
69
|
return SYNTHESIS_LANGUAGES.has(lang);
|
|
53
70
|
}
|
|
71
|
+
// ── Strategy implementations ───────────────────────────────────────────────
|
|
72
|
+
/**
|
|
73
|
+
* Strategy implementation for `importSemantics: 'wildcard-transitive'` (C, C++).
|
|
74
|
+
*
|
|
75
|
+
* Textual-include languages chain symbols through files: if `dict.c` includes
|
|
76
|
+
* `server.h` and `server.h` includes `dict.h`, then `dict.c` sees symbols from
|
|
77
|
+
* all three files. This helper walks the include graph (combining both the
|
|
78
|
+
* ingestion-context `importMap` and the graph-level IMPORTS edges) until the
|
|
79
|
+
* closure is stable.
|
|
80
|
+
*
|
|
81
|
+
* **Order matters.** The returned `Set` preserves iteration order (insertion
|
|
82
|
+
* order). `synthesizeWildcardImportBindings` dedupes bindings by symbol name
|
|
83
|
+
* on a first-seen-wins basis, so this closure's ordering determines which
|
|
84
|
+
* declaration wins when multiple headers export the same name (e.g. overloaded
|
|
85
|
+
* free functions like `write_audit()` vs `write_audit(const char*)` in
|
|
86
|
+
* different headers). We therefore:
|
|
87
|
+
* 1. Seed the closure with direct imports in declaration order (matches the
|
|
88
|
+
* order of `#include` directives in the source file).
|
|
89
|
+
* 2. Use FIFO / true BFS (head-index dequeue, not `queue.shift()`) for transitive expansion, so
|
|
90
|
+
* closer headers are seen before deeper ones.
|
|
91
|
+
*
|
|
92
|
+
* Cycle-safe: the `closure.has(file)` guard prevents infinite loops on circular
|
|
93
|
+
* header includes, which are valid C/C++ when paired with `#pragma once` or
|
|
94
|
+
* include guards.
|
|
95
|
+
*
|
|
96
|
+
* Size-bounded: the closure is capped at `MAX_TRANSITIVE_CLOSURE_SIZE` files to
|
|
97
|
+
* prevent OOM on pathological codebases (e.g. boost, monoheader kernel code)
|
|
98
|
+
* where one translation unit can transitively reach tens of thousands of
|
|
99
|
+
* headers. Partial closures still yield useful bindings for the cluster of
|
|
100
|
+
* headers closest to the importer, which is what overload resolution and
|
|
101
|
+
* cross-file call resolution care about.
|
|
102
|
+
*
|
|
103
|
+
* Queue implementation: uses a head-index over a growing array (O(1) dequeue)
|
|
104
|
+
* instead of `Array.prototype.shift()` (O(n)) so deep chains stay linear.
|
|
105
|
+
*/
|
|
106
|
+
export function expandTransitiveIncludeClosure(directImports, importMap, graphImports) {
|
|
107
|
+
const closure = new Set();
|
|
108
|
+
const queue = [];
|
|
109
|
+
let head = 0; // O(1) dequeue: advance the head index instead of shift()-ing.
|
|
110
|
+
const tryEnqueue = (file) => {
|
|
111
|
+
if (closure.has(file))
|
|
112
|
+
return true;
|
|
113
|
+
if (closure.size >= MAX_TRANSITIVE_CLOSURE_SIZE)
|
|
114
|
+
return false;
|
|
115
|
+
closure.add(file);
|
|
116
|
+
queue.push(file);
|
|
117
|
+
return true;
|
|
118
|
+
};
|
|
119
|
+
// Seed direct imports in declaration order (see JSDoc on order-sensitivity).
|
|
120
|
+
for (const f of directImports) {
|
|
121
|
+
if (!tryEnqueue(f))
|
|
122
|
+
break;
|
|
123
|
+
}
|
|
124
|
+
// True BFS for transitive reach: head-index FIFO preserves the "closer
|
|
125
|
+
// headers first" ordering that overload resolution depends on.
|
|
126
|
+
while (head < queue.length) {
|
|
127
|
+
if (closure.size >= MAX_TRANSITIVE_CLOSURE_SIZE)
|
|
128
|
+
break;
|
|
129
|
+
const file = queue[head++];
|
|
130
|
+
const nested = importMap.get(file);
|
|
131
|
+
if (nested) {
|
|
132
|
+
for (const n of nested) {
|
|
133
|
+
if (!tryEnqueue(n))
|
|
134
|
+
break;
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
const nestedGraph = graphImports.get(file);
|
|
138
|
+
if (nestedGraph) {
|
|
139
|
+
for (const n of nestedGraph) {
|
|
140
|
+
if (!tryEnqueue(n))
|
|
141
|
+
break;
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
return closure;
|
|
146
|
+
}
|
|
54
147
|
// ── Main synthesis function ────────────────────────────────────────────────
|
|
55
148
|
/**
|
|
56
149
|
* Synthesize namedImportMap entries for languages with whole-module imports.
|
|
@@ -133,16 +226,61 @@ export function synthesizeWildcardImportBindings(graph, ctx) {
|
|
|
133
226
|
}
|
|
134
227
|
}
|
|
135
228
|
};
|
|
136
|
-
|
|
229
|
+
/**
|
|
230
|
+
* Dispatch wildcard synthesis by the file's language provider strategy.
|
|
231
|
+
*
|
|
232
|
+
* Strategy tags (see `ImportSemantics`):
|
|
233
|
+
* - `wildcard-transitive`: expand the include closure first (C/C++ #include
|
|
234
|
+
* chains — e.g. `dict.c` → `server.h` → `dict.h` so `dictFind` resolves
|
|
235
|
+
* across header chains)
|
|
236
|
+
* - `wildcard-leaf`: synthesize from direct imports only (Go, Ruby, Swift, Dart)
|
|
237
|
+
* - `explicit-reexport`: scaffold tag; falls through to leaf behavior.
|
|
238
|
+
* TODO(#821): implement re-export DAG walk for TS `export *` / Rust
|
|
239
|
+
* `pub use`. The leaf fallthrough preserves today's TS/Rust behavior
|
|
240
|
+
* (their direct imports still synthesize correctly); only the extra
|
|
241
|
+
* re-export DAG walk for barrel-file correctness is missing.
|
|
242
|
+
* - `namespace` / `named`: no-op here (namespace handled in Loop 3 below,
|
|
243
|
+
* named needs no synthesis).
|
|
244
|
+
*
|
|
245
|
+
* Used by both Loop 1 (ctx.importMap) and Loop 2 (graphImports) so a future
|
|
246
|
+
* transitive-import language whose edges arrive via graphImports gets closure
|
|
247
|
+
* expansion consistently regardless of edge source.
|
|
248
|
+
*/
|
|
249
|
+
const dispatchSynthesis = (filePath, importedFiles, provider) => {
|
|
250
|
+
switch (provider.importSemantics) {
|
|
251
|
+
case 'wildcard-transitive':
|
|
252
|
+
synthesizeForFile(filePath, expandTransitiveIncludeClosure(importedFiles, ctx.importMap, graphImports));
|
|
253
|
+
return;
|
|
254
|
+
case 'wildcard-leaf':
|
|
255
|
+
case 'explicit-reexport':
|
|
256
|
+
synthesizeForFile(filePath, importedFiles);
|
|
257
|
+
return;
|
|
258
|
+
case 'namespace':
|
|
259
|
+
case 'named':
|
|
260
|
+
return;
|
|
261
|
+
default: {
|
|
262
|
+
const _exhaustive = provider.importSemantics;
|
|
263
|
+
void _exhaustive;
|
|
264
|
+
}
|
|
265
|
+
}
|
|
266
|
+
};
|
|
267
|
+
// Loop 1: synthesize from ctx.importMap (Ruby, C/C++, Swift, Dart file-based imports).
|
|
137
268
|
for (const [filePath, importedFiles] of ctx.importMap) {
|
|
138
269
|
const lang = getLanguageFromFilename(filePath);
|
|
139
270
|
if (!lang || !isWildcardImportLanguage(lang))
|
|
140
271
|
continue;
|
|
141
|
-
|
|
272
|
+
const provider = getProviderForFile(filePath);
|
|
273
|
+
if (!provider)
|
|
274
|
+
continue;
|
|
275
|
+
dispatchSynthesis(filePath, importedFiles, provider);
|
|
142
276
|
}
|
|
143
|
-
//
|
|
277
|
+
// Loop 2: synthesize from graph IMPORTS edges (Go and other wildcard-import
|
|
278
|
+
// languages whose edges live in the graph rather than ctx.importMap).
|
|
144
279
|
for (const [filePath, importedFiles] of graphImports) {
|
|
145
|
-
|
|
280
|
+
const provider = getProviderForFile(filePath);
|
|
281
|
+
if (!provider)
|
|
282
|
+
continue;
|
|
283
|
+
dispatchSynthesis(filePath, importedFiles, provider);
|
|
146
284
|
}
|
|
147
285
|
// Build Python module-alias maps for namespace-import languages.
|
|
148
286
|
// `import models` in app.py → moduleAliasMap['app.py']['models'] = 'models.py'
|
|
@@ -246,14 +246,17 @@ export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
|
|
|
246
246
|
Interface: interfaceWriter,
|
|
247
247
|
CodeElement: codeElemWriter,
|
|
248
248
|
};
|
|
249
|
-
|
|
249
|
+
// Deduplicate all node types — the pipeline can produce duplicate IDs across
|
|
250
|
+
// all symbol types (Class, Method, Function, etc.), not just File nodes.
|
|
251
|
+
// A single Set covering every label prevents PK violations on COPY.
|
|
252
|
+
const seenNodeIds = new Set();
|
|
250
253
|
// --- SINGLE PASS over all nodes ---
|
|
251
254
|
for (const node of graph.iterNodes()) {
|
|
255
|
+
if (seenNodeIds.has(node.id))
|
|
256
|
+
continue;
|
|
257
|
+
seenNodeIds.add(node.id);
|
|
252
258
|
switch (node.label) {
|
|
253
259
|
case 'File': {
|
|
254
|
-
if (seenFileIds.has(node.id))
|
|
255
|
-
break;
|
|
256
|
-
seenFileIds.add(node.id);
|
|
257
260
|
const content = await extractContent(node, contentCache);
|
|
258
261
|
await fileWriter.addRow([
|
|
259
262
|
escapeCSVField(node.id),
|
|
@@ -1,5 +1,33 @@
|
|
|
1
1
|
import lbug from '@ladybugdb/core';
|
|
2
2
|
import { KnowledgeGraph } from '../graph/types.js';
|
|
3
|
+
/** Factory for creating WriteStreams — injectable for testing. */
|
|
4
|
+
export type WriteStreamFactory = (filePath: string) => import('fs').WriteStream;
|
|
5
|
+
/** Result of splitting the relationship CSV into per-label-pair files. */
|
|
6
|
+
export interface RelCsvSplitResult {
|
|
7
|
+
relHeader: string;
|
|
8
|
+
relsByPairMeta: Map<string, {
|
|
9
|
+
csvPath: string;
|
|
10
|
+
rows: number;
|
|
11
|
+
}>;
|
|
12
|
+
pairWriteStreams: Map<string, import('fs').WriteStream>;
|
|
13
|
+
skippedRels: number;
|
|
14
|
+
totalValidRels: number;
|
|
15
|
+
}
|
|
16
|
+
/**
|
|
17
|
+
* Split a relationship CSV into per-label-pair files on disk.
|
|
18
|
+
*
|
|
19
|
+
* Streams the CSV line-by-line, routing each relationship to a file named
|
|
20
|
+
* `rel_{fromLabel}_{toLabel}.csv`. Handles backpressure correctly: only one
|
|
21
|
+
* drain listener per stream at a time, and readline resumes only when ALL
|
|
22
|
+
* backpressured streams have drained.
|
|
23
|
+
*
|
|
24
|
+
* @param csvPath Path to the combined relationship CSV
|
|
25
|
+
* @param csvDir Directory to write per-pair CSV files
|
|
26
|
+
* @param validTables Set of valid node table names
|
|
27
|
+
* @param getNodeLabel Function to extract the label from a node ID
|
|
28
|
+
* @param wsFactory Optional WriteStream factory (defaults to fs.createWriteStream)
|
|
29
|
+
*/
|
|
30
|
+
export declare const splitRelCsvByLabelPair: (csvPath: string, csvDir: string, validTables: Set<string>, getNodeLabel: (id: string) => string, wsFactory?: WriteStreamFactory) => Promise<RelCsvSplitResult>;
|
|
3
31
|
/** Expose the current Database for pool adapter reuse in tests. */
|
|
4
32
|
export declare const getDatabase: () => lbug.Database | null;
|
|
5
33
|
/**
|
|
@@ -70,8 +98,18 @@ export declare const loadCachedEmbeddings: () => Promise<{
|
|
|
70
98
|
embeddings: Array<{
|
|
71
99
|
nodeId: string;
|
|
72
100
|
embedding: number[];
|
|
101
|
+
contentHash?: string;
|
|
73
102
|
}>;
|
|
74
103
|
}>;
|
|
104
|
+
/**
|
|
105
|
+
* Fetch existing embedding hashes from CodeEmbedding table for incremental embedding.
|
|
106
|
+
* Returns a Map<nodeId, contentHash> suitable for passing to `runEmbeddingPipeline`.
|
|
107
|
+
* Handles legacy DBs without the `contentHash` column (all rows treated as stale with empty hash).
|
|
108
|
+
* Returns undefined if the CodeEmbedding table does not exist.
|
|
109
|
+
*
|
|
110
|
+
* @param execQuery - Cypher query executor (typically pool-adapter's `executeQuery`)
|
|
111
|
+
*/
|
|
112
|
+
export declare const fetchExistingEmbeddingHashes: (execQuery: (cypher: string) => Promise<any[]>) => Promise<Map<string, string> | undefined>;
|
|
75
113
|
export declare const closeLbug: () => Promise<void>;
|
|
76
114
|
export declare const isLbugReady: () => boolean;
|
|
77
115
|
/**
|