gitnexus 1.6.2-rc.2 → 1.6.2-rc.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_shared/lbug/schema-constants.d.ts +1 -1
- package/dist/_shared/lbug/schema-constants.d.ts.map +1 -1
- package/dist/_shared/lbug/schema-constants.js +1 -0
- package/dist/_shared/lbug/schema-constants.js.map +1 -1
- package/dist/cli/analyze.js +3 -0
- package/dist/core/embeddings/ast-utils.d.ts +22 -0
- package/dist/core/embeddings/ast-utils.js +105 -0
- package/dist/core/embeddings/character-chunk.d.ts +12 -0
- package/dist/core/embeddings/character-chunk.js +43 -0
- package/dist/core/embeddings/chunker.d.ts +14 -0
- package/dist/core/embeddings/chunker.js +234 -0
- package/dist/core/embeddings/embedder.js +5 -0
- package/dist/core/embeddings/embedding-pipeline.d.ts +29 -24
- package/dist/core/embeddings/embedding-pipeline.js +244 -125
- package/dist/core/embeddings/line-index.d.ts +7 -0
- package/dist/core/embeddings/line-index.js +42 -0
- package/dist/core/embeddings/server-mapping.d.ts +15 -0
- package/dist/core/embeddings/server-mapping.js +33 -0
- package/dist/core/embeddings/structural-extractor.d.ts +15 -0
- package/dist/core/embeddings/structural-extractor.js +58 -0
- package/dist/core/embeddings/text-generator.d.ts +20 -13
- package/dist/core/embeddings/text-generator.js +151 -119
- package/dist/core/embeddings/types.d.ts +81 -3
- package/dist/core/embeddings/types.js +105 -3
- package/dist/core/group/extractors/http-patterns/node.js +130 -0
- package/dist/core/group/extractors/manifest-extractor.js +20 -5
- package/dist/core/group/sync.js +49 -1
- package/dist/core/ingestion/call-extractors/configs/c-cpp.d.ts +3 -0
- package/dist/core/ingestion/call-extractors/configs/c-cpp.js +8 -0
- package/dist/core/ingestion/call-extractors/configs/csharp.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/csharp.js +6 -0
- package/dist/core/ingestion/call-extractors/configs/dart.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/dart.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/go.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/go.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/jvm.d.ts +3 -0
- package/dist/core/ingestion/call-extractors/configs/jvm.js +51 -0
- package/dist/core/ingestion/call-extractors/configs/php.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/php.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/python.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/python.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/ruby.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/ruby.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/rust.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/rust.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/swift.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/swift.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/typescript-javascript.d.ts +3 -0
- package/dist/core/ingestion/call-extractors/configs/typescript-javascript.js +8 -0
- package/dist/core/ingestion/call-extractors/generic.d.ts +5 -0
- package/dist/core/ingestion/call-extractors/generic.js +59 -0
- package/dist/core/ingestion/call-processor.d.ts +1 -3
- package/dist/core/ingestion/call-processor.js +49 -47
- package/dist/core/ingestion/call-types.d.ts +60 -0
- package/dist/core/ingestion/call-types.js +2 -0
- package/dist/core/ingestion/class-extractors/configs/c-cpp.d.ts +3 -0
- package/dist/core/ingestion/class-extractors/configs/c-cpp.js +11 -0
- package/dist/core/ingestion/class-extractors/configs/csharp.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/csharp.js +21 -0
- package/dist/core/ingestion/class-extractors/configs/dart.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/dart.js +7 -0
- package/dist/core/ingestion/class-extractors/configs/go.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/go.js +20 -0
- package/dist/core/ingestion/class-extractors/configs/jvm.d.ts +3 -0
- package/dist/core/ingestion/class-extractors/configs/jvm.js +35 -0
- package/dist/core/ingestion/class-extractors/configs/php.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/php.js +7 -0
- package/dist/core/ingestion/class-extractors/configs/python.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/python.js +7 -0
- package/dist/core/ingestion/class-extractors/configs/ruby.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/ruby.js +7 -0
- package/dist/core/ingestion/class-extractors/configs/rust.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/rust.js +7 -0
- package/dist/core/ingestion/class-extractors/configs/swift.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/swift.js +18 -0
- package/dist/core/ingestion/class-extractors/configs/typescript-javascript.d.ts +4 -0
- package/dist/core/ingestion/class-extractors/configs/typescript-javascript.js +28 -0
- package/dist/core/ingestion/field-types.d.ts +1 -1
- package/dist/core/ingestion/import-resolvers/configs/c-cpp.d.ts +7 -0
- package/dist/core/ingestion/import-resolvers/configs/c-cpp.js +14 -0
- package/dist/core/ingestion/import-resolvers/configs/csharp.d.ts +8 -0
- package/dist/core/ingestion/import-resolvers/configs/csharp.js +27 -0
- package/dist/core/ingestion/import-resolvers/configs/dart.d.ts +17 -0
- package/dist/core/ingestion/import-resolvers/{dart.js → configs/dart.js} +26 -16
- package/dist/core/ingestion/import-resolvers/configs/go.d.ts +8 -0
- package/dist/core/ingestion/import-resolvers/configs/go.js +26 -0
- package/dist/core/ingestion/import-resolvers/configs/jvm.d.ts +13 -0
- package/dist/core/ingestion/import-resolvers/configs/jvm.js +68 -0
- package/dist/core/ingestion/import-resolvers/configs/php.d.ts +8 -0
- package/dist/core/ingestion/import-resolvers/configs/php.js +15 -0
- package/dist/core/ingestion/import-resolvers/configs/python.d.ts +12 -0
- package/dist/core/ingestion/import-resolvers/configs/python.js +41 -0
- package/dist/core/ingestion/import-resolvers/configs/ruby.d.ts +8 -0
- package/dist/core/ingestion/import-resolvers/configs/ruby.js +16 -0
- package/dist/core/ingestion/import-resolvers/configs/rust.d.ts +8 -0
- package/dist/core/ingestion/import-resolvers/configs/rust.js +54 -0
- package/dist/core/ingestion/import-resolvers/configs/swift.d.ts +8 -0
- package/dist/core/ingestion/import-resolvers/{swift.js → configs/swift.js} +10 -5
- package/dist/core/ingestion/import-resolvers/configs/typescript-javascript.d.ts +9 -0
- package/dist/core/ingestion/import-resolvers/configs/typescript-javascript.js +23 -0
- package/dist/core/ingestion/import-resolvers/csharp.d.ts +4 -5
- package/dist/core/ingestion/import-resolvers/csharp.js +4 -20
- package/dist/core/ingestion/import-resolvers/go.d.ts +4 -5
- package/dist/core/ingestion/import-resolvers/go.js +4 -19
- package/dist/core/ingestion/import-resolvers/jvm.d.ts +5 -10
- package/dist/core/ingestion/import-resolvers/jvm.js +5 -58
- package/dist/core/ingestion/import-resolvers/php.d.ts +4 -5
- package/dist/core/ingestion/import-resolvers/php.js +4 -7
- package/dist/core/ingestion/import-resolvers/python.d.ts +3 -6
- package/dist/core/ingestion/import-resolvers/python.js +3 -18
- package/dist/core/ingestion/import-resolvers/resolver-factory.d.ts +24 -0
- package/dist/core/ingestion/import-resolvers/resolver-factory.js +33 -0
- package/dist/core/ingestion/import-resolvers/ruby.d.ts +4 -5
- package/dist/core/ingestion/import-resolvers/ruby.js +4 -7
- package/dist/core/ingestion/import-resolvers/rust.d.ts +4 -5
- package/dist/core/ingestion/import-resolvers/rust.js +4 -47
- package/dist/core/ingestion/import-resolvers/standard.d.ts +3 -9
- package/dist/core/ingestion/import-resolvers/standard.js +7 -8
- package/dist/core/ingestion/import-resolvers/types.d.ts +24 -0
- package/dist/core/ingestion/language-provider.d.ts +12 -0
- package/dist/core/ingestion/languages/c-cpp.js +15 -12
- package/dist/core/ingestion/languages/csharp.js +11 -21
- package/dist/core/ingestion/languages/dart.js +11 -7
- package/dist/core/ingestion/languages/go.js +11 -20
- package/dist/core/ingestion/languages/java.js +11 -18
- package/dist/core/ingestion/languages/kotlin.js +11 -13
- package/dist/core/ingestion/languages/php.js +11 -7
- package/dist/core/ingestion/languages/python.js +11 -7
- package/dist/core/ingestion/languages/ruby.js +11 -7
- package/dist/core/ingestion/languages/rust.js +11 -7
- package/dist/core/ingestion/languages/swift.js +11 -18
- package/dist/core/ingestion/languages/typescript.js +15 -23
- package/dist/core/ingestion/languages/vue.js +11 -17
- package/dist/core/ingestion/model/index.d.ts +2 -2
- package/dist/core/ingestion/model/index.js +1 -1
- package/dist/core/ingestion/model/resolve.d.ts +3 -0
- package/dist/core/ingestion/model/resolve.js +6 -2
- package/dist/core/ingestion/parsing-processor.d.ts +1 -2
- package/dist/core/ingestion/tree-sitter-queries.d.ts +11 -11
- package/dist/core/ingestion/tree-sitter-queries.js +81 -0
- package/dist/core/ingestion/type-env.d.ts +1 -1
- package/dist/core/ingestion/utils/ast-helpers.d.ts +1 -1
- package/dist/core/ingestion/utils/ast-helpers.js +3 -0
- package/dist/core/ingestion/variable-extractors/configs/c-cpp.d.ts +3 -0
- package/dist/core/ingestion/variable-extractors/configs/c-cpp.js +81 -0
- package/dist/core/ingestion/variable-extractors/configs/csharp.d.ts +9 -0
- package/dist/core/ingestion/variable-extractors/configs/csharp.js +63 -0
- package/dist/core/ingestion/variable-extractors/configs/dart.d.ts +2 -0
- package/dist/core/ingestion/variable-extractors/configs/dart.js +94 -0
- package/dist/core/ingestion/variable-extractors/configs/go.d.ts +2 -0
- package/dist/core/ingestion/variable-extractors/configs/go.js +83 -0
- package/dist/core/ingestion/variable-extractors/configs/jvm.d.ts +18 -0
- package/dist/core/ingestion/variable-extractors/configs/jvm.js +115 -0
- package/dist/core/ingestion/variable-extractors/configs/php.d.ts +14 -0
- package/dist/core/ingestion/variable-extractors/configs/php.js +58 -0
- package/dist/core/ingestion/variable-extractors/configs/python.d.ts +2 -0
- package/dist/core/ingestion/variable-extractors/configs/python.js +101 -0
- package/dist/core/ingestion/variable-extractors/configs/ruby.d.ts +11 -0
- package/dist/core/ingestion/variable-extractors/configs/ruby.js +52 -0
- package/dist/core/ingestion/variable-extractors/configs/rust.d.ts +2 -0
- package/dist/core/ingestion/variable-extractors/configs/rust.js +76 -0
- package/dist/core/ingestion/variable-extractors/configs/swift.d.ts +2 -0
- package/dist/core/ingestion/variable-extractors/configs/swift.js +88 -0
- package/dist/core/ingestion/variable-extractors/configs/typescript-javascript.d.ts +3 -0
- package/dist/core/ingestion/variable-extractors/configs/typescript-javascript.js +83 -0
- package/dist/core/ingestion/variable-extractors/generic.d.ts +5 -0
- package/dist/core/ingestion/variable-extractors/generic.js +80 -0
- package/dist/core/ingestion/variable-types.d.ts +82 -0
- package/dist/core/ingestion/variable-types.js +2 -0
- package/dist/core/ingestion/workers/parse-worker.js +196 -166
- package/dist/core/ingestion/workers/worker-pool.js +3 -0
- package/dist/core/lbug/csv-generator.js +1 -0
- package/dist/core/lbug/lbug-adapter.d.ts +13 -4
- package/dist/core/lbug/lbug-adapter.js +166 -81
- package/dist/core/lbug/schema.d.ts +9 -1
- package/dist/core/lbug/schema.js +19 -2
- package/dist/core/run-analyze.js +17 -4
- package/dist/core/tree-sitter/parser-loader.d.ts +3 -0
- package/dist/core/tree-sitter/parser-loader.js +17 -8
- package/dist/mcp/core/embedder.js +5 -0
- package/dist/mcp/local/local-backend.js +29 -19
- package/dist/server/api.js +10 -21
- package/package.json +5 -3
- package/scripts/build-tree-sitter-proto.cjs +82 -0
- package/vendor/node_modules/node-addon-api/node_addon_api.Makefile +6 -0
- package/vendor/node_modules/node-addon-api/node_addon_api.target.mk +104 -0
- package/vendor/node_modules/node-addon-api/node_addon_api_except.target.mk +108 -0
- package/vendor/node_modules/node-addon-api/node_addon_api_except_all.target.mk +104 -0
- package/vendor/node_modules/node-addon-api/node_addon_api_maybe.target.mk +104 -0
- package/vendor/tree-sitter-proto/package.json +1 -7
- package/dist/core/ingestion/call-sites/extract-language-call-site.d.ts +0 -10
- package/dist/core/ingestion/call-sites/extract-language-call-site.js +0 -22
- package/dist/core/ingestion/call-sites/java.d.ts +0 -9
- package/dist/core/ingestion/call-sites/java.js +0 -30
- package/dist/core/ingestion/import-resolvers/dart.d.ts +0 -7
- package/dist/core/ingestion/import-resolvers/swift.d.ts +0 -7
- package/dist/core/ingestion/import-resolvers/vue.d.ts +0 -8
- package/dist/core/ingestion/import-resolvers/vue.js +0 -9
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
import fs from 'fs/promises';
|
|
2
2
|
import { createReadStream, createWriteStream } from 'fs';
|
|
3
3
|
import { createInterface } from 'readline';
|
|
4
|
+
import { once } from 'events';
|
|
5
|
+
import { finished } from 'stream/promises';
|
|
4
6
|
import path from 'path';
|
|
5
7
|
import lbug from '@ladybugdb/core';
|
|
6
|
-
import { NODE_TABLES, REL_TABLE_NAME, SCHEMA_QUERIES, EMBEDDING_TABLE_NAME, } from './schema.js';
|
|
8
|
+
import { NODE_TABLES, REL_TABLE_NAME, SCHEMA_QUERIES, EMBEDDING_TABLE_NAME, STALE_HASH_SENTINEL, } from './schema.js';
|
|
7
9
|
import { streamAllCSVsToDisk } from './csv-generator.js';
|
|
8
10
|
/**
|
|
9
11
|
* Split a relationship CSV into per-label-pair files on disk.
|
|
@@ -25,100 +27,82 @@ export const splitRelCsvByLabelPair = async (csvPath, csvDir, validTables, getNo
|
|
|
25
27
|
const pairWriteStreams = new Map();
|
|
26
28
|
let skippedRels = 0;
|
|
27
29
|
let totalValidRels = 0;
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
}
|
|
47
|
-
catch { }
|
|
48
|
-
try {
|
|
49
|
-
inputStream.destroy();
|
|
50
|
-
}
|
|
51
|
-
catch { }
|
|
52
|
-
for (const ws of pairWriteStreams.values()) {
|
|
53
|
-
try {
|
|
54
|
-
ws.destroy();
|
|
55
|
-
}
|
|
56
|
-
catch { }
|
|
57
|
-
}
|
|
58
|
-
reject(err);
|
|
59
|
-
};
|
|
30
|
+
const inputStream = createReadStream(csvPath, 'utf-8');
|
|
31
|
+
const rl = createInterface({ input: inputStream, crlfDelay: Infinity });
|
|
32
|
+
// If any pair WriteStream errors (disk full, EMFILE, etc.) or the input
|
|
33
|
+
// stream fails, we need to abort the pending `once(ws, 'drain')` await.
|
|
34
|
+
// An AbortController gives us one signal to cancel all pending waits
|
|
35
|
+
// without a custom state machine.
|
|
36
|
+
const abortOnError = new AbortController();
|
|
37
|
+
let streamError = null;
|
|
38
|
+
const markStreamError = (err) => {
|
|
39
|
+
streamError ??= err;
|
|
40
|
+
abortOnError.abort(err);
|
|
41
|
+
};
|
|
42
|
+
try {
|
|
43
|
+
// `for await (const line of rl)` replaces the old manual
|
|
44
|
+
// on('line')/pause()/resume()/waitingForDrain state machine: readline's
|
|
45
|
+
// async iterator naturally serializes line delivery with our awaits, so
|
|
46
|
+
// at most one ws can be in backpressure at a time and we just await its
|
|
47
|
+
// 'drain' event.
|
|
60
48
|
let isFirst = true;
|
|
61
|
-
|
|
49
|
+
for await (const line of rl) {
|
|
50
|
+
if (streamError)
|
|
51
|
+
throw streamError;
|
|
62
52
|
if (isFirst) {
|
|
63
53
|
relHeader = line;
|
|
64
54
|
isFirst = false;
|
|
65
|
-
|
|
55
|
+
continue;
|
|
66
56
|
}
|
|
67
57
|
if (!line.trim())
|
|
68
|
-
|
|
58
|
+
continue;
|
|
69
59
|
const match = line.match(/"([^"]*)","([^"]*)"/);
|
|
70
60
|
if (!match) {
|
|
71
61
|
skippedRels++;
|
|
72
|
-
|
|
62
|
+
continue;
|
|
73
63
|
}
|
|
74
64
|
const fromLabel = getNodeLabel(match[1]);
|
|
75
65
|
const toLabel = getNodeLabel(match[2]);
|
|
76
66
|
if (!validTables.has(fromLabel) || !validTables.has(toLabel)) {
|
|
77
67
|
skippedRels++;
|
|
78
|
-
|
|
68
|
+
continue;
|
|
79
69
|
}
|
|
80
70
|
const pairKey = `${fromLabel}|${toLabel}`;
|
|
81
71
|
let ws = pairWriteStreams.get(pairKey);
|
|
82
72
|
if (!ws) {
|
|
83
73
|
const pairCsvPath = path.join(csvDir, `rel_${fromLabel}_${toLabel}.csv`);
|
|
84
74
|
ws = wsFactory(pairCsvPath);
|
|
85
|
-
|
|
86
|
-
// tear down everything and reject the Promise. Without this handler,
|
|
87
|
-
// a stream error while rl is paused waiting for drain would cause
|
|
88
|
-
// the drain callback to never fire and the Promise to hang forever.
|
|
89
|
-
ws.on('error', cleanup);
|
|
90
|
-
ws.write(relHeader + '\n');
|
|
75
|
+
ws.on('error', markStreamError);
|
|
91
76
|
pairWriteStreams.set(pairKey, ws);
|
|
92
77
|
relsByPairMeta.set(pairKey, { csvPath: pairCsvPath, rows: 0 });
|
|
78
|
+
if (!ws.write(relHeader + '\n')) {
|
|
79
|
+
await once(ws, 'drain', { signal: abortOnError.signal });
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
if (!ws.write(line + '\n')) {
|
|
83
|
+
await once(ws, 'drain', { signal: abortOnError.signal });
|
|
93
84
|
}
|
|
94
|
-
const ok = ws.write(line + '\n');
|
|
95
85
|
relsByPairMeta.get(pairKey).rows++;
|
|
96
86
|
totalValidRels++;
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
settled = true;
|
|
117
|
-
resolve();
|
|
118
|
-
}
|
|
119
|
-
});
|
|
120
|
-
rl.on('error', cleanup);
|
|
121
|
-
});
|
|
87
|
+
}
|
|
88
|
+
if (streamError)
|
|
89
|
+
throw streamError;
|
|
90
|
+
}
|
|
91
|
+
catch (err) {
|
|
92
|
+
// Tear down everything so no fd is left dangling. If the abort was caused
|
|
93
|
+
// by a stream error, rethrow that error (more actionable than AbortError).
|
|
94
|
+
for (const ws of pairWriteStreams.values())
|
|
95
|
+
ws.destroy();
|
|
96
|
+
inputStream.destroy();
|
|
97
|
+
throw streamError ?? err;
|
|
98
|
+
}
|
|
99
|
+
finally {
|
|
100
|
+
// Readline 'close' fires before the underlying fs.ReadStream releases its
|
|
101
|
+
// fd — on Windows that race caused ENOTEMPTY on the parent dir.
|
|
102
|
+
// stream/promises.finished is the stdlib "wait until this stream is fully
|
|
103
|
+
// closed" primitive and handles both success and error paths.
|
|
104
|
+
await finished(inputStream).catch(() => { });
|
|
105
|
+
}
|
|
122
106
|
return { relHeader, relsByPairMeta, pairWriteStreams, skippedRels, totalValidRels };
|
|
123
107
|
};
|
|
124
108
|
let db = null;
|
|
@@ -126,6 +110,14 @@ let conn = null;
|
|
|
126
110
|
let currentDbPath = null;
|
|
127
111
|
let ftsLoaded = false;
|
|
128
112
|
let vectorExtensionLoaded = false;
|
|
113
|
+
/**
|
|
114
|
+
* Check if an error indicates a missing column or table (schema-level problem)
|
|
115
|
+
* rather than a transient/connection error. Used for legacy DB fallback logic.
|
|
116
|
+
*/
|
|
117
|
+
const isMissingColumnOrTableError = (msg) => msg.includes('does not exist') ||
|
|
118
|
+
// Kuzu-specific: "(table|column|property) ... not found" — narrow enough to avoid
|
|
119
|
+
// matching transient errors like "connection not found" or "key not found".
|
|
120
|
+
/(table|column|property).*not found/i.test(msg);
|
|
129
121
|
/** Expose the current Database for pool adapter reuse in tests. */
|
|
130
122
|
export const getDatabase = () => db;
|
|
131
123
|
// Global session lock for operations that touch module-level lbug globals.
|
|
@@ -332,15 +324,13 @@ export const loadGraphToLbug = async (graph, repoPath, storagePath, onProgress)
|
|
|
332
324
|
}
|
|
333
325
|
// Bulk COPY relationships — split by FROM→TO label pair (LadybugDB requires it)
|
|
334
326
|
const { relHeader, relsByPairMeta, pairWriteStreams, skippedRels, totalValidRels } = await splitRelCsvByLabelPair(csvResult.relCsvPath, csvDir, validTables, getNodeLabel);
|
|
335
|
-
// Close all per-pair write streams before COPY
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
ws.end(
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
});
|
|
343
|
-
})));
|
|
327
|
+
// Close all per-pair write streams before COPY. `stream/promises.finished`
|
|
328
|
+
// resolves on the stream's 'finish' event and rejects on 'error' — replaces
|
|
329
|
+
// a hand-rolled promisification with the stdlib primitive.
|
|
330
|
+
await Promise.all(Array.from(pairWriteStreams.values()).map(async (ws) => {
|
|
331
|
+
ws.end();
|
|
332
|
+
await finished(ws);
|
|
333
|
+
}));
|
|
344
334
|
const insertedRels = totalValidRels;
|
|
345
335
|
const warnings = [];
|
|
346
336
|
if (insertedRels > 0) {
|
|
@@ -801,6 +791,8 @@ export const getLbugStats = async () => {
|
|
|
801
791
|
* Load cached embeddings from LadybugDB before a rebuild.
|
|
802
792
|
* Returns all embedding vectors so they can be re-inserted after the graph is reloaded,
|
|
803
793
|
* avoiding expensive re-embedding of unchanged nodes.
|
|
794
|
+
*
|
|
795
|
+
* Detects old schema (no chunkIndex column) and returns empty cache to trigger rebuild.
|
|
804
796
|
*/
|
|
805
797
|
export const loadCachedEmbeddings = async () => {
|
|
806
798
|
if (!conn) {
|
|
@@ -809,20 +801,51 @@ export const loadCachedEmbeddings = async () => {
|
|
|
809
801
|
const embeddingNodeIds = new Set();
|
|
810
802
|
const embeddings = [];
|
|
811
803
|
try {
|
|
812
|
-
|
|
804
|
+
// Schema migration detection: query with new columns to verify schema version.
|
|
805
|
+
// Old schema only had (nodeId, embedding); new schema adds (id, chunkIndex, startLine, endLine, contentHash).
|
|
806
|
+
// If the query fails for any reason (typically a missing column), we return an empty cache to force a full rebuild.
|
|
807
|
+
try {
|
|
808
|
+
const check = await conn.query(`MATCH (e:${EMBEDDING_TABLE_NAME}) RETURN e.nodeId AS nodeId, e.chunkIndex AS chunkIndex LIMIT 1`);
|
|
809
|
+
const checkResult = Array.isArray(check) ? check[0] : check;
|
|
810
|
+
await checkResult.getAll();
|
|
811
|
+
}
|
|
812
|
+
catch {
|
|
813
|
+
return { embeddingNodeIds: new Set(), embeddings: [] };
|
|
814
|
+
}
|
|
815
|
+
// Try to read contentHash alongside chunk columns
|
|
816
|
+
let rows;
|
|
817
|
+
let hasContentHash = true;
|
|
818
|
+
try {
|
|
819
|
+
rows = await conn.query(`MATCH (e:${EMBEDDING_TABLE_NAME}) RETURN e.nodeId AS nodeId, e.chunkIndex AS chunkIndex, e.startLine AS startLine, e.endLine AS endLine, e.embedding AS embedding, e.contentHash AS contentHash`);
|
|
820
|
+
}
|
|
821
|
+
catch (err) {
|
|
822
|
+
// Fallback for legacy DBs without contentHash column
|
|
823
|
+
const msg = err?.message ?? '';
|
|
824
|
+
if (isMissingColumnOrTableError(msg)) {
|
|
825
|
+
hasContentHash = false;
|
|
826
|
+
rows = await conn.query(`MATCH (e:${EMBEDDING_TABLE_NAME}) RETURN e.nodeId AS nodeId, e.chunkIndex AS chunkIndex, e.startLine AS startLine, e.endLine AS endLine, e.embedding AS embedding`);
|
|
827
|
+
}
|
|
828
|
+
else {
|
|
829
|
+
throw err;
|
|
830
|
+
}
|
|
831
|
+
}
|
|
813
832
|
const result = Array.isArray(rows) ? rows[0] : rows;
|
|
814
833
|
for (const row of await result.getAll()) {
|
|
815
834
|
const nodeId = String(row.nodeId ?? row[0] ?? '');
|
|
816
835
|
if (!nodeId)
|
|
817
836
|
continue;
|
|
818
837
|
embeddingNodeIds.add(nodeId);
|
|
819
|
-
const embedding = row.embedding ?? row[
|
|
838
|
+
const embedding = row.embedding ?? row[4];
|
|
820
839
|
if (embedding) {
|
|
821
840
|
embeddings.push({
|
|
822
841
|
nodeId,
|
|
842
|
+
chunkIndex: Number(row.chunkIndex ?? row[1] ?? 0),
|
|
843
|
+
startLine: Number(row.startLine ?? row[2] ?? 0),
|
|
844
|
+
endLine: Number(row.endLine ?? row[3] ?? 0),
|
|
823
845
|
embedding: Array.isArray(embedding)
|
|
824
846
|
? embedding.map(Number)
|
|
825
847
|
: Array.from(embedding).map(Number),
|
|
848
|
+
contentHash: hasContentHash ? (row.contentHash ?? row[5] ?? undefined) : undefined,
|
|
826
849
|
});
|
|
827
850
|
}
|
|
828
851
|
}
|
|
@@ -832,6 +855,68 @@ export const loadCachedEmbeddings = async () => {
|
|
|
832
855
|
}
|
|
833
856
|
return { embeddingNodeIds, embeddings };
|
|
834
857
|
};
|
|
858
|
+
/**
|
|
859
|
+
* Fetch existing embedding hashes from CodeEmbedding table for incremental embedding.
|
|
860
|
+
* Returns a Map<nodeId, contentHash> suitable for passing to `runEmbeddingPipeline`.
|
|
861
|
+
* Handles legacy DBs without the `contentHash` or chunk columns (every such row is mapped to `STALE_HASH_SENTINEL`, i.e. treated as stale).
|
|
862
|
+
* Returns undefined if the CodeEmbedding table does not exist or contains no rows.
|
|
863
|
+
*
|
|
864
|
+
* @param execQuery - Cypher query executor (typically pool-adapter's `executeQuery`)
|
|
865
|
+
*/
|
|
866
|
+
export const fetchExistingEmbeddingHashes = async (execQuery) => {
|
|
867
|
+
try {
|
|
868
|
+
const rows = await execQuery(`MATCH (e:${EMBEDDING_TABLE_NAME}) RETURN e.nodeId AS nodeId, e.chunkIndex AS chunkIndex, e.startLine AS startLine, e.endLine AS endLine, e.contentHash AS contentHash`);
|
|
869
|
+
if (!rows || rows.length === 0)
|
|
870
|
+
return undefined;
|
|
871
|
+
const map = new Map();
|
|
872
|
+
for (const r of rows) {
|
|
873
|
+
const nodeId = r.nodeId ?? r[0];
|
|
874
|
+
const chunkIndex = r.chunkIndex ?? r[1];
|
|
875
|
+
const startLine = r.startLine ?? r[2];
|
|
876
|
+
const endLine = r.endLine ?? r[3];
|
|
877
|
+
const hash = r.contentHash ?? r[4] ?? STALE_HASH_SENTINEL;
|
|
878
|
+
if (nodeId) {
|
|
879
|
+
const hasChunkMetadata = chunkIndex !== undefined &&
|
|
880
|
+
chunkIndex !== null &&
|
|
881
|
+
startLine !== undefined &&
|
|
882
|
+
startLine !== null &&
|
|
883
|
+
endLine !== undefined &&
|
|
884
|
+
endLine !== null;
|
|
885
|
+
// Empty/null contentHash or missing chunk metadata means legacy row — treat as stale.
|
|
886
|
+
map.set(nodeId, hasChunkMetadata && hash ? hash : STALE_HASH_SENTINEL);
|
|
887
|
+
}
|
|
888
|
+
}
|
|
889
|
+
return map;
|
|
890
|
+
}
|
|
891
|
+
catch (err) {
|
|
892
|
+
const msg = err?.message ?? '';
|
|
893
|
+
if (isMissingColumnOrTableError(msg)) {
|
|
894
|
+
// Legacy rows missing chunk-aware columns — treat every row as stale.
|
|
895
|
+
try {
|
|
896
|
+
const rows = await execQuery(`MATCH (e:${EMBEDDING_TABLE_NAME}) RETURN e.nodeId AS nodeId`);
|
|
897
|
+
if (!rows || rows.length === 0)
|
|
898
|
+
return undefined;
|
|
899
|
+
const map = new Map();
|
|
900
|
+
for (const r of rows) {
|
|
901
|
+
const nodeId = r.nodeId ?? r[0];
|
|
902
|
+
if (nodeId)
|
|
903
|
+
map.set(nodeId, STALE_HASH_SENTINEL);
|
|
904
|
+
}
|
|
905
|
+
console.log(`[embed] ${map.size} nodes in legacy DB (missing chunk-aware columns) — all treated as stale`);
|
|
906
|
+
return map;
|
|
907
|
+
}
|
|
908
|
+
catch (fallbackErr) {
|
|
909
|
+
const fallbackMsg = fallbackErr?.message ?? '';
|
|
910
|
+
if (isMissingColumnOrTableError(fallbackMsg)) {
|
|
911
|
+
console.log(`[embed] CodeEmbedding table not yet present — full embedding run (${fallbackMsg})`);
|
|
912
|
+
return undefined;
|
|
913
|
+
}
|
|
914
|
+
throw fallbackErr;
|
|
915
|
+
}
|
|
916
|
+
}
|
|
917
|
+
throw err;
|
|
918
|
+
}
|
|
919
|
+
};
|
|
835
920
|
export const closeLbug = async () => {
|
|
836
921
|
if (conn) {
|
|
837
922
|
try {
|
|
@@ -31,6 +31,7 @@ export declare const IMPL_SCHEMA: string;
|
|
|
31
31
|
export declare const TYPE_ALIAS_SCHEMA: string;
|
|
32
32
|
export declare const CONST_SCHEMA: string;
|
|
33
33
|
export declare const STATIC_SCHEMA: string;
|
|
34
|
+
export declare const VARIABLE_SCHEMA: string;
|
|
34
35
|
export declare const PROPERTY_SCHEMA: string;
|
|
35
36
|
export declare const RECORD_SCHEMA: string;
|
|
36
37
|
export declare const DELEGATE_SCHEMA: string;
|
|
@@ -41,8 +42,15 @@ export declare const MODULE_SCHEMA: string;
|
|
|
41
42
|
export declare const ROUTE_SCHEMA = "\nCREATE NODE TABLE Route (\n id STRING,\n name STRING,\n filePath STRING,\n responseKeys STRING[],\n errorKeys STRING[],\n middleware STRING[],\n PRIMARY KEY (id)\n)";
|
|
42
43
|
export declare const TOOL_SCHEMA = "\nCREATE NODE TABLE Tool (\n id STRING,\n name STRING,\n filePath STRING,\n description STRING,\n PRIMARY KEY (id)\n)";
|
|
43
44
|
export declare const SECTION_SCHEMA = "\nCREATE NODE TABLE Section (\n id STRING,\n name STRING,\n filePath STRING,\n startLine INT64,\n endLine INT64,\n level INT64,\n content STRING,\n description STRING,\n PRIMARY KEY (id)\n)";
|
|
44
|
-
export declare const RELATION_SCHEMA = "\nCREATE REL TABLE CodeRelation (\n FROM File TO File,\n FROM File TO Folder,\n FROM File TO Function,\n FROM File TO Class,\n FROM File TO Interface,\n FROM File TO Method,\n FROM File TO CodeElement,\n FROM File TO `Struct`,\n FROM File TO `Enum`,\n FROM File TO `Macro`,\n FROM File TO `Typedef`,\n FROM File TO `Union`,\n FROM File TO `Namespace`,\n FROM File TO `Trait`,\n FROM File TO `Impl`,\n FROM File TO `TypeAlias`,\n FROM File TO `Const`,\n FROM File TO `Static`,\n FROM File TO `Property`,\n FROM File TO `Record`,\n FROM File TO `Delegate`,\n FROM File TO `Annotation`,\n FROM File TO `Constructor`,\n FROM File TO `Template`,\n FROM File TO `Module`,\n FROM File TO Section,\n FROM Folder TO Folder,\n FROM Folder TO File,\n FROM Function TO Function,\n FROM Function TO Method,\n FROM Function TO Class,\n FROM Function TO Community,\n FROM Function TO `Macro`,\n FROM Function TO `Struct`,\n FROM Function TO `Template`,\n FROM Function TO `Enum`,\n FROM Function TO `Namespace`,\n FROM Function TO `TypeAlias`,\n FROM Function TO `Module`,\n FROM Function TO `Impl`,\n FROM Function TO Interface,\n FROM Function TO `Constructor`,\n FROM Function TO `Const`,\n FROM Function TO `Typedef`,\n FROM Function TO `Union`,\n FROM Function TO `Property`,\n FROM Function TO CodeElement,\n FROM Class TO Method,\n FROM Class TO Function,\n FROM Class TO Class,\n FROM Class TO Interface,\n FROM Class TO Community,\n FROM Class TO `Template`,\n FROM Class TO `TypeAlias`,\n FROM Class TO `Struct`,\n FROM Class TO `Enum`,\n FROM Class TO `Annotation`,\n FROM Class TO `Constructor`,\n FROM Class TO `Trait`,\n FROM Class TO `Macro`,\n FROM Class TO `Impl`,\n FROM Class TO `Union`,\n FROM Class TO `Namespace`,\n FROM Class TO `Typedef`,\n FROM Class TO `Property`,\n FROM Method TO Function,\n FROM Method TO Method,\n FROM Method TO Class,\n FROM Method TO Community,\n FROM Method TO `Template`,\n FROM Method TO `Struct`,\n FROM Method TO 
`TypeAlias`,\n FROM Method TO `Enum`,\n FROM Method TO `Macro`,\n FROM Method TO `Namespace`,\n FROM Method TO `Module`,\n FROM Method TO `Impl`,\n FROM Method TO Interface,\n FROM Method TO `Constructor`,\n FROM Method TO `Property`,\n FROM Method TO CodeElement,\n FROM `Template` TO `Template`,\n FROM `Template` TO Function,\n FROM `Template` TO Method,\n FROM `Template` TO Class,\n FROM `Template` TO `Struct`,\n FROM `Template` TO `TypeAlias`,\n FROM `Template` TO `Enum`,\n FROM `Template` TO `Macro`,\n FROM `Template` TO Interface,\n FROM `Template` TO `Constructor`,\n FROM `Module` TO `Module`,\n FROM Section TO Section,\n FROM Section TO File,\n FROM File TO Route,\n FROM Function TO Route,\n FROM Method TO Route,\n FROM File TO Tool,\n FROM Function TO Tool,\n FROM Method TO Tool,\n FROM CodeElement TO Community,\n FROM Interface TO Community,\n FROM Interface TO Function,\n FROM Interface TO Method,\n FROM Interface TO Class,\n FROM Interface TO Interface,\n FROM Interface TO `TypeAlias`,\n FROM Interface TO `Struct`,\n FROM Interface TO `Constructor`,\n FROM Interface TO `Property`,\n FROM `Struct` TO Community,\n FROM `Struct` TO `Trait`,\n FROM `Struct` TO `Struct`,\n FROM `Struct` TO Class,\n FROM `Struct` TO `Enum`,\n FROM `Struct` TO Function,\n FROM `Struct` TO Method,\n FROM `Struct` TO Interface,\n FROM `Struct` TO `Constructor`,\n FROM `Struct` TO `Property`,\n FROM `Enum` TO `Enum`,\n FROM `Enum` TO Community,\n FROM `Enum` TO Class,\n FROM `Enum` TO Interface,\n FROM `Macro` TO Community,\n FROM `Macro` TO Function,\n FROM `Macro` TO Method,\n FROM `Module` TO Function,\n FROM `Module` TO Method,\n FROM `Typedef` TO Community,\n FROM `Union` TO Community,\n FROM `Namespace` TO Community,\n FROM `Namespace` TO `Struct`,\n FROM `Trait` TO Method,\n FROM `Trait` TO `Constructor`,\n FROM `Trait` TO `Property`,\n FROM `Trait` TO Community,\n FROM `Impl` TO Method,\n FROM `Impl` TO `Constructor`,\n FROM `Impl` TO `Property`,\n FROM `Impl` TO 
Community,\n FROM `Impl` TO `Trait`,\n FROM `Impl` TO `Struct`,\n FROM `Impl` TO `Impl`,\n FROM `TypeAlias` TO Community,\n FROM `TypeAlias` TO `Trait`,\n FROM `TypeAlias` TO Class,\n FROM `Const` TO Community,\n FROM `Static` TO Community,\n FROM `Property` TO Community,\n FROM `Record` TO Method,\n FROM `Record` TO `Constructor`,\n FROM `Record` TO `Property`,\n FROM `Record` TO Community,\n FROM `Delegate` TO Community,\n FROM `Annotation` TO Community,\n FROM `Constructor` TO Community,\n FROM `Constructor` TO Interface,\n FROM `Constructor` TO Class,\n FROM `Constructor` TO Method,\n FROM `Constructor` TO Function,\n FROM `Constructor` TO `Constructor`,\n FROM `Constructor` TO `Struct`,\n FROM `Constructor` TO `Macro`,\n FROM `Constructor` TO `Template`,\n FROM `Constructor` TO `TypeAlias`,\n FROM `Constructor` TO `Enum`,\n FROM `Constructor` TO `Annotation`,\n FROM `Constructor` TO `Impl`,\n FROM `Constructor` TO `Namespace`,\n FROM `Constructor` TO `Module`,\n FROM `Constructor` TO `Property`,\n FROM `Constructor` TO `Typedef`,\n FROM `Template` TO Community,\n FROM `Module` TO Community,\n FROM Function TO Process,\n FROM Method TO Process,\n FROM Class TO Process,\n FROM Interface TO Process,\n FROM `Struct` TO Process,\n FROM `Constructor` TO Process,\n FROM `Module` TO Process,\n FROM `Macro` TO Process,\n FROM `Impl` TO Process,\n FROM `Typedef` TO Process,\n FROM `TypeAlias` TO Process,\n FROM `Enum` TO Process,\n FROM `Union` TO Process,\n FROM `Namespace` TO Process,\n FROM `Trait` TO Process,\n FROM `Const` TO Process,\n FROM `Static` TO Process,\n FROM `Property` TO Process,\n FROM `Record` TO Process,\n FROM `Delegate` TO Process,\n FROM `Annotation` TO Process,\n FROM `Template` TO Process,\n FROM CodeElement TO Process,\n FROM Route TO Process,\n FROM Tool TO Process,\n type STRING,\n confidence DOUBLE,\n reason STRING,\n step INT32\n)";
|
|
45
|
+
export declare const RELATION_SCHEMA = "\nCREATE REL TABLE CodeRelation (\n FROM File TO File,\n FROM File TO Folder,\n FROM File TO Function,\n FROM File TO Class,\n FROM File TO Interface,\n FROM File TO Method,\n FROM File TO CodeElement,\n FROM File TO `Struct`,\n FROM File TO `Enum`,\n FROM File TO `Macro`,\n FROM File TO `Typedef`,\n FROM File TO `Union`,\n FROM File TO `Namespace`,\n FROM File TO `Trait`,\n FROM File TO `Impl`,\n FROM File TO `TypeAlias`,\n FROM File TO `Const`,\n FROM File TO `Static`,\n FROM File TO `Variable`,\n FROM File TO `Property`,\n FROM File TO `Record`,\n FROM File TO `Delegate`,\n FROM File TO `Annotation`,\n FROM File TO `Constructor`,\n FROM File TO `Template`,\n FROM File TO `Module`,\n FROM File TO Section,\n FROM Folder TO Folder,\n FROM Folder TO File,\n FROM Function TO Function,\n FROM Function TO Method,\n FROM Function TO Class,\n FROM Function TO Community,\n FROM Function TO `Macro`,\n FROM Function TO `Struct`,\n FROM Function TO `Template`,\n FROM Function TO `Enum`,\n FROM Function TO `Namespace`,\n FROM Function TO `TypeAlias`,\n FROM Function TO `Module`,\n FROM Function TO `Impl`,\n FROM Function TO Interface,\n FROM Function TO `Constructor`,\n FROM Function TO `Const`,\n FROM Function TO `Typedef`,\n FROM Function TO `Union`,\n FROM Function TO `Property`,\n FROM Function TO CodeElement,\n FROM Class TO Method,\n FROM Class TO Function,\n FROM Class TO Class,\n FROM Class TO Interface,\n FROM Class TO Community,\n FROM Class TO `Template`,\n FROM Class TO `TypeAlias`,\n FROM Class TO `Struct`,\n FROM Class TO `Enum`,\n FROM Class TO `Annotation`,\n FROM Class TO `Constructor`,\n FROM Class TO `Trait`,\n FROM Class TO `Macro`,\n FROM Class TO `Impl`,\n FROM Class TO `Union`,\n FROM Class TO `Namespace`,\n FROM Class TO `Typedef`,\n FROM Class TO `Property`,\n FROM Method TO Function,\n FROM Method TO Method,\n FROM Method TO Class,\n FROM Method TO Community,\n FROM Method TO `Template`,\n FROM Method TO 
`Struct`,\n FROM Method TO `TypeAlias`,\n FROM Method TO `Enum`,\n FROM Method TO `Macro`,\n FROM Method TO `Namespace`,\n FROM Method TO `Module`,\n FROM Method TO `Impl`,\n FROM Method TO Interface,\n FROM Method TO `Constructor`,\n FROM Method TO `Property`,\n FROM Method TO CodeElement,\n FROM `Template` TO `Template`,\n FROM `Template` TO Function,\n FROM `Template` TO Method,\n FROM `Template` TO Class,\n FROM `Template` TO `Struct`,\n FROM `Template` TO `TypeAlias`,\n FROM `Template` TO `Enum`,\n FROM `Template` TO `Macro`,\n FROM `Template` TO Interface,\n FROM `Template` TO `Constructor`,\n FROM `Module` TO `Module`,\n FROM Section TO Section,\n FROM Section TO File,\n FROM File TO Route,\n FROM Function TO Route,\n FROM Method TO Route,\n FROM File TO Tool,\n FROM Function TO Tool,\n FROM Method TO Tool,\n FROM CodeElement TO Community,\n FROM Interface TO Community,\n FROM Interface TO Function,\n FROM Interface TO Method,\n FROM Interface TO Class,\n FROM Interface TO Interface,\n FROM Interface TO `TypeAlias`,\n FROM Interface TO `Struct`,\n FROM Interface TO `Constructor`,\n FROM Interface TO `Property`,\n FROM `Struct` TO Community,\n FROM `Struct` TO `Trait`,\n FROM `Struct` TO `Struct`,\n FROM `Struct` TO Class,\n FROM `Struct` TO `Enum`,\n FROM `Struct` TO Function,\n FROM `Struct` TO Method,\n FROM `Struct` TO Interface,\n FROM `Struct` TO `Constructor`,\n FROM `Struct` TO `Property`,\n FROM `Enum` TO `Enum`,\n FROM `Enum` TO Community,\n FROM `Enum` TO Class,\n FROM `Enum` TO Interface,\n FROM `Macro` TO Community,\n FROM `Macro` TO Function,\n FROM `Macro` TO Method,\n FROM `Module` TO Function,\n FROM `Module` TO Method,\n FROM `Typedef` TO Community,\n FROM `Union` TO Community,\n FROM `Namespace` TO Community,\n FROM `Namespace` TO `Struct`,\n FROM `Trait` TO Method,\n FROM `Trait` TO `Constructor`,\n FROM `Trait` TO `Property`,\n FROM `Trait` TO Community,\n FROM `Impl` TO Method,\n FROM `Impl` TO `Constructor`,\n FROM `Impl` TO 
`Property`,\n FROM `Impl` TO Community,\n FROM `Impl` TO `Trait`,\n FROM `Impl` TO `Struct`,\n FROM `Impl` TO `Impl`,\n FROM `TypeAlias` TO Community,\n FROM `TypeAlias` TO `Trait`,\n FROM `TypeAlias` TO Class,\n FROM `Const` TO Community,\n FROM `Static` TO Community,\n FROM `Variable` TO Community,\n FROM `Property` TO Community,\n FROM `Record` TO Method,\n FROM `Record` TO `Constructor`,\n FROM `Record` TO `Property`,\n FROM `Record` TO Community,\n FROM `Delegate` TO Community,\n FROM `Annotation` TO Community,\n FROM `Constructor` TO Community,\n FROM `Constructor` TO Interface,\n FROM `Constructor` TO Class,\n FROM `Constructor` TO Method,\n FROM `Constructor` TO Function,\n FROM `Constructor` TO `Constructor`,\n FROM `Constructor` TO `Struct`,\n FROM `Constructor` TO `Macro`,\n FROM `Constructor` TO `Template`,\n FROM `Constructor` TO `TypeAlias`,\n FROM `Constructor` TO `Enum`,\n FROM `Constructor` TO `Annotation`,\n FROM `Constructor` TO `Impl`,\n FROM `Constructor` TO `Namespace`,\n FROM `Constructor` TO `Module`,\n FROM `Constructor` TO `Property`,\n FROM `Constructor` TO `Typedef`,\n FROM `Template` TO Community,\n FROM `Module` TO Community,\n FROM Function TO Process,\n FROM Method TO Process,\n FROM Class TO Process,\n FROM Interface TO Process,\n FROM `Struct` TO Process,\n FROM `Constructor` TO Process,\n FROM `Module` TO Process,\n FROM `Macro` TO Process,\n FROM `Impl` TO Process,\n FROM `Typedef` TO Process,\n FROM `TypeAlias` TO Process,\n FROM `Enum` TO Process,\n FROM `Union` TO Process,\n FROM `Namespace` TO Process,\n FROM `Trait` TO Process,\n FROM `Const` TO Process,\n FROM `Static` TO Process,\n FROM `Variable` TO Process,\n FROM `Property` TO Process,\n FROM `Record` TO Process,\n FROM `Delegate` TO Process,\n FROM `Annotation` TO Process,\n FROM `Template` TO Process,\n FROM CodeElement TO Process,\n FROM Route TO Process,\n FROM Tool TO Process,\n type STRING,\n confidence DOUBLE,\n reason STRING,\n step INT32\n)";
|
|
45
46
|
export declare const EMBEDDING_DIMS: number;
|
|
47
|
+
/** HNSW vector index name for the CodeEmbedding table. */
|
|
48
|
+
export declare const EMBEDDING_INDEX_NAME = "code_embedding_idx";
|
|
49
|
+
/**
|
|
50
|
+
* Sentinel value for "no content hash available" — used in legacy DBs and null rows.
|
|
51
|
+
* Nodes with this hash are always treated as stale and re-embedded.
|
|
52
|
+
*/
|
|
53
|
+
export declare const STALE_HASH_SENTINEL = "";
|
|
46
54
|
export declare const EMBEDDING_SCHEMA: string;
|
|
47
55
|
/**
|
|
48
56
|
* Create vector index for semantic search
|
package/dist/core/lbug/schema.js
CHANGED
|
@@ -149,6 +149,7 @@ export const IMPL_SCHEMA = CODE_ELEMENT_BASE('Impl');
|
|
|
149
149
|
export const TYPE_ALIAS_SCHEMA = CODE_ELEMENT_BASE('TypeAlias');
|
|
150
150
|
export const CONST_SCHEMA = CODE_ELEMENT_BASE('Const');
|
|
151
151
|
export const STATIC_SCHEMA = CODE_ELEMENT_BASE('Static');
|
|
152
|
+
export const VARIABLE_SCHEMA = CODE_ELEMENT_BASE('Variable');
|
|
152
153
|
export const PROPERTY_SCHEMA = CODE_ELEMENT_BASE('Property');
|
|
153
154
|
export const RECORD_SCHEMA = CODE_ELEMENT_BASE('Record');
|
|
154
155
|
export const DELEGATE_SCHEMA = CODE_ELEMENT_BASE('Delegate');
|
|
@@ -213,6 +214,7 @@ CREATE REL TABLE ${REL_TABLE_NAME} (
|
|
|
213
214
|
FROM File TO \`TypeAlias\`,
|
|
214
215
|
FROM File TO \`Const\`,
|
|
215
216
|
FROM File TO \`Static\`,
|
|
217
|
+
FROM File TO \`Variable\`,
|
|
216
218
|
FROM File TO \`Property\`,
|
|
217
219
|
FROM File TO \`Record\`,
|
|
218
220
|
FROM File TO \`Delegate\`,
|
|
@@ -344,6 +346,7 @@ CREATE REL TABLE ${REL_TABLE_NAME} (
|
|
|
344
346
|
FROM \`TypeAlias\` TO Class,
|
|
345
347
|
FROM \`Const\` TO Community,
|
|
346
348
|
FROM \`Static\` TO Community,
|
|
349
|
+
FROM \`Variable\` TO Community,
|
|
347
350
|
FROM \`Property\` TO Community,
|
|
348
351
|
FROM \`Record\` TO Method,
|
|
349
352
|
FROM \`Record\` TO \`Constructor\`,
|
|
@@ -387,6 +390,7 @@ CREATE REL TABLE ${REL_TABLE_NAME} (
|
|
|
387
390
|
FROM \`Trait\` TO Process,
|
|
388
391
|
FROM \`Const\` TO Process,
|
|
389
392
|
FROM \`Static\` TO Process,
|
|
393
|
+
FROM \`Variable\` TO Process,
|
|
390
394
|
FROM \`Property\` TO Process,
|
|
391
395
|
FROM \`Record\` TO Process,
|
|
392
396
|
FROM \`Delegate\` TO Process,
|
|
@@ -410,18 +414,30 @@ if (Number.isNaN(_rawDims) || _rawDims <= 0) {
|
|
|
410
414
|
throw new Error(`GITNEXUS_EMBEDDING_DIMS must be a positive integer, got "${process.env.GITNEXUS_EMBEDDING_DIMS}"`);
|
|
411
415
|
}
|
|
412
416
|
export const EMBEDDING_DIMS = _rawDims;
|
|
417
|
+
/** HNSW vector index name for the CodeEmbedding table. */
|
|
418
|
+
export const EMBEDDING_INDEX_NAME = 'code_embedding_idx';
|
|
419
|
+
/**
|
|
420
|
+
* Sentinel value for "no content hash available" — used in legacy DBs and null rows.
|
|
421
|
+
* Nodes with this hash are always treated as stale and re-embedded.
|
|
422
|
+
*/
|
|
423
|
+
export const STALE_HASH_SENTINEL = '';
|
|
413
424
|
export const EMBEDDING_SCHEMA = `
|
|
414
425
|
CREATE NODE TABLE ${EMBEDDING_TABLE_NAME} (
|
|
426
|
+
id STRING,
|
|
415
427
|
nodeId STRING,
|
|
428
|
+
chunkIndex INT32,
|
|
429
|
+
startLine INT64,
|
|
430
|
+
endLine INT64,
|
|
416
431
|
embedding FLOAT[${EMBEDDING_DIMS}],
|
|
417
|
-
|
|
432
|
+
contentHash STRING,
|
|
433
|
+
PRIMARY KEY (id)
|
|
418
434
|
)`;
|
|
419
435
|
/**
|
|
420
436
|
* Create vector index for semantic search
|
|
421
437
|
* Uses HNSW (Hierarchical Navigable Small World) algorithm with cosine similarity
|
|
422
438
|
*/
|
|
423
439
|
export const CREATE_VECTOR_INDEX_QUERY = `
|
|
424
|
-
CALL CREATE_VECTOR_INDEX('${EMBEDDING_TABLE_NAME}', '
|
|
440
|
+
CALL CREATE_VECTOR_INDEX('${EMBEDDING_TABLE_NAME}', '${EMBEDDING_INDEX_NAME}', 'embedding', metric := 'cosine')
|
|
425
441
|
`;
|
|
426
442
|
// ============================================================================
|
|
427
443
|
// ALL SCHEMA QUERIES IN ORDER
|
|
@@ -449,6 +465,7 @@ export const NODE_SCHEMA_QUERIES = [
|
|
|
449
465
|
TYPE_ALIAS_SCHEMA,
|
|
450
466
|
CONST_SCHEMA,
|
|
451
467
|
STATIC_SCHEMA,
|
|
468
|
+
VARIABLE_SCHEMA,
|
|
452
469
|
PROPERTY_SCHEMA,
|
|
453
470
|
RECORD_SCHEMA,
|
|
454
471
|
DELEGATE_SCHEMA,
|
package/dist/core/run-analyze.js
CHANGED
|
@@ -15,6 +15,8 @@ import { initLbug, loadGraphToLbug, getLbugStats, executeQuery, executeWithReuse
|
|
|
15
15
|
import { getStoragePaths, saveMeta, loadMeta, addToGitignore, registerRepo, cleanupOldKuzuFiles, } from '../storage/repo-manager.js';
|
|
16
16
|
import { getCurrentCommit, hasGitDir } from '../storage/git.js';
|
|
17
17
|
import { generateAIContextFiles } from '../cli/ai-context.js';
|
|
18
|
+
import { EMBEDDING_TABLE_NAME } from './lbug/schema.js';
|
|
19
|
+
import { STALE_HASH_SENTINEL } from './lbug/schema.js';
|
|
18
20
|
/** Threshold: auto-skip embeddings for repos with more nodes than this */
|
|
19
21
|
const EMBEDDING_NODE_LIMIT = 50_000;
|
|
20
22
|
export const PHASE_LABELS = {
|
|
@@ -144,12 +146,12 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
144
146
|
}
|
|
145
147
|
else {
|
|
146
148
|
progress('embeddings', 88, `Restoring ${cachedEmbeddings.length} cached embeddings...`);
|
|
149
|
+
const { batchInsertEmbeddings: batchInsert } = await import('./embeddings/embedding-pipeline.js');
|
|
147
150
|
const EMBED_BATCH = 200;
|
|
148
151
|
for (let i = 0; i < cachedEmbeddings.length; i += EMBED_BATCH) {
|
|
149
152
|
const batch = cachedEmbeddings.slice(i, i + EMBED_BATCH);
|
|
150
|
-
const paramsList = batch.map((e) => ({ nodeId: e.nodeId, embedding: e.embedding }));
|
|
151
153
|
try {
|
|
152
|
-
await executeWithReusedStatement
|
|
154
|
+
await batchInsert(executeWithReusedStatement, batch);
|
|
153
155
|
}
|
|
154
156
|
catch {
|
|
155
157
|
/* some may fail if node was removed, that's fine */
|
|
@@ -170,6 +172,17 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
170
172
|
const httpMode = isHttpMode();
|
|
171
173
|
progress('embeddings', 90, httpMode ? 'Connecting to embedding endpoint...' : 'Loading embedding model...');
|
|
172
174
|
const { runEmbeddingPipeline } = await import('./embeddings/embedding-pipeline.js');
|
|
175
|
+
// Build a Map<nodeId, contentHash> from cached embeddings for incremental mode
|
|
176
|
+
let existingEmbeddings;
|
|
177
|
+
if (cachedEmbeddingNodeIds.size > 0) {
|
|
178
|
+
existingEmbeddings = new Map();
|
|
179
|
+
for (const e of cachedEmbeddings) {
|
|
180
|
+
existingEmbeddings.set(e.nodeId, e.contentHash ?? STALE_HASH_SENTINEL);
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
const { readServerMapping } = await import('./embeddings/server-mapping.js');
|
|
184
|
+
const projectName = path.basename(repoPath);
|
|
185
|
+
const serverName = await readServerMapping(projectName);
|
|
173
186
|
await runEmbeddingPipeline(executeQuery, executeWithReusedStatement, (p) => {
|
|
174
187
|
const scaled = 90 + Math.round((p.percent / 100) * 8);
|
|
175
188
|
const label = p.phase === 'loading-model'
|
|
@@ -178,14 +191,14 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
178
191
|
: 'Loading embedding model...'
|
|
179
192
|
: `Embedding ${p.nodesProcessed || 0}/${p.totalNodes || '?'}`;
|
|
180
193
|
progress('embeddings', scaled, label);
|
|
181
|
-
}, {}, cachedEmbeddingNodeIds.size > 0 ? cachedEmbeddingNodeIds : undefined);
|
|
194
|
+
}, {}, cachedEmbeddingNodeIds.size > 0 ? cachedEmbeddingNodeIds : undefined, { repoName: projectName, serverName }, existingEmbeddings);
|
|
182
195
|
}
|
|
183
196
|
// ── Phase 5: Finalize (98–100%) ───────────────────────────────────
|
|
184
197
|
progress('done', 98, 'Saving metadata...');
|
|
185
198
|
// Count embeddings in the index (cached + newly generated)
|
|
186
199
|
let embeddingCount = 0;
|
|
187
200
|
try {
|
|
188
|
-
const embResult = await executeQuery(`MATCH (e
|
|
201
|
+
const embResult = await executeQuery(`MATCH (e:${EMBEDDING_TABLE_NAME}) RETURN count(e) AS cnt`);
|
|
189
202
|
embeddingCount = embResult?.[0]?.cnt ?? 0;
|
|
190
203
|
}
|
|
191
204
|
catch {
|
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
import Parser from 'tree-sitter';
|
|
2
2
|
import { SupportedLanguages } from '../../_shared/index.js';
|
|
3
3
|
export declare const isLanguageAvailable: (language: SupportedLanguages) => boolean;
|
|
4
|
+
export declare const resolveLanguageKey: (language: SupportedLanguages, filePath?: string) => string;
|
|
5
|
+
export declare const getLanguageGrammar: (language: SupportedLanguages, filePath?: string) => any;
|
|
4
6
|
export declare const loadParser: () => Promise<Parser>;
|
|
5
7
|
export declare const loadLanguage: (language: SupportedLanguages, filePath?: string) => Promise<void>;
|
|
8
|
+
export declare const createParserForLanguage: (language: SupportedLanguages, filePath?: string) => Promise<Parser>;
|
|
@@ -50,6 +50,17 @@ const languageMap = {
|
|
|
50
50
|
...(Swift ? { [SupportedLanguages.Swift]: Swift } : {}),
|
|
51
51
|
};
|
|
52
52
|
export const isLanguageAvailable = (language) => language in languageMap;
|
|
53
|
+
export const resolveLanguageKey = (language, filePath) => language === SupportedLanguages.TypeScript && filePath?.endsWith('.tsx')
|
|
54
|
+
? `${language}:tsx`
|
|
55
|
+
: language;
|
|
56
|
+
export const getLanguageGrammar = (language, filePath) => {
|
|
57
|
+
const key = resolveLanguageKey(language, filePath);
|
|
58
|
+
const lang = languageMap[key];
|
|
59
|
+
if (!lang) {
|
|
60
|
+
throw new Error(`Unsupported language: ${language}`);
|
|
61
|
+
}
|
|
62
|
+
return lang;
|
|
63
|
+
};
|
|
53
64
|
export const loadParser = async () => {
|
|
54
65
|
if (parser)
|
|
55
66
|
return parser;
|
|
@@ -59,12 +70,10 @@ export const loadParser = async () => {
|
|
|
59
70
|
export const loadLanguage = async (language, filePath) => {
|
|
60
71
|
if (!parser)
|
|
61
72
|
await loadParser();
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
const
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
}
|
|
69
|
-
parser.setLanguage(lang);
|
|
73
|
+
parser.setLanguage(getLanguageGrammar(language, filePath));
|
|
74
|
+
};
|
|
75
|
+
export const createParserForLanguage = async (language, filePath) => {
|
|
76
|
+
const freshParser = new Parser();
|
|
77
|
+
freshParser.setLanguage(getLanguageGrammar(language, filePath));
|
|
78
|
+
return freshParser;
|
|
70
79
|
};
|
|
@@ -30,6 +30,11 @@ export const initEmbedder = async () => {
|
|
|
30
30
|
initPromise = (async () => {
|
|
31
31
|
try {
|
|
32
32
|
env.allowLocalModels = false;
|
|
33
|
+
// Default cache to user-writable location. transformers.js defaults to
|
|
34
|
+
// ./node_modules/.cache inside its own install dir, which is unwritable
|
|
35
|
+
// when gitnexus is installed globally (e.g. /usr/lib/node_modules/).
|
|
36
|
+
// Respect HF_HOME if set, otherwise fall back to ~/.cache/huggingface.
|
|
37
|
+
env.cacheDir = process.env.HF_HOME ?? `${process.env.HOME}/.cache/huggingface`;
|
|
33
38
|
console.error('GitNexus: Loading embedding model (first search may take a moment)...');
|
|
34
39
|
// Try GPU first (DirectML on Windows, CUDA on Linux), fall back to CPU
|
|
35
40
|
const isWindows = process.platform === 'win32';
|