gitnexus 1.6.2-rc.2 → 1.6.2-rc.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198)
  1. package/dist/_shared/lbug/schema-constants.d.ts +1 -1
  2. package/dist/_shared/lbug/schema-constants.d.ts.map +1 -1
  3. package/dist/_shared/lbug/schema-constants.js +1 -0
  4. package/dist/_shared/lbug/schema-constants.js.map +1 -1
  5. package/dist/cli/analyze.js +3 -0
  6. package/dist/core/embeddings/ast-utils.d.ts +22 -0
  7. package/dist/core/embeddings/ast-utils.js +105 -0
  8. package/dist/core/embeddings/character-chunk.d.ts +12 -0
  9. package/dist/core/embeddings/character-chunk.js +43 -0
  10. package/dist/core/embeddings/chunker.d.ts +14 -0
  11. package/dist/core/embeddings/chunker.js +234 -0
  12. package/dist/core/embeddings/embedder.js +5 -0
  13. package/dist/core/embeddings/embedding-pipeline.d.ts +29 -24
  14. package/dist/core/embeddings/embedding-pipeline.js +244 -125
  15. package/dist/core/embeddings/line-index.d.ts +7 -0
  16. package/dist/core/embeddings/line-index.js +42 -0
  17. package/dist/core/embeddings/server-mapping.d.ts +15 -0
  18. package/dist/core/embeddings/server-mapping.js +33 -0
  19. package/dist/core/embeddings/structural-extractor.d.ts +15 -0
  20. package/dist/core/embeddings/structural-extractor.js +58 -0
  21. package/dist/core/embeddings/text-generator.d.ts +20 -13
  22. package/dist/core/embeddings/text-generator.js +151 -119
  23. package/dist/core/embeddings/types.d.ts +81 -3
  24. package/dist/core/embeddings/types.js +105 -3
  25. package/dist/core/group/extractors/http-patterns/node.js +130 -0
  26. package/dist/core/group/extractors/manifest-extractor.js +20 -5
  27. package/dist/core/group/sync.js +49 -1
  28. package/dist/core/ingestion/call-extractors/configs/c-cpp.d.ts +3 -0
  29. package/dist/core/ingestion/call-extractors/configs/c-cpp.js +8 -0
  30. package/dist/core/ingestion/call-extractors/configs/csharp.d.ts +2 -0
  31. package/dist/core/ingestion/call-extractors/configs/csharp.js +6 -0
  32. package/dist/core/ingestion/call-extractors/configs/dart.d.ts +2 -0
  33. package/dist/core/ingestion/call-extractors/configs/dart.js +5 -0
  34. package/dist/core/ingestion/call-extractors/configs/go.d.ts +2 -0
  35. package/dist/core/ingestion/call-extractors/configs/go.js +5 -0
  36. package/dist/core/ingestion/call-extractors/configs/jvm.d.ts +3 -0
  37. package/dist/core/ingestion/call-extractors/configs/jvm.js +51 -0
  38. package/dist/core/ingestion/call-extractors/configs/php.d.ts +2 -0
  39. package/dist/core/ingestion/call-extractors/configs/php.js +5 -0
  40. package/dist/core/ingestion/call-extractors/configs/python.d.ts +2 -0
  41. package/dist/core/ingestion/call-extractors/configs/python.js +5 -0
  42. package/dist/core/ingestion/call-extractors/configs/ruby.d.ts +2 -0
  43. package/dist/core/ingestion/call-extractors/configs/ruby.js +5 -0
  44. package/dist/core/ingestion/call-extractors/configs/rust.d.ts +2 -0
  45. package/dist/core/ingestion/call-extractors/configs/rust.js +5 -0
  46. package/dist/core/ingestion/call-extractors/configs/swift.d.ts +2 -0
  47. package/dist/core/ingestion/call-extractors/configs/swift.js +5 -0
  48. package/dist/core/ingestion/call-extractors/configs/typescript-javascript.d.ts +3 -0
  49. package/dist/core/ingestion/call-extractors/configs/typescript-javascript.js +8 -0
  50. package/dist/core/ingestion/call-extractors/generic.d.ts +5 -0
  51. package/dist/core/ingestion/call-extractors/generic.js +59 -0
  52. package/dist/core/ingestion/call-processor.d.ts +1 -3
  53. package/dist/core/ingestion/call-processor.js +49 -47
  54. package/dist/core/ingestion/call-types.d.ts +60 -0
  55. package/dist/core/ingestion/call-types.js +2 -0
  56. package/dist/core/ingestion/class-extractors/configs/c-cpp.d.ts +3 -0
  57. package/dist/core/ingestion/class-extractors/configs/c-cpp.js +11 -0
  58. package/dist/core/ingestion/class-extractors/configs/csharp.d.ts +2 -0
  59. package/dist/core/ingestion/class-extractors/configs/csharp.js +21 -0
  60. package/dist/core/ingestion/class-extractors/configs/dart.d.ts +2 -0
  61. package/dist/core/ingestion/class-extractors/configs/dart.js +7 -0
  62. package/dist/core/ingestion/class-extractors/configs/go.d.ts +2 -0
  63. package/dist/core/ingestion/class-extractors/configs/go.js +20 -0
  64. package/dist/core/ingestion/class-extractors/configs/jvm.d.ts +3 -0
  65. package/dist/core/ingestion/class-extractors/configs/jvm.js +35 -0
  66. package/dist/core/ingestion/class-extractors/configs/php.d.ts +2 -0
  67. package/dist/core/ingestion/class-extractors/configs/php.js +7 -0
  68. package/dist/core/ingestion/class-extractors/configs/python.d.ts +2 -0
  69. package/dist/core/ingestion/class-extractors/configs/python.js +7 -0
  70. package/dist/core/ingestion/class-extractors/configs/ruby.d.ts +2 -0
  71. package/dist/core/ingestion/class-extractors/configs/ruby.js +7 -0
  72. package/dist/core/ingestion/class-extractors/configs/rust.d.ts +2 -0
  73. package/dist/core/ingestion/class-extractors/configs/rust.js +7 -0
  74. package/dist/core/ingestion/class-extractors/configs/swift.d.ts +2 -0
  75. package/dist/core/ingestion/class-extractors/configs/swift.js +18 -0
  76. package/dist/core/ingestion/class-extractors/configs/typescript-javascript.d.ts +4 -0
  77. package/dist/core/ingestion/class-extractors/configs/typescript-javascript.js +28 -0
  78. package/dist/core/ingestion/field-types.d.ts +1 -1
  79. package/dist/core/ingestion/import-resolvers/configs/c-cpp.d.ts +7 -0
  80. package/dist/core/ingestion/import-resolvers/configs/c-cpp.js +14 -0
  81. package/dist/core/ingestion/import-resolvers/configs/csharp.d.ts +8 -0
  82. package/dist/core/ingestion/import-resolvers/configs/csharp.js +27 -0
  83. package/dist/core/ingestion/import-resolvers/configs/dart.d.ts +17 -0
  84. package/dist/core/ingestion/import-resolvers/{dart.js → configs/dart.js} +26 -16
  85. package/dist/core/ingestion/import-resolvers/configs/go.d.ts +8 -0
  86. package/dist/core/ingestion/import-resolvers/configs/go.js +26 -0
  87. package/dist/core/ingestion/import-resolvers/configs/jvm.d.ts +13 -0
  88. package/dist/core/ingestion/import-resolvers/configs/jvm.js +68 -0
  89. package/dist/core/ingestion/import-resolvers/configs/php.d.ts +8 -0
  90. package/dist/core/ingestion/import-resolvers/configs/php.js +15 -0
  91. package/dist/core/ingestion/import-resolvers/configs/python.d.ts +12 -0
  92. package/dist/core/ingestion/import-resolvers/configs/python.js +41 -0
  93. package/dist/core/ingestion/import-resolvers/configs/ruby.d.ts +8 -0
  94. package/dist/core/ingestion/import-resolvers/configs/ruby.js +16 -0
  95. package/dist/core/ingestion/import-resolvers/configs/rust.d.ts +8 -0
  96. package/dist/core/ingestion/import-resolvers/configs/rust.js +54 -0
  97. package/dist/core/ingestion/import-resolvers/configs/swift.d.ts +8 -0
  98. package/dist/core/ingestion/import-resolvers/{swift.js → configs/swift.js} +10 -5
  99. package/dist/core/ingestion/import-resolvers/configs/typescript-javascript.d.ts +9 -0
  100. package/dist/core/ingestion/import-resolvers/configs/typescript-javascript.js +23 -0
  101. package/dist/core/ingestion/import-resolvers/csharp.d.ts +4 -5
  102. package/dist/core/ingestion/import-resolvers/csharp.js +4 -20
  103. package/dist/core/ingestion/import-resolvers/go.d.ts +4 -5
  104. package/dist/core/ingestion/import-resolvers/go.js +4 -19
  105. package/dist/core/ingestion/import-resolvers/jvm.d.ts +5 -10
  106. package/dist/core/ingestion/import-resolvers/jvm.js +5 -58
  107. package/dist/core/ingestion/import-resolvers/php.d.ts +4 -5
  108. package/dist/core/ingestion/import-resolvers/php.js +4 -7
  109. package/dist/core/ingestion/import-resolvers/python.d.ts +3 -6
  110. package/dist/core/ingestion/import-resolvers/python.js +3 -18
  111. package/dist/core/ingestion/import-resolvers/resolver-factory.d.ts +24 -0
  112. package/dist/core/ingestion/import-resolvers/resolver-factory.js +33 -0
  113. package/dist/core/ingestion/import-resolvers/ruby.d.ts +4 -5
  114. package/dist/core/ingestion/import-resolvers/ruby.js +4 -7
  115. package/dist/core/ingestion/import-resolvers/rust.d.ts +4 -5
  116. package/dist/core/ingestion/import-resolvers/rust.js +4 -47
  117. package/dist/core/ingestion/import-resolvers/standard.d.ts +3 -9
  118. package/dist/core/ingestion/import-resolvers/standard.js +7 -8
  119. package/dist/core/ingestion/import-resolvers/types.d.ts +24 -0
  120. package/dist/core/ingestion/language-provider.d.ts +12 -0
  121. package/dist/core/ingestion/languages/c-cpp.js +15 -12
  122. package/dist/core/ingestion/languages/csharp.js +11 -21
  123. package/dist/core/ingestion/languages/dart.js +11 -7
  124. package/dist/core/ingestion/languages/go.js +11 -20
  125. package/dist/core/ingestion/languages/java.js +11 -18
  126. package/dist/core/ingestion/languages/kotlin.js +11 -13
  127. package/dist/core/ingestion/languages/php.js +11 -7
  128. package/dist/core/ingestion/languages/python.js +11 -7
  129. package/dist/core/ingestion/languages/ruby.js +11 -7
  130. package/dist/core/ingestion/languages/rust.js +11 -7
  131. package/dist/core/ingestion/languages/swift.js +11 -18
  132. package/dist/core/ingestion/languages/typescript.js +15 -23
  133. package/dist/core/ingestion/languages/vue.js +11 -17
  134. package/dist/core/ingestion/model/index.d.ts +2 -2
  135. package/dist/core/ingestion/model/index.js +1 -1
  136. package/dist/core/ingestion/model/resolve.d.ts +3 -0
  137. package/dist/core/ingestion/model/resolve.js +6 -2
  138. package/dist/core/ingestion/parsing-processor.d.ts +1 -2
  139. package/dist/core/ingestion/tree-sitter-queries.d.ts +11 -11
  140. package/dist/core/ingestion/tree-sitter-queries.js +81 -0
  141. package/dist/core/ingestion/type-env.d.ts +1 -1
  142. package/dist/core/ingestion/utils/ast-helpers.d.ts +1 -1
  143. package/dist/core/ingestion/utils/ast-helpers.js +3 -0
  144. package/dist/core/ingestion/variable-extractors/configs/c-cpp.d.ts +3 -0
  145. package/dist/core/ingestion/variable-extractors/configs/c-cpp.js +81 -0
  146. package/dist/core/ingestion/variable-extractors/configs/csharp.d.ts +9 -0
  147. package/dist/core/ingestion/variable-extractors/configs/csharp.js +63 -0
  148. package/dist/core/ingestion/variable-extractors/configs/dart.d.ts +2 -0
  149. package/dist/core/ingestion/variable-extractors/configs/dart.js +94 -0
  150. package/dist/core/ingestion/variable-extractors/configs/go.d.ts +2 -0
  151. package/dist/core/ingestion/variable-extractors/configs/go.js +83 -0
  152. package/dist/core/ingestion/variable-extractors/configs/jvm.d.ts +18 -0
  153. package/dist/core/ingestion/variable-extractors/configs/jvm.js +115 -0
  154. package/dist/core/ingestion/variable-extractors/configs/php.d.ts +14 -0
  155. package/dist/core/ingestion/variable-extractors/configs/php.js +58 -0
  156. package/dist/core/ingestion/variable-extractors/configs/python.d.ts +2 -0
  157. package/dist/core/ingestion/variable-extractors/configs/python.js +101 -0
  158. package/dist/core/ingestion/variable-extractors/configs/ruby.d.ts +11 -0
  159. package/dist/core/ingestion/variable-extractors/configs/ruby.js +52 -0
  160. package/dist/core/ingestion/variable-extractors/configs/rust.d.ts +2 -0
  161. package/dist/core/ingestion/variable-extractors/configs/rust.js +76 -0
  162. package/dist/core/ingestion/variable-extractors/configs/swift.d.ts +2 -0
  163. package/dist/core/ingestion/variable-extractors/configs/swift.js +88 -0
  164. package/dist/core/ingestion/variable-extractors/configs/typescript-javascript.d.ts +3 -0
  165. package/dist/core/ingestion/variable-extractors/configs/typescript-javascript.js +83 -0
  166. package/dist/core/ingestion/variable-extractors/generic.d.ts +5 -0
  167. package/dist/core/ingestion/variable-extractors/generic.js +80 -0
  168. package/dist/core/ingestion/variable-types.d.ts +82 -0
  169. package/dist/core/ingestion/variable-types.js +2 -0
  170. package/dist/core/ingestion/workers/parse-worker.js +196 -166
  171. package/dist/core/ingestion/workers/worker-pool.js +3 -0
  172. package/dist/core/lbug/csv-generator.js +1 -0
  173. package/dist/core/lbug/lbug-adapter.d.ts +13 -4
  174. package/dist/core/lbug/lbug-adapter.js +166 -81
  175. package/dist/core/lbug/schema.d.ts +9 -1
  176. package/dist/core/lbug/schema.js +19 -2
  177. package/dist/core/run-analyze.js +17 -4
  178. package/dist/core/tree-sitter/parser-loader.d.ts +3 -0
  179. package/dist/core/tree-sitter/parser-loader.js +17 -8
  180. package/dist/mcp/core/embedder.js +5 -0
  181. package/dist/mcp/local/local-backend.js +29 -19
  182. package/dist/server/api.js +10 -21
  183. package/package.json +5 -3
  184. package/scripts/build-tree-sitter-proto.cjs +82 -0
  185. package/vendor/node_modules/node-addon-api/node_addon_api.Makefile +6 -0
  186. package/vendor/node_modules/node-addon-api/node_addon_api.target.mk +104 -0
  187. package/vendor/node_modules/node-addon-api/node_addon_api_except.target.mk +108 -0
  188. package/vendor/node_modules/node-addon-api/node_addon_api_except_all.target.mk +104 -0
  189. package/vendor/node_modules/node-addon-api/node_addon_api_maybe.target.mk +104 -0
  190. package/vendor/tree-sitter-proto/package.json +1 -7
  191. package/dist/core/ingestion/call-sites/extract-language-call-site.d.ts +0 -10
  192. package/dist/core/ingestion/call-sites/extract-language-call-site.js +0 -22
  193. package/dist/core/ingestion/call-sites/java.d.ts +0 -9
  194. package/dist/core/ingestion/call-sites/java.js +0 -30
  195. package/dist/core/ingestion/import-resolvers/dart.d.ts +0 -7
  196. package/dist/core/ingestion/import-resolvers/swift.d.ts +0 -7
  197. package/dist/core/ingestion/import-resolvers/vue.d.ts +0 -8
  198. package/dist/core/ingestion/import-resolvers/vue.js +0 -9
@@ -1,9 +1,11 @@
1
1
  import fs from 'fs/promises';
2
2
  import { createReadStream, createWriteStream } from 'fs';
3
3
  import { createInterface } from 'readline';
4
+ import { once } from 'events';
5
+ import { finished } from 'stream/promises';
4
6
  import path from 'path';
5
7
  import lbug from '@ladybugdb/core';
6
- import { NODE_TABLES, REL_TABLE_NAME, SCHEMA_QUERIES, EMBEDDING_TABLE_NAME, } from './schema.js';
8
+ import { NODE_TABLES, REL_TABLE_NAME, SCHEMA_QUERIES, EMBEDDING_TABLE_NAME, STALE_HASH_SENTINEL, } from './schema.js';
7
9
  import { streamAllCSVsToDisk } from './csv-generator.js';
8
10
  /**
9
11
  * Split a relationship CSV into per-label-pair files on disk.
@@ -25,100 +27,82 @@ export const splitRelCsvByLabelPair = async (csvPath, csvDir, validTables, getNo
25
27
  const pairWriteStreams = new Map();
26
28
  let skippedRels = 0;
27
29
  let totalValidRels = 0;
28
- await new Promise((resolve, reject) => {
29
- const inputStream = createReadStream(csvPath, 'utf-8');
30
- const rl = createInterface({
31
- input: inputStream,
32
- crlfDelay: Infinity,
33
- });
34
- // Track which streams are already waiting for drain to prevent
35
- // listener accumulation. rl.pause() is not synchronous — buffered
36
- // line events continue firing after pause(), and without this guard
37
- // each line targeting the same pairKey would add another drain listener.
38
- const waitingForDrain = new Set();
39
- let settled = false;
40
- const cleanup = (err) => {
41
- if (settled)
42
- return;
43
- settled = true;
44
- try {
45
- rl.close();
46
- }
47
- catch { }
48
- try {
49
- inputStream.destroy();
50
- }
51
- catch { }
52
- for (const ws of pairWriteStreams.values()) {
53
- try {
54
- ws.destroy();
55
- }
56
- catch { }
57
- }
58
- reject(err);
59
- };
30
+ const inputStream = createReadStream(csvPath, 'utf-8');
31
+ const rl = createInterface({ input: inputStream, crlfDelay: Infinity });
32
+ // If any pair WriteStream errors (disk full, EMFILE, etc.) or the input
33
+ // stream fails, we need to abort the pending `once(ws, 'drain')` await.
34
+ // An AbortController gives us one signal to cancel all pending waits
35
+ // without a custom state machine.
36
+ const abortOnError = new AbortController();
37
+ let streamError = null;
38
+ const markStreamError = (err) => {
39
+ streamError ??= err;
40
+ abortOnError.abort(err);
41
+ };
42
+ try {
43
+ // `for await (const line of rl)` replaces the old manual
44
+ // on('line')/pause()/resume()/waitingForDrain state machine: readline's
45
+ // async iterator naturally serializes line delivery with our awaits, so
46
+ // at most one ws can be in backpressure at a time and we just await its
47
+ // 'drain' event.
60
48
  let isFirst = true;
61
- rl.on('line', (line) => {
49
+ for await (const line of rl) {
50
+ if (streamError)
51
+ throw streamError;
62
52
  if (isFirst) {
63
53
  relHeader = line;
64
54
  isFirst = false;
65
- return;
55
+ continue;
66
56
  }
67
57
  if (!line.trim())
68
- return;
58
+ continue;
69
59
  const match = line.match(/"([^"]*)","([^"]*)"/);
70
60
  if (!match) {
71
61
  skippedRels++;
72
- return;
62
+ continue;
73
63
  }
74
64
  const fromLabel = getNodeLabel(match[1]);
75
65
  const toLabel = getNodeLabel(match[2]);
76
66
  if (!validTables.has(fromLabel) || !validTables.has(toLabel)) {
77
67
  skippedRels++;
78
- return;
68
+ continue;
79
69
  }
80
70
  const pairKey = `${fromLabel}|${toLabel}`;
81
71
  let ws = pairWriteStreams.get(pairKey);
82
72
  if (!ws) {
83
73
  const pairCsvPath = path.join(csvDir, `rel_${fromLabel}_${toLabel}.csv`);
84
74
  ws = wsFactory(pairCsvPath);
85
- // If any per-pair WriteStream errors (disk full, EMFILE, etc.),
86
- // tear down everything and reject the Promise. Without this handler,
87
- // a stream error while rl is paused waiting for drain would cause
88
- // the drain callback to never fire and the Promise to hang forever.
89
- ws.on('error', cleanup);
90
- ws.write(relHeader + '\n');
75
+ ws.on('error', markStreamError);
91
76
  pairWriteStreams.set(pairKey, ws);
92
77
  relsByPairMeta.set(pairKey, { csvPath: pairCsvPath, rows: 0 });
78
+ if (!ws.write(relHeader + '\n')) {
79
+ await once(ws, 'drain', { signal: abortOnError.signal });
80
+ }
81
+ }
82
+ if (!ws.write(line + '\n')) {
83
+ await once(ws, 'drain', { signal: abortOnError.signal });
93
84
  }
94
- const ok = ws.write(line + '\n');
95
85
  relsByPairMeta.get(pairKey).rows++;
96
86
  totalValidRels++;
97
- // Handle backpressure: pause reading when the write buffer is full,
98
- // resume when the stream drains. Prevents unbounded memory growth
99
- // on repos with millions of relationships.
100
- // Guard with waitingForDrain to ensure only one drain listener is
101
- // registered per stream at a time — rl.pause() doesn't stop buffered
102
- // line events immediately. Only resume when ALL streams have drained
103
- // to avoid writing into still-full streams.
104
- if (!ok && !waitingForDrain.has(pairKey)) {
105
- waitingForDrain.add(pairKey);
106
- rl.pause();
107
- ws.once('drain', () => {
108
- waitingForDrain.delete(pairKey);
109
- if (waitingForDrain.size === 0)
110
- rl.resume();
111
- });
112
- }
113
- });
114
- rl.on('close', () => {
115
- if (!settled) {
116
- settled = true;
117
- resolve();
118
- }
119
- });
120
- rl.on('error', cleanup);
121
- });
87
+ }
88
+ if (streamError)
89
+ throw streamError;
90
+ }
91
+ catch (err) {
92
+ // Tear down everything so no fd is left dangling. If the abort was caused
93
+ // by a stream error, rethrow that error (more actionable than AbortError).
94
+ for (const ws of pairWriteStreams.values())
95
+ ws.destroy();
96
+ inputStream.destroy();
97
+ throw streamError ?? err;
98
+ }
99
+ finally {
100
+ // Readline 'close' fires before the underlying fs.ReadStream releases its
101
+ // fd — on Windows that race caused ENOTEMPTY on the parent dir.
102
+ // stream/promises.finished is the stdlib "wait until this stream is fully
103
+ // closed" primitive and handles both success and error paths.
104
+ await finished(inputStream).catch(() => { });
105
+ }
122
106
  return { relHeader, relsByPairMeta, pairWriteStreams, skippedRels, totalValidRels };
123
107
  };
124
108
  let db = null;
@@ -126,6 +110,14 @@ let conn = null;
126
110
  let currentDbPath = null;
127
111
  let ftsLoaded = false;
128
112
  let vectorExtensionLoaded = false;
113
+ /**
114
+ * Check if an error indicates a missing column or table (schema-level problem)
115
+ * rather than a transient/connection error. Used for legacy DB fallback logic.
116
+ */
117
+ const isMissingColumnOrTableError = (msg) => msg.includes('does not exist') ||
118
+ // Kuzu-specific: "(table|column|property) ... not found" — narrow enough to avoid
119
+ // matching transient errors like "connection not found" or "key not found".
120
+ /(table|column|property).*not found/i.test(msg);
129
121
  /** Expose the current Database for pool adapter reuse in tests. */
130
122
  export const getDatabase = () => db;
131
123
  // Global session lock for operations that touch module-level lbug globals.
@@ -332,15 +324,13 @@ export const loadGraphToLbug = async (graph, repoPath, storagePath, onProgress)
332
324
  }
333
325
  // Bulk COPY relationships — split by FROM→TO label pair (LadybugDB requires it)
334
326
  const { relHeader, relsByPairMeta, pairWriteStreams, skippedRels, totalValidRels } = await splitRelCsvByLabelPair(csvResult.relCsvPath, csvDir, validTables, getNodeLabel);
335
- // Close all per-pair write streams before COPY
336
- await Promise.all(Array.from(pairWriteStreams.values()).map((ws) => new Promise((resolve, reject) => {
337
- const onError = (err) => reject(err);
338
- ws.on('error', onError);
339
- ws.end(() => {
340
- ws.removeListener('error', onError);
341
- resolve();
342
- });
343
- })));
327
+ // Close all per-pair write streams before COPY. `stream/promises.finished`
328
+ // resolves on the stream's 'finish' event and rejects on 'error' — replaces
329
+ // a hand-rolled promisification with the stdlib primitive.
330
+ await Promise.all(Array.from(pairWriteStreams.values()).map(async (ws) => {
331
+ ws.end();
332
+ await finished(ws);
333
+ }));
344
334
  const insertedRels = totalValidRels;
345
335
  const warnings = [];
346
336
  if (insertedRels > 0) {
@@ -801,6 +791,8 @@ export const getLbugStats = async () => {
801
791
  * Load cached embeddings from LadybugDB before a rebuild.
802
792
  * Returns all embedding vectors so they can be re-inserted after the graph is reloaded,
803
793
  * avoiding expensive re-embedding of unchanged nodes.
794
+ *
795
+ * Detects old schema (no chunkIndex column) and returns empty cache to trigger rebuild.
804
796
  */
805
797
  export const loadCachedEmbeddings = async () => {
806
798
  if (!conn) {
@@ -809,20 +801,51 @@ export const loadCachedEmbeddings = async () => {
809
801
  const embeddingNodeIds = new Set();
810
802
  const embeddings = [];
811
803
  try {
812
- const rows = await conn.query(`MATCH (e:${EMBEDDING_TABLE_NAME}) RETURN e.nodeId AS nodeId, e.embedding AS embedding`);
804
+ // Schema migration detection: query with new columns to verify schema version.
805
+ // Old schema only had (nodeId, embedding); new schema adds (id, chunkIndex, startLine, endLine, contentHash).
806
+ // If the query fails (column missing), we return empty cache to force a full rebuild.
807
+ try {
808
+ const check = await conn.query(`MATCH (e:${EMBEDDING_TABLE_NAME}) RETURN e.nodeId AS nodeId, e.chunkIndex AS chunkIndex LIMIT 1`);
809
+ const checkResult = Array.isArray(check) ? check[0] : check;
810
+ await checkResult.getAll();
811
+ }
812
+ catch {
813
+ return { embeddingNodeIds: new Set(), embeddings: [] };
814
+ }
815
+ // Try to read contentHash alongside chunk columns
816
+ let rows;
817
+ let hasContentHash = true;
818
+ try {
819
+ rows = await conn.query(`MATCH (e:${EMBEDDING_TABLE_NAME}) RETURN e.nodeId AS nodeId, e.chunkIndex AS chunkIndex, e.startLine AS startLine, e.endLine AS endLine, e.embedding AS embedding, e.contentHash AS contentHash`);
820
+ }
821
+ catch (err) {
822
+ // Fallback for legacy DBs without contentHash column
823
+ const msg = err?.message ?? '';
824
+ if (isMissingColumnOrTableError(msg)) {
825
+ hasContentHash = false;
826
+ rows = await conn.query(`MATCH (e:${EMBEDDING_TABLE_NAME}) RETURN e.nodeId AS nodeId, e.chunkIndex AS chunkIndex, e.startLine AS startLine, e.endLine AS endLine, e.embedding AS embedding`);
827
+ }
828
+ else {
829
+ throw err;
830
+ }
831
+ }
813
832
  const result = Array.isArray(rows) ? rows[0] : rows;
814
833
  for (const row of await result.getAll()) {
815
834
  const nodeId = String(row.nodeId ?? row[0] ?? '');
816
835
  if (!nodeId)
817
836
  continue;
818
837
  embeddingNodeIds.add(nodeId);
819
- const embedding = row.embedding ?? row[1];
838
+ const embedding = row.embedding ?? row[4];
820
839
  if (embedding) {
821
840
  embeddings.push({
822
841
  nodeId,
842
+ chunkIndex: Number(row.chunkIndex ?? row[1] ?? 0),
843
+ startLine: Number(row.startLine ?? row[2] ?? 0),
844
+ endLine: Number(row.endLine ?? row[3] ?? 0),
823
845
  embedding: Array.isArray(embedding)
824
846
  ? embedding.map(Number)
825
847
  : Array.from(embedding).map(Number),
848
+ contentHash: hasContentHash ? (row.contentHash ?? row[5] ?? undefined) : undefined,
826
849
  });
827
850
  }
828
851
  }
@@ -832,6 +855,68 @@ export const loadCachedEmbeddings = async () => {
832
855
  }
833
856
  return { embeddingNodeIds, embeddings };
834
857
  };
858
+ /**
859
+ * Fetch existing embedding hashes from CodeEmbedding table for incremental embedding.
860
+ * Returns a Map<nodeId, contentHash> suitable for passing to `runEmbeddingPipeline`.
861
+ * Handles legacy DBs without the `contentHash` column (all rows treated as stale with empty hash).
862
+ * Returns undefined if the CodeEmbedding table does not exist.
863
+ *
864
+ * @param execQuery - Cypher query executor (typically pool-adapter's `executeQuery`)
865
+ */
866
+ export const fetchExistingEmbeddingHashes = async (execQuery) => {
867
+ try {
868
+ const rows = await execQuery(`MATCH (e:${EMBEDDING_TABLE_NAME}) RETURN e.nodeId AS nodeId, e.chunkIndex AS chunkIndex, e.startLine AS startLine, e.endLine AS endLine, e.contentHash AS contentHash`);
869
+ if (!rows || rows.length === 0)
870
+ return undefined;
871
+ const map = new Map();
872
+ for (const r of rows) {
873
+ const nodeId = r.nodeId ?? r[0];
874
+ const chunkIndex = r.chunkIndex ?? r[1];
875
+ const startLine = r.startLine ?? r[2];
876
+ const endLine = r.endLine ?? r[3];
877
+ const hash = r.contentHash ?? r[4] ?? STALE_HASH_SENTINEL;
878
+ if (nodeId) {
879
+ const hasChunkMetadata = chunkIndex !== undefined &&
880
+ chunkIndex !== null &&
881
+ startLine !== undefined &&
882
+ startLine !== null &&
883
+ endLine !== undefined &&
884
+ endLine !== null;
885
+ // Empty/null contentHash or missing chunk metadata means legacy row — treat as stale.
886
+ map.set(nodeId, hasChunkMetadata && hash ? hash : STALE_HASH_SENTINEL);
887
+ }
888
+ }
889
+ return map;
890
+ }
891
+ catch (err) {
892
+ const msg = err?.message ?? '';
893
+ if (isMissingColumnOrTableError(msg)) {
894
+ // Legacy rows missing chunk-aware columns — treat every row as stale.
895
+ try {
896
+ const rows = await execQuery(`MATCH (e:${EMBEDDING_TABLE_NAME}) RETURN e.nodeId AS nodeId`);
897
+ if (!rows || rows.length === 0)
898
+ return undefined;
899
+ const map = new Map();
900
+ for (const r of rows) {
901
+ const nodeId = r.nodeId ?? r[0];
902
+ if (nodeId)
903
+ map.set(nodeId, STALE_HASH_SENTINEL);
904
+ }
905
+ console.log(`[embed] ${map.size} nodes in legacy DB (missing chunk-aware columns) — all treated as stale`);
906
+ return map;
907
+ }
908
+ catch (fallbackErr) {
909
+ const fallbackMsg = fallbackErr?.message ?? '';
910
+ if (isMissingColumnOrTableError(fallbackMsg)) {
911
+ console.log(`[embed] CodeEmbedding table not yet present — full embedding run (${fallbackMsg})`);
912
+ return undefined;
913
+ }
914
+ throw fallbackErr;
915
+ }
916
+ }
917
+ throw err;
918
+ }
919
+ };
835
920
  export const closeLbug = async () => {
836
921
  if (conn) {
837
922
  try {
@@ -31,6 +31,7 @@ export declare const IMPL_SCHEMA: string;
31
31
  export declare const TYPE_ALIAS_SCHEMA: string;
32
32
  export declare const CONST_SCHEMA: string;
33
33
  export declare const STATIC_SCHEMA: string;
34
+ export declare const VARIABLE_SCHEMA: string;
34
35
  export declare const PROPERTY_SCHEMA: string;
35
36
  export declare const RECORD_SCHEMA: string;
36
37
  export declare const DELEGATE_SCHEMA: string;
@@ -41,8 +42,15 @@ export declare const MODULE_SCHEMA: string;
41
42
  export declare const ROUTE_SCHEMA = "\nCREATE NODE TABLE Route (\n id STRING,\n name STRING,\n filePath STRING,\n responseKeys STRING[],\n errorKeys STRING[],\n middleware STRING[],\n PRIMARY KEY (id)\n)";
42
43
  export declare const TOOL_SCHEMA = "\nCREATE NODE TABLE Tool (\n id STRING,\n name STRING,\n filePath STRING,\n description STRING,\n PRIMARY KEY (id)\n)";
43
44
  export declare const SECTION_SCHEMA = "\nCREATE NODE TABLE Section (\n id STRING,\n name STRING,\n filePath STRING,\n startLine INT64,\n endLine INT64,\n level INT64,\n content STRING,\n description STRING,\n PRIMARY KEY (id)\n)";
44
- export declare const RELATION_SCHEMA = "\nCREATE REL TABLE CodeRelation (\n FROM File TO File,\n FROM File TO Folder,\n FROM File TO Function,\n FROM File TO Class,\n FROM File TO Interface,\n FROM File TO Method,\n FROM File TO CodeElement,\n FROM File TO `Struct`,\n FROM File TO `Enum`,\n FROM File TO `Macro`,\n FROM File TO `Typedef`,\n FROM File TO `Union`,\n FROM File TO `Namespace`,\n FROM File TO `Trait`,\n FROM File TO `Impl`,\n FROM File TO `TypeAlias`,\n FROM File TO `Const`,\n FROM File TO `Static`,\n FROM File TO `Property`,\n FROM File TO `Record`,\n FROM File TO `Delegate`,\n FROM File TO `Annotation`,\n FROM File TO `Constructor`,\n FROM File TO `Template`,\n FROM File TO `Module`,\n FROM File TO Section,\n FROM Folder TO Folder,\n FROM Folder TO File,\n FROM Function TO Function,\n FROM Function TO Method,\n FROM Function TO Class,\n FROM Function TO Community,\n FROM Function TO `Macro`,\n FROM Function TO `Struct`,\n FROM Function TO `Template`,\n FROM Function TO `Enum`,\n FROM Function TO `Namespace`,\n FROM Function TO `TypeAlias`,\n FROM Function TO `Module`,\n FROM Function TO `Impl`,\n FROM Function TO Interface,\n FROM Function TO `Constructor`,\n FROM Function TO `Const`,\n FROM Function TO `Typedef`,\n FROM Function TO `Union`,\n FROM Function TO `Property`,\n FROM Function TO CodeElement,\n FROM Class TO Method,\n FROM Class TO Function,\n FROM Class TO Class,\n FROM Class TO Interface,\n FROM Class TO Community,\n FROM Class TO `Template`,\n FROM Class TO `TypeAlias`,\n FROM Class TO `Struct`,\n FROM Class TO `Enum`,\n FROM Class TO `Annotation`,\n FROM Class TO `Constructor`,\n FROM Class TO `Trait`,\n FROM Class TO `Macro`,\n FROM Class TO `Impl`,\n FROM Class TO `Union`,\n FROM Class TO `Namespace`,\n FROM Class TO `Typedef`,\n FROM Class TO `Property`,\n FROM Method TO Function,\n FROM Method TO Method,\n FROM Method TO Class,\n FROM Method TO Community,\n FROM Method TO `Template`,\n FROM Method TO `Struct`,\n FROM Method TO 
`TypeAlias`,\n FROM Method TO `Enum`,\n FROM Method TO `Macro`,\n FROM Method TO `Namespace`,\n FROM Method TO `Module`,\n FROM Method TO `Impl`,\n FROM Method TO Interface,\n FROM Method TO `Constructor`,\n FROM Method TO `Property`,\n FROM Method TO CodeElement,\n FROM `Template` TO `Template`,\n FROM `Template` TO Function,\n FROM `Template` TO Method,\n FROM `Template` TO Class,\n FROM `Template` TO `Struct`,\n FROM `Template` TO `TypeAlias`,\n FROM `Template` TO `Enum`,\n FROM `Template` TO `Macro`,\n FROM `Template` TO Interface,\n FROM `Template` TO `Constructor`,\n FROM `Module` TO `Module`,\n FROM Section TO Section,\n FROM Section TO File,\n FROM File TO Route,\n FROM Function TO Route,\n FROM Method TO Route,\n FROM File TO Tool,\n FROM Function TO Tool,\n FROM Method TO Tool,\n FROM CodeElement TO Community,\n FROM Interface TO Community,\n FROM Interface TO Function,\n FROM Interface TO Method,\n FROM Interface TO Class,\n FROM Interface TO Interface,\n FROM Interface TO `TypeAlias`,\n FROM Interface TO `Struct`,\n FROM Interface TO `Constructor`,\n FROM Interface TO `Property`,\n FROM `Struct` TO Community,\n FROM `Struct` TO `Trait`,\n FROM `Struct` TO `Struct`,\n FROM `Struct` TO Class,\n FROM `Struct` TO `Enum`,\n FROM `Struct` TO Function,\n FROM `Struct` TO Method,\n FROM `Struct` TO Interface,\n FROM `Struct` TO `Constructor`,\n FROM `Struct` TO `Property`,\n FROM `Enum` TO `Enum`,\n FROM `Enum` TO Community,\n FROM `Enum` TO Class,\n FROM `Enum` TO Interface,\n FROM `Macro` TO Community,\n FROM `Macro` TO Function,\n FROM `Macro` TO Method,\n FROM `Module` TO Function,\n FROM `Module` TO Method,\n FROM `Typedef` TO Community,\n FROM `Union` TO Community,\n FROM `Namespace` TO Community,\n FROM `Namespace` TO `Struct`,\n FROM `Trait` TO Method,\n FROM `Trait` TO `Constructor`,\n FROM `Trait` TO `Property`,\n FROM `Trait` TO Community,\n FROM `Impl` TO Method,\n FROM `Impl` TO `Constructor`,\n FROM `Impl` TO `Property`,\n FROM `Impl` TO 
Community,\n FROM `Impl` TO `Trait`,\n FROM `Impl` TO `Struct`,\n FROM `Impl` TO `Impl`,\n FROM `TypeAlias` TO Community,\n FROM `TypeAlias` TO `Trait`,\n FROM `TypeAlias` TO Class,\n FROM `Const` TO Community,\n FROM `Static` TO Community,\n FROM `Property` TO Community,\n FROM `Record` TO Method,\n FROM `Record` TO `Constructor`,\n FROM `Record` TO `Property`,\n FROM `Record` TO Community,\n FROM `Delegate` TO Community,\n FROM `Annotation` TO Community,\n FROM `Constructor` TO Community,\n FROM `Constructor` TO Interface,\n FROM `Constructor` TO Class,\n FROM `Constructor` TO Method,\n FROM `Constructor` TO Function,\n FROM `Constructor` TO `Constructor`,\n FROM `Constructor` TO `Struct`,\n FROM `Constructor` TO `Macro`,\n FROM `Constructor` TO `Template`,\n FROM `Constructor` TO `TypeAlias`,\n FROM `Constructor` TO `Enum`,\n FROM `Constructor` TO `Annotation`,\n FROM `Constructor` TO `Impl`,\n FROM `Constructor` TO `Namespace`,\n FROM `Constructor` TO `Module`,\n FROM `Constructor` TO `Property`,\n FROM `Constructor` TO `Typedef`,\n FROM `Template` TO Community,\n FROM `Module` TO Community,\n FROM Function TO Process,\n FROM Method TO Process,\n FROM Class TO Process,\n FROM Interface TO Process,\n FROM `Struct` TO Process,\n FROM `Constructor` TO Process,\n FROM `Module` TO Process,\n FROM `Macro` TO Process,\n FROM `Impl` TO Process,\n FROM `Typedef` TO Process,\n FROM `TypeAlias` TO Process,\n FROM `Enum` TO Process,\n FROM `Union` TO Process,\n FROM `Namespace` TO Process,\n FROM `Trait` TO Process,\n FROM `Const` TO Process,\n FROM `Static` TO Process,\n FROM `Property` TO Process,\n FROM `Record` TO Process,\n FROM `Delegate` TO Process,\n FROM `Annotation` TO Process,\n FROM `Template` TO Process,\n FROM CodeElement TO Process,\n FROM Route TO Process,\n FROM Tool TO Process,\n type STRING,\n confidence DOUBLE,\n reason STRING,\n step INT32\n)";
45
+ export declare const RELATION_SCHEMA = "\nCREATE REL TABLE CodeRelation (\n FROM File TO File,\n FROM File TO Folder,\n FROM File TO Function,\n FROM File TO Class,\n FROM File TO Interface,\n FROM File TO Method,\n FROM File TO CodeElement,\n FROM File TO `Struct`,\n FROM File TO `Enum`,\n FROM File TO `Macro`,\n FROM File TO `Typedef`,\n FROM File TO `Union`,\n FROM File TO `Namespace`,\n FROM File TO `Trait`,\n FROM File TO `Impl`,\n FROM File TO `TypeAlias`,\n FROM File TO `Const`,\n FROM File TO `Static`,\n FROM File TO `Variable`,\n FROM File TO `Property`,\n FROM File TO `Record`,\n FROM File TO `Delegate`,\n FROM File TO `Annotation`,\n FROM File TO `Constructor`,\n FROM File TO `Template`,\n FROM File TO `Module`,\n FROM File TO Section,\n FROM Folder TO Folder,\n FROM Folder TO File,\n FROM Function TO Function,\n FROM Function TO Method,\n FROM Function TO Class,\n FROM Function TO Community,\n FROM Function TO `Macro`,\n FROM Function TO `Struct`,\n FROM Function TO `Template`,\n FROM Function TO `Enum`,\n FROM Function TO `Namespace`,\n FROM Function TO `TypeAlias`,\n FROM Function TO `Module`,\n FROM Function TO `Impl`,\n FROM Function TO Interface,\n FROM Function TO `Constructor`,\n FROM Function TO `Const`,\n FROM Function TO `Typedef`,\n FROM Function TO `Union`,\n FROM Function TO `Property`,\n FROM Function TO CodeElement,\n FROM Class TO Method,\n FROM Class TO Function,\n FROM Class TO Class,\n FROM Class TO Interface,\n FROM Class TO Community,\n FROM Class TO `Template`,\n FROM Class TO `TypeAlias`,\n FROM Class TO `Struct`,\n FROM Class TO `Enum`,\n FROM Class TO `Annotation`,\n FROM Class TO `Constructor`,\n FROM Class TO `Trait`,\n FROM Class TO `Macro`,\n FROM Class TO `Impl`,\n FROM Class TO `Union`,\n FROM Class TO `Namespace`,\n FROM Class TO `Typedef`,\n FROM Class TO `Property`,\n FROM Method TO Function,\n FROM Method TO Method,\n FROM Method TO Class,\n FROM Method TO Community,\n FROM Method TO `Template`,\n FROM Method TO 
`Struct`,\n FROM Method TO `TypeAlias`,\n FROM Method TO `Enum`,\n FROM Method TO `Macro`,\n FROM Method TO `Namespace`,\n FROM Method TO `Module`,\n FROM Method TO `Impl`,\n FROM Method TO Interface,\n FROM Method TO `Constructor`,\n FROM Method TO `Property`,\n FROM Method TO CodeElement,\n FROM `Template` TO `Template`,\n FROM `Template` TO Function,\n FROM `Template` TO Method,\n FROM `Template` TO Class,\n FROM `Template` TO `Struct`,\n FROM `Template` TO `TypeAlias`,\n FROM `Template` TO `Enum`,\n FROM `Template` TO `Macro`,\n FROM `Template` TO Interface,\n FROM `Template` TO `Constructor`,\n FROM `Module` TO `Module`,\n FROM Section TO Section,\n FROM Section TO File,\n FROM File TO Route,\n FROM Function TO Route,\n FROM Method TO Route,\n FROM File TO Tool,\n FROM Function TO Tool,\n FROM Method TO Tool,\n FROM CodeElement TO Community,\n FROM Interface TO Community,\n FROM Interface TO Function,\n FROM Interface TO Method,\n FROM Interface TO Class,\n FROM Interface TO Interface,\n FROM Interface TO `TypeAlias`,\n FROM Interface TO `Struct`,\n FROM Interface TO `Constructor`,\n FROM Interface TO `Property`,\n FROM `Struct` TO Community,\n FROM `Struct` TO `Trait`,\n FROM `Struct` TO `Struct`,\n FROM `Struct` TO Class,\n FROM `Struct` TO `Enum`,\n FROM `Struct` TO Function,\n FROM `Struct` TO Method,\n FROM `Struct` TO Interface,\n FROM `Struct` TO `Constructor`,\n FROM `Struct` TO `Property`,\n FROM `Enum` TO `Enum`,\n FROM `Enum` TO Community,\n FROM `Enum` TO Class,\n FROM `Enum` TO Interface,\n FROM `Macro` TO Community,\n FROM `Macro` TO Function,\n FROM `Macro` TO Method,\n FROM `Module` TO Function,\n FROM `Module` TO Method,\n FROM `Typedef` TO Community,\n FROM `Union` TO Community,\n FROM `Namespace` TO Community,\n FROM `Namespace` TO `Struct`,\n FROM `Trait` TO Method,\n FROM `Trait` TO `Constructor`,\n FROM `Trait` TO `Property`,\n FROM `Trait` TO Community,\n FROM `Impl` TO Method,\n FROM `Impl` TO `Constructor`,\n FROM `Impl` TO 
`Property`,\n FROM `Impl` TO Community,\n FROM `Impl` TO `Trait`,\n FROM `Impl` TO `Struct`,\n FROM `Impl` TO `Impl`,\n FROM `TypeAlias` TO Community,\n FROM `TypeAlias` TO `Trait`,\n FROM `TypeAlias` TO Class,\n FROM `Const` TO Community,\n FROM `Static` TO Community,\n FROM `Variable` TO Community,\n FROM `Property` TO Community,\n FROM `Record` TO Method,\n FROM `Record` TO `Constructor`,\n FROM `Record` TO `Property`,\n FROM `Record` TO Community,\n FROM `Delegate` TO Community,\n FROM `Annotation` TO Community,\n FROM `Constructor` TO Community,\n FROM `Constructor` TO Interface,\n FROM `Constructor` TO Class,\n FROM `Constructor` TO Method,\n FROM `Constructor` TO Function,\n FROM `Constructor` TO `Constructor`,\n FROM `Constructor` TO `Struct`,\n FROM `Constructor` TO `Macro`,\n FROM `Constructor` TO `Template`,\n FROM `Constructor` TO `TypeAlias`,\n FROM `Constructor` TO `Enum`,\n FROM `Constructor` TO `Annotation`,\n FROM `Constructor` TO `Impl`,\n FROM `Constructor` TO `Namespace`,\n FROM `Constructor` TO `Module`,\n FROM `Constructor` TO `Property`,\n FROM `Constructor` TO `Typedef`,\n FROM `Template` TO Community,\n FROM `Module` TO Community,\n FROM Function TO Process,\n FROM Method TO Process,\n FROM Class TO Process,\n FROM Interface TO Process,\n FROM `Struct` TO Process,\n FROM `Constructor` TO Process,\n FROM `Module` TO Process,\n FROM `Macro` TO Process,\n FROM `Impl` TO Process,\n FROM `Typedef` TO Process,\n FROM `TypeAlias` TO Process,\n FROM `Enum` TO Process,\n FROM `Union` TO Process,\n FROM `Namespace` TO Process,\n FROM `Trait` TO Process,\n FROM `Const` TO Process,\n FROM `Static` TO Process,\n FROM `Variable` TO Process,\n FROM `Property` TO Process,\n FROM `Record` TO Process,\n FROM `Delegate` TO Process,\n FROM `Annotation` TO Process,\n FROM `Template` TO Process,\n FROM CodeElement TO Process,\n FROM Route TO Process,\n FROM Tool TO Process,\n type STRING,\n confidence DOUBLE,\n reason STRING,\n step INT32\n)";
45
46
  export declare const EMBEDDING_DIMS: number;
47
+ /** HNSW vector index name for the CodeEmbedding table. */
48
+ export declare const EMBEDDING_INDEX_NAME = "code_embedding_idx";
49
+ /**
50
+ * Sentinel value for "no content hash available" — used in legacy DBs and null rows.
51
+ * Nodes with this hash are always treated as stale and re-embedded.
52
+ */
53
+ export declare const STALE_HASH_SENTINEL = "";
46
54
  export declare const EMBEDDING_SCHEMA: string;
47
55
  /**
48
56
  * Create vector index for semantic search
@@ -149,6 +149,7 @@ export const IMPL_SCHEMA = CODE_ELEMENT_BASE('Impl');
149
149
  export const TYPE_ALIAS_SCHEMA = CODE_ELEMENT_BASE('TypeAlias');
150
150
  export const CONST_SCHEMA = CODE_ELEMENT_BASE('Const');
151
151
  export const STATIC_SCHEMA = CODE_ELEMENT_BASE('Static');
152
+ export const VARIABLE_SCHEMA = CODE_ELEMENT_BASE('Variable');
152
153
  export const PROPERTY_SCHEMA = CODE_ELEMENT_BASE('Property');
153
154
  export const RECORD_SCHEMA = CODE_ELEMENT_BASE('Record');
154
155
  export const DELEGATE_SCHEMA = CODE_ELEMENT_BASE('Delegate');
@@ -213,6 +214,7 @@ CREATE REL TABLE ${REL_TABLE_NAME} (
213
214
  FROM File TO \`TypeAlias\`,
214
215
  FROM File TO \`Const\`,
215
216
  FROM File TO \`Static\`,
217
+ FROM File TO \`Variable\`,
216
218
  FROM File TO \`Property\`,
217
219
  FROM File TO \`Record\`,
218
220
  FROM File TO \`Delegate\`,
@@ -344,6 +346,7 @@ CREATE REL TABLE ${REL_TABLE_NAME} (
344
346
  FROM \`TypeAlias\` TO Class,
345
347
  FROM \`Const\` TO Community,
346
348
  FROM \`Static\` TO Community,
349
+ FROM \`Variable\` TO Community,
347
350
  FROM \`Property\` TO Community,
348
351
  FROM \`Record\` TO Method,
349
352
  FROM \`Record\` TO \`Constructor\`,
@@ -387,6 +390,7 @@ CREATE REL TABLE ${REL_TABLE_NAME} (
387
390
  FROM \`Trait\` TO Process,
388
391
  FROM \`Const\` TO Process,
389
392
  FROM \`Static\` TO Process,
393
+ FROM \`Variable\` TO Process,
390
394
  FROM \`Property\` TO Process,
391
395
  FROM \`Record\` TO Process,
392
396
  FROM \`Delegate\` TO Process,
@@ -410,18 +414,30 @@ if (Number.isNaN(_rawDims) || _rawDims <= 0) {
410
414
  throw new Error(`GITNEXUS_EMBEDDING_DIMS must be a positive integer, got "${process.env.GITNEXUS_EMBEDDING_DIMS}"`);
411
415
  }
412
416
  export const EMBEDDING_DIMS = _rawDims;
417
+ /** HNSW vector index name for the CodeEmbedding table. */
418
+ export const EMBEDDING_INDEX_NAME = 'code_embedding_idx';
419
+ /**
420
+ * Sentinel value for "no content hash available" — used in legacy DBs and null rows.
421
+ * Nodes with this hash are always treated as stale and re-embedded.
422
+ */
423
+ export const STALE_HASH_SENTINEL = '';
413
424
  export const EMBEDDING_SCHEMA = `
414
425
  CREATE NODE TABLE ${EMBEDDING_TABLE_NAME} (
426
+ id STRING,
415
427
  nodeId STRING,
428
+ chunkIndex INT32,
429
+ startLine INT64,
430
+ endLine INT64,
416
431
  embedding FLOAT[${EMBEDDING_DIMS}],
417
- PRIMARY KEY (nodeId)
432
+ contentHash STRING,
433
+ PRIMARY KEY (id)
418
434
  )`;
419
435
  /**
420
436
  * Create vector index for semantic search
421
437
  * Uses HNSW (Hierarchical Navigable Small World) algorithm with cosine similarity
422
438
  */
423
439
  export const CREATE_VECTOR_INDEX_QUERY = `
424
- CALL CREATE_VECTOR_INDEX('${EMBEDDING_TABLE_NAME}', 'code_embedding_idx', 'embedding', metric := 'cosine')
440
+ CALL CREATE_VECTOR_INDEX('${EMBEDDING_TABLE_NAME}', '${EMBEDDING_INDEX_NAME}', 'embedding', metric := 'cosine')
425
441
  `;
426
442
  // ============================================================================
427
443
  // ALL SCHEMA QUERIES IN ORDER
@@ -449,6 +465,7 @@ export const NODE_SCHEMA_QUERIES = [
449
465
  TYPE_ALIAS_SCHEMA,
450
466
  CONST_SCHEMA,
451
467
  STATIC_SCHEMA,
468
+ VARIABLE_SCHEMA,
452
469
  PROPERTY_SCHEMA,
453
470
  RECORD_SCHEMA,
454
471
  DELEGATE_SCHEMA,
@@ -15,6 +15,8 @@ import { initLbug, loadGraphToLbug, getLbugStats, executeQuery, executeWithReuse
15
15
  import { getStoragePaths, saveMeta, loadMeta, addToGitignore, registerRepo, cleanupOldKuzuFiles, } from '../storage/repo-manager.js';
16
16
  import { getCurrentCommit, hasGitDir } from '../storage/git.js';
17
17
  import { generateAIContextFiles } from '../cli/ai-context.js';
18
+ import { EMBEDDING_TABLE_NAME } from './lbug/schema.js';
19
+ import { STALE_HASH_SENTINEL } from './lbug/schema.js';
18
20
  /** Threshold: auto-skip embeddings for repos with more nodes than this */
19
21
  const EMBEDDING_NODE_LIMIT = 50_000;
20
22
  export const PHASE_LABELS = {
@@ -144,12 +146,12 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
144
146
  }
145
147
  else {
146
148
  progress('embeddings', 88, `Restoring ${cachedEmbeddings.length} cached embeddings...`);
149
+ const { batchInsertEmbeddings: batchInsert } = await import('./embeddings/embedding-pipeline.js');
147
150
  const EMBED_BATCH = 200;
148
151
  for (let i = 0; i < cachedEmbeddings.length; i += EMBED_BATCH) {
149
152
  const batch = cachedEmbeddings.slice(i, i + EMBED_BATCH);
150
- const paramsList = batch.map((e) => ({ nodeId: e.nodeId, embedding: e.embedding }));
151
153
  try {
152
- await executeWithReusedStatement(`MERGE (e:CodeEmbedding {nodeId: $nodeId}) SET e.embedding = $embedding`, paramsList);
154
+ await batchInsert(executeWithReusedStatement, batch);
153
155
  }
154
156
  catch {
155
157
  /* some may fail if node was removed, that's fine */
@@ -170,6 +172,17 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
170
172
  const httpMode = isHttpMode();
171
173
  progress('embeddings', 90, httpMode ? 'Connecting to embedding endpoint...' : 'Loading embedding model...');
172
174
  const { runEmbeddingPipeline } = await import('./embeddings/embedding-pipeline.js');
175
+ // Build a Map<nodeId, contentHash> from cached embeddings for incremental mode
176
+ let existingEmbeddings;
177
+ if (cachedEmbeddingNodeIds.size > 0) {
178
+ existingEmbeddings = new Map();
179
+ for (const e of cachedEmbeddings) {
180
+ existingEmbeddings.set(e.nodeId, e.contentHash ?? STALE_HASH_SENTINEL);
181
+ }
182
+ }
183
+ const { readServerMapping } = await import('./embeddings/server-mapping.js');
184
+ const projectName = path.basename(repoPath);
185
+ const serverName = await readServerMapping(projectName);
173
186
  await runEmbeddingPipeline(executeQuery, executeWithReusedStatement, (p) => {
174
187
  const scaled = 90 + Math.round((p.percent / 100) * 8);
175
188
  const label = p.phase === 'loading-model'
@@ -178,14 +191,14 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
178
191
  : 'Loading embedding model...'
179
192
  : `Embedding ${p.nodesProcessed || 0}/${p.totalNodes || '?'}`;
180
193
  progress('embeddings', scaled, label);
181
- }, {}, cachedEmbeddingNodeIds.size > 0 ? cachedEmbeddingNodeIds : undefined);
194
+ }, {}, cachedEmbeddingNodeIds.size > 0 ? cachedEmbeddingNodeIds : undefined, { repoName: projectName, serverName }, existingEmbeddings);
182
195
  }
183
196
  // ── Phase 5: Finalize (98–100%) ───────────────────────────────────
184
197
  progress('done', 98, 'Saving metadata...');
185
198
  // Count embeddings in the index (cached + newly generated)
186
199
  let embeddingCount = 0;
187
200
  try {
188
- const embResult = await executeQuery(`MATCH (e:CodeEmbedding) RETURN count(e) AS cnt`);
201
+ const embResult = await executeQuery(`MATCH (e:${EMBEDDING_TABLE_NAME}) RETURN count(e) AS cnt`);
189
202
  embeddingCount = embResult?.[0]?.cnt ?? 0;
190
203
  }
191
204
  catch {
@@ -1,5 +1,8 @@
1
1
  import Parser from 'tree-sitter';
2
2
  import { SupportedLanguages } from '../../_shared/index.js';
3
3
  export declare const isLanguageAvailable: (language: SupportedLanguages) => boolean;
4
+ export declare const resolveLanguageKey: (language: SupportedLanguages, filePath?: string) => string;
5
+ export declare const getLanguageGrammar: (language: SupportedLanguages, filePath?: string) => any;
4
6
  export declare const loadParser: () => Promise<Parser>;
5
7
  export declare const loadLanguage: (language: SupportedLanguages, filePath?: string) => Promise<void>;
8
+ export declare const createParserForLanguage: (language: SupportedLanguages, filePath?: string) => Promise<Parser>;
@@ -50,6 +50,17 @@ const languageMap = {
50
50
  ...(Swift ? { [SupportedLanguages.Swift]: Swift } : {}),
51
51
  };
52
52
  export const isLanguageAvailable = (language) => language in languageMap;
53
/**
 * Compute the lookup key for a language/file pair.
 *
 * @param language - A SupportedLanguages value.
 * @param filePath - Optional path of the file being parsed.
 * @returns `${language}:tsx` when the language is TypeScript and the path ends
 *          with `.tsx`; otherwise the language value unchanged.
 */
export const resolveLanguageKey = (language, filePath) => {
    const isTsxSource = language === SupportedLanguages.TypeScript && filePath?.endsWith('.tsx');
    if (isTsxSource) {
        return `${language}:tsx`;
    }
    return language;
};
56
/**
 * Fetch the grammar stored in `languageMap` for a language/file pair.
 *
 * @param language - A SupportedLanguages value.
 * @param filePath - Optional file path, forwarded to resolveLanguageKey to pick the map key.
 * @returns The `languageMap` entry found under the resolved key.
 * @throws {Error} `Unsupported language: <language>` when no truthy entry exists for the key.
 */
export const getLanguageGrammar = (language, filePath) => {
    const grammar = languageMap[resolveLanguageKey(language, filePath)];
    if (grammar) {
        return grammar;
    }
    // The message reports the requested language, not the resolved key.
    throw new Error(`Unsupported language: ${language}`);
};
53
64
  export const loadParser = async () => {
54
65
  if (parser)
55
66
  return parser;
@@ -59,12 +70,10 @@ export const loadParser = async () => {
59
70
  export const loadLanguage = async (language, filePath) => {
60
71
  if (!parser)
61
72
  await loadParser();
62
- const key = language === SupportedLanguages.TypeScript && filePath?.endsWith('.tsx')
63
- ? `${language}:tsx`
64
- : language;
65
- const lang = languageMap[key];
66
- if (!lang) {
67
- throw new Error(`Unsupported language: ${language}`);
68
- }
69
- parser.setLanguage(lang);
73
+ parser.setLanguage(getLanguageGrammar(language, filePath));
74
+ };
75
/**
 * Create a new Parser instance configured for the given language.
 *
 * A fresh `Parser` is constructed on every call instead of reusing the
 * module-level `parser` variable.
 *
 * @param language - A SupportedLanguages value.
 * @param filePath - Optional file path, forwarded to getLanguageGrammar.
 * @returns A promise (the function is async) resolving to the configured parser.
 *          The promise rejects if getLanguageGrammar throws.
 */
export const createParserForLanguage = async (language, filePath) => {
    const dedicatedParser = new Parser();
    const grammar = getLanguageGrammar(language, filePath);
    dedicatedParser.setLanguage(grammar);
    return dedicatedParser;
};
@@ -30,6 +30,11 @@ export const initEmbedder = async () => {
30
30
  initPromise = (async () => {
31
31
  try {
32
32
  env.allowLocalModels = false;
33
+ // Default cache to user-writable location. transformers.js defaults to
34
+ // ./node_modules/.cache inside its own install dir, which is unwritable
35
+ // when gitnexus is installed globally (e.g. /usr/lib/node_modules/).
36
+ // Respect HF_HOME if set, otherwise fall back to ~/.cache/huggingface.
37
+ env.cacheDir = process.env.HF_HOME ?? `${process.env.HOME}/.cache/huggingface`;
33
38
  console.error('GitNexus: Loading embedding model (first search may take a moment)...');
34
39
  // Try GPU first (DirectML on Windows, CUDA on Linux), fall back to CPU
35
40
  const isWindows = process.platform === 'win32';