@veewo/gitnexus 1.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. package/README.md +234 -0
  2. package/dist/benchmark/agent-context/evaluators.d.ts +9 -0
  3. package/dist/benchmark/agent-context/evaluators.js +196 -0
  4. package/dist/benchmark/agent-context/evaluators.test.d.ts +1 -0
  5. package/dist/benchmark/agent-context/evaluators.test.js +39 -0
  6. package/dist/benchmark/agent-context/io.d.ts +2 -0
  7. package/dist/benchmark/agent-context/io.js +23 -0
  8. package/dist/benchmark/agent-context/io.test.d.ts +1 -0
  9. package/dist/benchmark/agent-context/io.test.js +19 -0
  10. package/dist/benchmark/agent-context/report.d.ts +2 -0
  11. package/dist/benchmark/agent-context/report.js +59 -0
  12. package/dist/benchmark/agent-context/report.test.d.ts +1 -0
  13. package/dist/benchmark/agent-context/report.test.js +85 -0
  14. package/dist/benchmark/agent-context/runner.d.ts +46 -0
  15. package/dist/benchmark/agent-context/runner.js +111 -0
  16. package/dist/benchmark/agent-context/runner.test.d.ts +1 -0
  17. package/dist/benchmark/agent-context/runner.test.js +79 -0
  18. package/dist/benchmark/agent-context/tool-runner.d.ts +7 -0
  19. package/dist/benchmark/agent-context/tool-runner.js +18 -0
  20. package/dist/benchmark/agent-context/tool-runner.test.d.ts +1 -0
  21. package/dist/benchmark/agent-context/tool-runner.test.js +11 -0
  22. package/dist/benchmark/agent-context/types.d.ts +40 -0
  23. package/dist/benchmark/agent-context/types.js +1 -0
  24. package/dist/benchmark/analyze-runner.d.ts +16 -0
  25. package/dist/benchmark/analyze-runner.js +51 -0
  26. package/dist/benchmark/analyze-runner.test.d.ts +1 -0
  27. package/dist/benchmark/analyze-runner.test.js +37 -0
  28. package/dist/benchmark/evaluators.d.ts +6 -0
  29. package/dist/benchmark/evaluators.js +10 -0
  30. package/dist/benchmark/evaluators.test.d.ts +1 -0
  31. package/dist/benchmark/evaluators.test.js +12 -0
  32. package/dist/benchmark/io.d.ts +7 -0
  33. package/dist/benchmark/io.js +25 -0
  34. package/dist/benchmark/io.test.d.ts +1 -0
  35. package/dist/benchmark/io.test.js +35 -0
  36. package/dist/benchmark/neonspark-candidates.d.ts +19 -0
  37. package/dist/benchmark/neonspark-candidates.js +94 -0
  38. package/dist/benchmark/neonspark-candidates.test.d.ts +1 -0
  39. package/dist/benchmark/neonspark-candidates.test.js +43 -0
  40. package/dist/benchmark/neonspark-materialize.d.ts +19 -0
  41. package/dist/benchmark/neonspark-materialize.js +111 -0
  42. package/dist/benchmark/neonspark-materialize.test.d.ts +1 -0
  43. package/dist/benchmark/neonspark-materialize.test.js +124 -0
  44. package/dist/benchmark/neonspark-sync.d.ts +3 -0
  45. package/dist/benchmark/neonspark-sync.js +53 -0
  46. package/dist/benchmark/neonspark-sync.test.d.ts +1 -0
  47. package/dist/benchmark/neonspark-sync.test.js +20 -0
  48. package/dist/benchmark/report.d.ts +1 -0
  49. package/dist/benchmark/report.js +7 -0
  50. package/dist/benchmark/runner.d.ts +48 -0
  51. package/dist/benchmark/runner.js +302 -0
  52. package/dist/benchmark/runner.test.d.ts +1 -0
  53. package/dist/benchmark/runner.test.js +50 -0
  54. package/dist/benchmark/scoring.d.ts +16 -0
  55. package/dist/benchmark/scoring.js +27 -0
  56. package/dist/benchmark/scoring.test.d.ts +1 -0
  57. package/dist/benchmark/scoring.test.js +24 -0
  58. package/dist/benchmark/tool-runner.d.ts +6 -0
  59. package/dist/benchmark/tool-runner.js +17 -0
  60. package/dist/benchmark/types.d.ts +36 -0
  61. package/dist/benchmark/types.js +1 -0
  62. package/dist/cli/ai-context.d.ts +22 -0
  63. package/dist/cli/ai-context.js +184 -0
  64. package/dist/cli/ai-context.test.d.ts +1 -0
  65. package/dist/cli/ai-context.test.js +30 -0
  66. package/dist/cli/analyze-multi-scope-regression.test.d.ts +1 -0
  67. package/dist/cli/analyze-multi-scope-regression.test.js +22 -0
  68. package/dist/cli/analyze-options.d.ts +7 -0
  69. package/dist/cli/analyze-options.js +56 -0
  70. package/dist/cli/analyze-options.test.d.ts +1 -0
  71. package/dist/cli/analyze-options.test.js +36 -0
  72. package/dist/cli/analyze.d.ts +14 -0
  73. package/dist/cli/analyze.js +384 -0
  74. package/dist/cli/augment.d.ts +13 -0
  75. package/dist/cli/augment.js +33 -0
  76. package/dist/cli/benchmark-agent-context.d.ts +29 -0
  77. package/dist/cli/benchmark-agent-context.js +61 -0
  78. package/dist/cli/benchmark-agent-context.test.d.ts +1 -0
  79. package/dist/cli/benchmark-agent-context.test.js +80 -0
  80. package/dist/cli/benchmark-unity.d.ts +15 -0
  81. package/dist/cli/benchmark-unity.js +31 -0
  82. package/dist/cli/benchmark-unity.test.d.ts +1 -0
  83. package/dist/cli/benchmark-unity.test.js +18 -0
  84. package/dist/cli/claude-hooks.d.ts +22 -0
  85. package/dist/cli/claude-hooks.js +97 -0
  86. package/dist/cli/clean.d.ts +10 -0
  87. package/dist/cli/clean.js +60 -0
  88. package/dist/cli/eval-server.d.ts +30 -0
  89. package/dist/cli/eval-server.js +372 -0
  90. package/dist/cli/index.d.ts +2 -0
  91. package/dist/cli/index.js +182 -0
  92. package/dist/cli/list.d.ts +6 -0
  93. package/dist/cli/list.js +33 -0
  94. package/dist/cli/mcp.d.ts +8 -0
  95. package/dist/cli/mcp.js +34 -0
  96. package/dist/cli/repo-manager-alias.test.d.ts +1 -0
  97. package/dist/cli/repo-manager-alias.test.js +40 -0
  98. package/dist/cli/scope-filter.test.d.ts +1 -0
  99. package/dist/cli/scope-filter.test.js +49 -0
  100. package/dist/cli/serve.d.ts +4 -0
  101. package/dist/cli/serve.js +6 -0
  102. package/dist/cli/setup.d.ts +8 -0
  103. package/dist/cli/setup.js +311 -0
  104. package/dist/cli/setup.test.d.ts +1 -0
  105. package/dist/cli/setup.test.js +31 -0
  106. package/dist/cli/status.d.ts +6 -0
  107. package/dist/cli/status.js +27 -0
  108. package/dist/cli/tool.d.ts +40 -0
  109. package/dist/cli/tool.js +94 -0
  110. package/dist/cli/version.test.d.ts +1 -0
  111. package/dist/cli/version.test.js +19 -0
  112. package/dist/cli/wiki.d.ts +15 -0
  113. package/dist/cli/wiki.js +361 -0
  114. package/dist/config/ignore-service.d.ts +1 -0
  115. package/dist/config/ignore-service.js +210 -0
  116. package/dist/config/supported-languages.d.ts +12 -0
  117. package/dist/config/supported-languages.js +15 -0
  118. package/dist/core/augmentation/engine.d.ts +26 -0
  119. package/dist/core/augmentation/engine.js +213 -0
  120. package/dist/core/embeddings/embedder.d.ts +60 -0
  121. package/dist/core/embeddings/embedder.js +251 -0
  122. package/dist/core/embeddings/embedding-pipeline.d.ts +51 -0
  123. package/dist/core/embeddings/embedding-pipeline.js +329 -0
  124. package/dist/core/embeddings/index.d.ts +9 -0
  125. package/dist/core/embeddings/index.js +9 -0
  126. package/dist/core/embeddings/text-generator.d.ts +24 -0
  127. package/dist/core/embeddings/text-generator.js +182 -0
  128. package/dist/core/embeddings/types.d.ts +87 -0
  129. package/dist/core/embeddings/types.js +32 -0
  130. package/dist/core/graph/graph.d.ts +2 -0
  131. package/dist/core/graph/graph.js +66 -0
  132. package/dist/core/graph/types.d.ts +61 -0
  133. package/dist/core/graph/types.js +1 -0
  134. package/dist/core/ingestion/ast-cache.d.ts +11 -0
  135. package/dist/core/ingestion/ast-cache.js +34 -0
  136. package/dist/core/ingestion/call-processor.d.ts +15 -0
  137. package/dist/core/ingestion/call-processor.js +327 -0
  138. package/dist/core/ingestion/cluster-enricher.d.ts +38 -0
  139. package/dist/core/ingestion/cluster-enricher.js +170 -0
  140. package/dist/core/ingestion/community-processor.d.ts +39 -0
  141. package/dist/core/ingestion/community-processor.js +312 -0
  142. package/dist/core/ingestion/entry-point-scoring.d.ts +39 -0
  143. package/dist/core/ingestion/entry-point-scoring.js +260 -0
  144. package/dist/core/ingestion/filesystem-walker.d.ts +28 -0
  145. package/dist/core/ingestion/filesystem-walker.js +80 -0
  146. package/dist/core/ingestion/framework-detection.d.ts +39 -0
  147. package/dist/core/ingestion/framework-detection.js +235 -0
  148. package/dist/core/ingestion/heritage-processor.d.ts +20 -0
  149. package/dist/core/ingestion/heritage-processor.js +197 -0
  150. package/dist/core/ingestion/import-processor.d.ts +38 -0
  151. package/dist/core/ingestion/import-processor.js +778 -0
  152. package/dist/core/ingestion/parsing-processor.d.ts +15 -0
  153. package/dist/core/ingestion/parsing-processor.js +291 -0
  154. package/dist/core/ingestion/pipeline.d.ts +5 -0
  155. package/dist/core/ingestion/pipeline.js +323 -0
  156. package/dist/core/ingestion/process-processor.d.ts +51 -0
  157. package/dist/core/ingestion/process-processor.js +309 -0
  158. package/dist/core/ingestion/scope-filter.d.ts +25 -0
  159. package/dist/core/ingestion/scope-filter.js +100 -0
  160. package/dist/core/ingestion/structure-processor.d.ts +2 -0
  161. package/dist/core/ingestion/structure-processor.js +36 -0
  162. package/dist/core/ingestion/symbol-table.d.ts +33 -0
  163. package/dist/core/ingestion/symbol-table.js +38 -0
  164. package/dist/core/ingestion/tree-sitter-queries.d.ts +12 -0
  165. package/dist/core/ingestion/tree-sitter-queries.js +398 -0
  166. package/dist/core/ingestion/utils.d.ts +10 -0
  167. package/dist/core/ingestion/utils.js +50 -0
  168. package/dist/core/ingestion/workers/parse-worker.d.ts +59 -0
  169. package/dist/core/ingestion/workers/parse-worker.js +672 -0
  170. package/dist/core/ingestion/workers/worker-pool.d.ts +16 -0
  171. package/dist/core/ingestion/workers/worker-pool.js +120 -0
  172. package/dist/core/kuzu/csv-generator.d.ts +29 -0
  173. package/dist/core/kuzu/csv-generator.js +336 -0
  174. package/dist/core/kuzu/kuzu-adapter.d.ts +101 -0
  175. package/dist/core/kuzu/kuzu-adapter.js +753 -0
  176. package/dist/core/kuzu/schema.d.ts +53 -0
  177. package/dist/core/kuzu/schema.js +407 -0
  178. package/dist/core/search/bm25-index.d.ts +23 -0
  179. package/dist/core/search/bm25-index.js +95 -0
  180. package/dist/core/search/hybrid-search.d.ts +49 -0
  181. package/dist/core/search/hybrid-search.js +118 -0
  182. package/dist/core/tree-sitter/parser-loader.d.ts +4 -0
  183. package/dist/core/tree-sitter/parser-loader.js +44 -0
  184. package/dist/core/wiki/generator.d.ts +110 -0
  185. package/dist/core/wiki/generator.js +786 -0
  186. package/dist/core/wiki/graph-queries.d.ts +80 -0
  187. package/dist/core/wiki/graph-queries.js +238 -0
  188. package/dist/core/wiki/html-viewer.d.ts +10 -0
  189. package/dist/core/wiki/html-viewer.js +297 -0
  190. package/dist/core/wiki/llm-client.d.ts +40 -0
  191. package/dist/core/wiki/llm-client.js +162 -0
  192. package/dist/core/wiki/prompts.d.ts +53 -0
  193. package/dist/core/wiki/prompts.js +174 -0
  194. package/dist/lib/utils.d.ts +1 -0
  195. package/dist/lib/utils.js +3 -0
  196. package/dist/mcp/core/embedder.d.ts +27 -0
  197. package/dist/mcp/core/embedder.js +108 -0
  198. package/dist/mcp/core/kuzu-adapter.d.ts +34 -0
  199. package/dist/mcp/core/kuzu-adapter.js +231 -0
  200. package/dist/mcp/local/local-backend.d.ts +160 -0
  201. package/dist/mcp/local/local-backend.js +1646 -0
  202. package/dist/mcp/resources.d.ts +31 -0
  203. package/dist/mcp/resources.js +407 -0
  204. package/dist/mcp/server.d.ts +23 -0
  205. package/dist/mcp/server.js +251 -0
  206. package/dist/mcp/staleness.d.ts +15 -0
  207. package/dist/mcp/staleness.js +29 -0
  208. package/dist/mcp/tools.d.ts +24 -0
  209. package/dist/mcp/tools.js +195 -0
  210. package/dist/server/api.d.ts +10 -0
  211. package/dist/server/api.js +344 -0
  212. package/dist/server/mcp-http.d.ts +13 -0
  213. package/dist/server/mcp-http.js +100 -0
  214. package/dist/storage/git.d.ts +6 -0
  215. package/dist/storage/git.js +32 -0
  216. package/dist/storage/repo-manager.d.ts +125 -0
  217. package/dist/storage/repo-manager.js +257 -0
  218. package/dist/types/pipeline.d.ts +34 -0
  219. package/dist/types/pipeline.js +18 -0
  220. package/hooks/claude/gitnexus-hook.cjs +135 -0
  221. package/hooks/claude/pre-tool-use.sh +78 -0
  222. package/hooks/claude/session-start.sh +42 -0
  223. package/package.json +92 -0
  224. package/skills/gitnexus-cli.md +82 -0
  225. package/skills/gitnexus-debugging.md +89 -0
  226. package/skills/gitnexus-exploring.md +78 -0
  227. package/skills/gitnexus-guide.md +64 -0
  228. package/skills/gitnexus-impact-analysis.md +97 -0
  229. package/skills/gitnexus-refactoring.md +121 -0
  230. package/vendor/leiden/index.cjs +355 -0
  231. package/vendor/leiden/utils.cjs +392 -0
@@ -0,0 +1,329 @@
1
+ /**
2
+ * Embedding Pipeline Module
3
+ *
4
+ * Orchestrates the background embedding process:
5
+ * 1. Query embeddable nodes from KuzuDB
6
+ * 2. Generate text representations
7
+ * 3. Batch embed using transformers.js
8
+ * 4. Update KuzuDB with embeddings
9
+ * 5. Create vector index for semantic search
10
+ */
11
+ import { initEmbedder, embedBatch, embedText, embeddingToArray, isEmbedderReady } from './embedder.js';
12
+ import { generateBatchEmbeddingTexts } from './text-generator.js';
13
+ import { DEFAULT_EMBEDDING_CONFIG, EMBEDDABLE_LABELS, } from './types.js';
14
+ const isDev = process.env.NODE_ENV === 'development';
/**
 * Collect every embeddable node from KuzuDB.
 *
 * Issues one query per label in EMBEDDABLE_LABELS. `File` uses its own query
 * because File nodes have no startLine/endLine columns. If the query for a
 * label throws, that label is skipped (a warning is printed in development
 * only), so the returned list may cover only some of the labels.
 *
 * @param executeQuery - Function that runs a Cypher query and resolves to its rows
 * @returns Flat array of `{ id, name, label, filePath, content, startLine, endLine }`
 */
const queryEmbeddableNodes = async (executeQuery) => {
    // Build the table-specific Cypher text for one label.
    const buildQuery = (label) => (label === 'File'
        ? `
        MATCH (n:File)
        RETURN n.id AS id, n.name AS name, 'File' AS label,
               n.filePath AS filePath, n.content AS content
      `
        : `
        MATCH (n:${label})
        RETURN n.id AS id, n.name AS name, '${label}' AS label,
               n.filePath AS filePath, n.content AS content,
               n.startLine AS startLine, n.endLine AS endLine
      `);
    // Each field is read by column alias first, then by position.
    const toNode = (row) => ({
        id: row.id ?? row[0],
        name: row.name ?? row[1],
        label: row.label ?? row[2],
        filePath: row.filePath ?? row[3],
        content: row.content ?? row[4] ?? '',
        startLine: row.startLine ?? row[5],
        endLine: row.endLine ?? row[6],
    });
    const collected = [];
    for (const label of EMBEDDABLE_LABELS) {
        try {
            const rows = await executeQuery(buildQuery(label));
            for (const row of rows) {
                collected.push(toNode(row));
            }
        }
        catch (error) {
            // Best effort: the table might not exist or be empty, so move on.
            if (isDev) {
                console.warn(`Query for ${label} nodes failed:`, error);
            }
        }
    }
    return collected;
};
64
+ /**
65
+ * Batch INSERT embeddings into separate CodeEmbedding table
66
+ * Using a separate lightweight table avoids copy-on-write overhead
67
+ * that occurs when UPDATEing nodes with large content fields
68
+ */
69
+ const batchInsertEmbeddings = async (executeWithReusedStatement, updates) => {
70
+ // INSERT into separate embedding table - much more memory efficient!
71
+ const cypher = `CREATE (e:CodeEmbedding {nodeId: $nodeId, embedding: $embedding})`;
72
+ const paramsList = updates.map(u => ({ nodeId: u.id, embedding: u.embedding }));
73
+ await executeWithReusedStatement(cypher, paramsList);
74
+ };
/**
 * Create the cosine vector index `code_embedding_idx` over the `embedding`
 * column of the CodeEmbedding table.
 *
 * Any failure is swallowed (the index might already exist); in development
 * the error is printed as a warning.
 *
 * @param executeQuery - Function that runs a Cypher query
 */
const createVectorIndex = async (executeQuery) => {
    const indexStatement = `
    CALL CREATE_VECTOR_INDEX('CodeEmbedding', 'code_embedding_idx', 'embedding', metric := 'cosine')
  `;
    try {
        await executeQuery(indexStatement);
    }
    catch (error) {
        if (!isDev) {
            return;
        }
        console.warn('Vector index creation warning:', error);
    }
};
/**
 * Run the embedding pipeline end to end.
 *
 * Phases and the progress percentages reported through `onProgress`:
 *   1. loading-model (0-20%)  - initialise the embedder
 *   2. query embeddable nodes, optionally dropping ids in `skipNodeIds`
 *   3. embedding (20-90%)     - embed in batches and insert into CodeEmbedding
 *   4. indexing (90%)         - create the vector index
 *   5. ready (100%)
 *
 * @param executeQuery - Function to execute Cypher queries against KuzuDB
 * @param executeWithReusedStatement - Function to execute with reused prepared statement
 * @param onProgress - Callback for progress updates
 * @param config - Optional overrides merged over DEFAULT_EMBEDDING_CONFIG
 * @param skipNodeIds - Optional set of node IDs that already have embeddings (incremental mode)
 * @throws Re-throws any failure after reporting an `error` phase via `onProgress`.
 *         Also throws when there are nodes to embed but `batchSize` is not a
 *         positive integer.
 */
export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatement, onProgress, config = {}, skipNodeIds) => {
    const finalConfig = { ...DEFAULT_EMBEDDING_CONFIG, ...config };
    try {
        // Phase 1: Load embedding model
        onProgress({
            phase: 'loading-model',
            percent: 0,
            modelDownloadPercent: 0,
        });
        await initEmbedder((modelProgress) => {
            const downloadPercent = modelProgress.progress ?? 0;
            onProgress({
                phase: 'loading-model',
                // Model loading occupies the first 20% of the overall bar.
                percent: Math.round(downloadPercent * 0.2),
                modelDownloadPercent: downloadPercent,
            });
        }, finalConfig);
        onProgress({
            phase: 'loading-model',
            percent: 20,
            modelDownloadPercent: 100,
        });
        if (isDev) {
            console.log('🔍 Querying embeddable nodes...');
        }
        // Phase 2: Query embeddable nodes
        let nodes = await queryEmbeddableNodes(executeQuery);
        // Incremental mode: filter out nodes that already have embeddings
        if (skipNodeIds && skipNodeIds.size > 0) {
            const beforeCount = nodes.length;
            nodes = nodes.filter(n => !skipNodeIds.has(n.id));
            if (isDev) {
                console.log(`📦 Incremental embeddings: ${beforeCount} total, ${skipNodeIds.size} cached, ${nodes.length} to embed`);
            }
        }
        const totalNodes = nodes.length;
        if (isDev) {
            console.log(`📊 Found ${totalNodes} embeddable nodes`);
        }
        if (totalNodes === 0) {
            onProgress({
                phase: 'ready',
                percent: 100,
                nodesProcessed: 0,
                totalNodes: 0,
            });
            return;
        }
        // Phase 3: Batch embed nodes
        const batchSize = finalConfig.batchSize;
        // A batch size of 0 would make totalBatches Infinity and loop forever
        // over empty slices; NaN or a negative value would skip the loop and
        // report success without embedding anything. Reject both up front.
        if (!Number.isInteger(batchSize) || batchSize <= 0) {
            throw new Error(`Invalid embedding batchSize: ${batchSize} (expected a positive integer)`);
        }
        const totalBatches = Math.ceil(totalNodes / batchSize);
        let processedNodes = 0;
        onProgress({
            phase: 'embedding',
            percent: 20,
            nodesProcessed: 0,
            totalNodes,
            currentBatch: 0,
            totalBatches,
        });
        for (let batchIndex = 0; batchIndex < totalBatches; batchIndex++) {
            const start = batchIndex * batchSize;
            const end = Math.min(start + batchSize, totalNodes);
            const batch = nodes.slice(start, end);
            // Generate texts for this batch
            const texts = generateBatchEmbeddingTexts(batch, finalConfig);
            // Embed the batch
            const embeddings = await embedBatch(texts);
            // Pair each node id with its embedding (same order as the batch)
            const updates = batch.map((node, i) => ({
                id: node.id,
                embedding: embeddingToArray(embeddings[i]),
            }));
            await batchInsertEmbeddings(executeWithReusedStatement, updates);
            processedNodes += batch.length;
            // Report progress (20-90% for embedding phase)
            const embeddingProgress = 20 + ((processedNodes / totalNodes) * 70);
            onProgress({
                phase: 'embedding',
                percent: Math.round(embeddingProgress),
                nodesProcessed: processedNodes,
                totalNodes,
                currentBatch: batchIndex + 1,
                totalBatches,
            });
        }
        // Phase 4: Create vector index
        onProgress({
            phase: 'indexing',
            percent: 90,
            nodesProcessed: totalNodes,
            totalNodes,
        });
        if (isDev) {
            console.log('📇 Creating vector index...');
        }
        await createVectorIndex(executeQuery);
        // Complete
        onProgress({
            phase: 'ready',
            percent: 100,
            nodesProcessed: totalNodes,
            totalNodes,
        });
        if (isDev) {
            console.log('✅ Embedding pipeline complete!');
        }
    }
    catch (error) {
        const errorMessage = error instanceof Error ? error.message : 'Unknown error';
        if (isDev) {
            console.error('❌ Embedding pipeline error:', error);
        }
        // Surface the failure to the progress listener, then propagate it.
        onProgress({
            phase: 'error',
            percent: 0,
            error: errorMessage,
        });
        throw error;
    }
};
// Labels are interpolated into Cypher as table names, so only plain
// identifiers are accepted.
const NODE_LABEL_PATTERN = /^[A-Za-z_][A-Za-z0-9_]*$/;
/**
 * Perform semantic search using the vector index.
 *
 * Embeds `query`, asks the `code_embedding_idx` index on CodeEmbedding for the
 * `k` nearest entries, keeps those with distance below `maxDistance`, then
 * runs one follow-up query per hit against the hit's own node table to fetch
 * its metadata. Hits whose metadata lookup fails or returns no row are
 * omitted from the result.
 *
 * The query vector is cast to FLOAT[384], which matches the default
 * `dimensions` value in this package's embedding config.
 *
 * @param executeQuery - Function to execute Cypher queries
 * @param query - Search query text
 * @param k - Number of results to return (default: 10)
 * @param maxDistance - Maximum distance threshold (default: 0.5)
 * @returns Array of search results ordered by relevance
 * @throws Error when the embedding model has not been initialised
 */
export const semanticSearch = async (executeQuery, query, k = 10, maxDistance = 0.5) => {
    if (!isEmbedderReady()) {
        throw new Error('Embedding model not initialized. Run embedding pipeline first.');
    }
    // Embed the query
    const queryEmbedding = await embedText(query);
    const queryVec = embeddingToArray(queryEmbedding);
    const queryVecStr = `[${queryVec.join(',')}]`;
    // Query the vector index on CodeEmbedding to get nodeIds and distances
    const vectorQuery = `
    CALL QUERY_VECTOR_INDEX('CodeEmbedding', 'code_embedding_idx',
      CAST(${queryVecStr} AS FLOAT[384]), ${k})
    YIELD node AS emb, distance
    WITH emb, distance
    WHERE distance < ${maxDistance}
    RETURN emb.nodeId AS nodeId, distance
    ORDER BY distance
  `;
    const embResults = await executeQuery(vectorQuery);
    if (embResults.length === 0) {
        return [];
    }
    // Get metadata for each result by querying each node table
    const results = [];
    for (const embRow of embResults) {
        const nodeId = embRow.nodeId ?? embRow[0];
        const distance = embRow.distance ?? embRow[1];
        // A row without a usable id cannot be resolved; skip it instead of
        // letting `indexOf` throw and abort the whole search.
        if (typeof nodeId !== 'string') {
            continue;
        }
        // Extract label from node ID (format: Label:path:name)
        const labelEndIdx = nodeId.indexOf(':');
        const label = labelEndIdx > 0 ? nodeId.substring(0, labelEndIdx) : 'Unknown';
        // Never splice an arbitrary id prefix into the query as a table name.
        if (!NODE_LABEL_PATTERN.test(label)) {
            continue;
        }
        // Query the specific table for this node
        // File nodes don't have startLine/endLine
        try {
            let nodeQuery;
            if (label === 'File') {
                nodeQuery = `
          MATCH (n:File {id: '${nodeId.replace(/'/g, "''")}'})
          RETURN n.name AS name, n.filePath AS filePath
        `;
            }
            else {
                nodeQuery = `
          MATCH (n:${label} {id: '${nodeId.replace(/'/g, "''")}'})
          RETURN n.name AS name, n.filePath AS filePath,
                 n.startLine AS startLine, n.endLine AS endLine
        `;
            }
            const nodeRows = await executeQuery(nodeQuery);
            if (nodeRows.length > 0) {
                const nodeRow = nodeRows[0];
                results.push({
                    nodeId,
                    name: nodeRow.name ?? nodeRow[0] ?? '',
                    label,
                    filePath: nodeRow.filePath ?? nodeRow[1] ?? '',
                    distance,
                    startLine: label !== 'File' ? (nodeRow.startLine ?? nodeRow[2]) : undefined,
                    endLine: label !== 'File' ? (nodeRow.endLine ?? nodeRow[3]) : undefined,
                });
            }
        }
        catch {
            // Table might not exist, skip
        }
    }
    return results;
};
/**
 * Semantic search reshaped into the flattened "match + connection" row format.
 *
 * No graph expansion is performed here: the function delegates to
 * `semanticSearch` with a fixed distance threshold of 0.5 and fills every
 * `connected*` / `relationType` field with null. For full graph traversal the
 * original note points to the execute_vector_cypher tool.
 *
 * @param executeQuery - Function to execute Cypher queries
 * @param query - Search query text
 * @param k - Number of initial semantic matches (default: 5)
 * @param _hops - Unused (kept for API compatibility).
 * @returns Semantic matches with metadata
 */
export const semanticSearchWithContext = async (executeQuery, query, k = 5, _hops = 1) => {
    const hits = await semanticSearch(executeQuery, query, k, 0.5);
    const flattened = hits.map((hit) => {
        const { nodeId, name, label, filePath, distance } = hit;
        return {
            matchId: nodeId,
            matchName: name,
            matchLabel: label,
            matchPath: filePath,
            distance,
            connectedId: null,
            connectedName: null,
            connectedLabel: null,
            relationType: null,
        };
    });
    return flattened;
};
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Embeddings Module
3
+ *
4
+ * Re-exports for the embedding pipeline system.
5
+ */
6
+ export * from './types.js';
7
+ export * from './embedder.js';
8
+ export * from './text-generator.js';
9
+ export * from './embedding-pipeline.js';
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Embeddings Module
3
+ *
4
+ * Re-exports for the embedding pipeline system.
5
+ */
6
+ export * from './types.js';
7
+ export * from './embedder.js';
8
+ export * from './text-generator.js';
9
+ export * from './embedding-pipeline.js';
@@ -0,0 +1,24 @@
1
+ /**
2
+ * Text Generator Module
3
+ *
4
+ * Pure functions to generate embedding text from code nodes.
5
+ * Combines node metadata with code snippets for semantic matching.
6
+ */
7
+ import type { EmbeddableNode, EmbeddingConfig } from './types.js';
8
+ /**
9
+ * Generate embedding text for any embeddable node
10
+ * Dispatches to the appropriate generator based on node label
11
+ *
12
+ * @param node - The node to generate text for
13
+ * @param config - Optional configuration for max snippet length
14
+ * @returns Text suitable for embedding
15
+ */
16
+ export declare const generateEmbeddingText: (node: EmbeddableNode, config?: Partial<EmbeddingConfig>) => string;
17
+ /**
18
+ * Generate embedding texts for a batch of nodes
19
+ *
20
+ * @param nodes - Array of nodes to generate text for
21
+ * @param config - Optional configuration
22
+ * @returns Array of texts in the same order as input nodes
23
+ */
24
+ export declare const generateBatchEmbeddingTexts: (nodes: EmbeddableNode[], config?: Partial<EmbeddingConfig>) => string[];
@@ -0,0 +1,182 @@
1
+ /**
2
+ * Text Generator Module
3
+ *
4
+ * Pure functions to generate embedding text from code nodes.
5
+ * Combines node metadata with code snippets for semantic matching.
6
+ */
7
+ import { DEFAULT_EMBEDDING_CONFIG } from './types.js';
/**
 * Extract the filename (last path segment) from a file path.
 *
 * Both `/` and `\` are treated as separators so Windows-style paths resolve
 * to their last segment too. When the last segment is empty (empty input or
 * a trailing separator) the original path is returned unchanged.
 *
 * @param filePath - Path to take the last segment of
 * @returns The last path segment, or `filePath` itself if that segment is empty
 */
const getFileName = (filePath) => {
    const segments = filePath.split(/[\\/]/);
    return segments[segments.length - 1] || filePath;
};
/**
 * Extract the directory part of a file path (everything before the last segment).
 *
 * Both `/` and `\` are treated as separators; the result is always joined
 * with `/`. A path with no separator yields the empty string.
 *
 * @param filePath - Path to take the directory of
 * @returns Directory portion joined with `/`, or '' when there is none
 */
const getDirectory = (filePath) => {
    return filePath.split(/[\\/]/).slice(0, -1).join('/');
};
/**
 * Truncate content to at most `maxLength` characters plus a trailing '...'.
 *
 * Content that already fits is returned unchanged. Otherwise the cut is moved
 * back to the last whitespace character (space, newline or tab) when that
 * break lies in the final 20% of the window, so a word or token is not split;
 * if no such break exists the content is cut exactly at `maxLength`.
 *
 * @param content - Text to shorten
 * @param maxLength - Maximum number of characters kept before the ellipsis
 * @returns The original text, or a shortened copy ending in '...'
 */
const truncateContent = (content, maxLength) => {
    if (content.length <= maxLength) {
        return content;
    }
    const truncated = content.slice(0, maxLength);
    // Code snippets are mostly separated by newlines and tabs, not just spaces.
    const lastBreak = Math.max(truncated.lastIndexOf(' '), truncated.lastIndexOf('\n'), truncated.lastIndexOf('\t'));
    if (lastBreak > maxLength * 0.8) {
        return truncated.slice(0, lastBreak) + '...';
    }
    return truncated + '...';
};
/**
 * Clean code content for embedding.
 *
 * Steps, in order:
 *   1. normalise `\r\n` and lone `\r` line endings to `\n`
 *   2. strip trailing whitespace from every line
 *   3. collapse runs of three or more newlines to a single blank line
 *   4. trim the whole text
 *
 * Trailing whitespace is stripped before blank lines are collapsed so that
 * lines containing only spaces or tabs count as blank.
 *
 * @param content - Raw source text
 * @returns Normalised text with leading indentation preserved
 */
const cleanContent = (content) => {
    return content
        .replace(/\r\n?/g, '\n')
        .split('\n')
        .map(line => line.trimEnd())
        .join('\n')
        .replace(/\n{3,}/g, '\n\n')
        .trim();
};
/**
 * Shared builder for the embedding text of a code element.
 *
 * Layout (lines joined with '\n'):
 *   "<kind>: <name>"
 *   "File: <file name>"
 *   "Directory: <dir>"      (only when the path has a directory part)
 *   ""                      (blank separator, only when content is present)
 *   "<cleaned, truncated content>"
 *
 * @param kind - Heading word, e.g. 'Function' or 'Class'
 * @param node - Node providing name, filePath and content
 * @param maxSnippetLength - Maximum snippet length passed to truncateContent
 * @returns Text suitable for embedding
 */
const generateCodeElementText = (kind, node, maxSnippetLength) => {
    const parts = [
        `${kind}: ${node.name}`,
        `File: ${getFileName(node.filePath)}`,
    ];
    const dir = getDirectory(node.filePath);
    if (dir) {
        parts.push(`Directory: ${dir}`);
    }
    if (node.content) {
        const cleanedContent = cleanContent(node.content);
        const snippet = truncateContent(cleanedContent, maxSnippetLength);
        parts.push('', snippet);
    }
    return parts.join('\n');
};
/**
 * Generate embedding text for a Function node
 */
const generateFunctionText = (node, maxSnippetLength) => generateCodeElementText('Function', node, maxSnippetLength);
/**
 * Generate embedding text for a Class node
 */
const generateClassText = (node, maxSnippetLength) => generateCodeElementText('Class', node, maxSnippetLength);
/**
 * Generate embedding text for a Method node
 */
const generateMethodText = (node, maxSnippetLength) => generateCodeElementText('Method', node, maxSnippetLength);
/**
 * Generate embedding text for an Interface node
 */
const generateInterfaceText = (node, maxSnippetLength) => generateCodeElementText('Interface', node, maxSnippetLength);
/**
 * Build the embedding text for a File node.
 *
 * The text starts with "File: <name>" and "Path: <filePath>". When the node
 * has content, a blank line and a cleaned snippet follow; the snippet is
 * capped at the smaller of `maxSnippetLength` and 300 characters because
 * whole files can be very long.
 *
 * @param node - File node providing name, filePath and content
 * @param maxSnippetLength - Configured maximum snippet length
 * @returns Text suitable for embedding
 */
const generateFileText = (node, maxSnippetLength) => {
    const header = `File: ${node.name}\nPath: ${node.filePath}`;
    if (!node.content) {
        return header;
    }
    const snippetLimit = Math.min(maxSnippetLength, 300);
    const snippet = truncateContent(cleanContent(node.content), snippetLimit);
    return `${header}\n\n${snippet}`;
};
// Label -> generator lookup used by generateEmbeddingText.
const TEXT_GENERATORS_BY_LABEL = new Map([
    ['Function', generateFunctionText],
    ['Class', generateClassText],
    ['Method', generateMethodText],
    ['Interface', generateInterfaceText],
    ['File', generateFileText],
]);
/**
 * Generate embedding text for any embeddable node.
 *
 * Picks the generator registered for `node.label`. Labels without a
 * dedicated generator fall back to a two-line "<label>: <name>" / "Path: ..."
 * text with no code snippet.
 *
 * @param node - The node to generate text for
 * @param config - Optional configuration; only `maxSnippetLength` is read,
 *                 defaulting to DEFAULT_EMBEDDING_CONFIG.maxSnippetLength
 * @returns Text suitable for embedding
 */
export const generateEmbeddingText = (node, config = {}) => {
    const maxSnippetLength = config.maxSnippetLength ?? DEFAULT_EMBEDDING_CONFIG.maxSnippetLength;
    const generate = TEXT_GENERATORS_BY_LABEL.get(node.label);
    if (generate === undefined) {
        // Fallback for any other embeddable type
        return `${node.label}: ${node.name}\nPath: ${node.filePath}`;
    }
    return generate(node, maxSnippetLength);
};
/**
 * Generate embedding texts for a batch of nodes.
 *
 * Each node is passed to `generateEmbeddingText` with the same `config`.
 *
 * @param nodes - Array of nodes to generate text for
 * @param config - Optional configuration forwarded to each call
 * @returns Array of texts in the same order as the input nodes
 */
export const generateBatchEmbeddingTexts = (nodes, config = {}) => {
    const toText = (node) => generateEmbeddingText(node, config);
    const texts = nodes.map((node) => toText(node));
    return texts;
};
@@ -0,0 +1,87 @@
1
+ /**
2
+ * Embedding Pipeline Types
3
+ *
4
+ * Type definitions for the embedding generation and semantic search system.
5
+ */
6
+ /**
7
+ * Node labels that should be embedded for semantic search
8
+ * These are code elements that benefit from semantic matching
9
+ */
10
+ export declare const EMBEDDABLE_LABELS: readonly ["Function", "Class", "Method", "Interface", "File"];
11
+ export type EmbeddableLabel = typeof EMBEDDABLE_LABELS[number];
12
+ /**
13
+ * Check if a label should be embedded
14
+ */
15
+ export declare const isEmbeddableLabel: (label: string) => label is EmbeddableLabel;
16
+ /**
17
+ * Embedding pipeline phases
18
+ */
19
+ export type EmbeddingPhase = 'idle' | 'loading-model' | 'embedding' | 'indexing' | 'ready' | 'error';
20
+ /**
21
+ * Progress information for the embedding pipeline
22
+ */
23
+ export interface EmbeddingProgress {
24
+ phase: EmbeddingPhase;
25
+ percent: number;
26
+ modelDownloadPercent?: number;
27
+ nodesProcessed?: number;
28
+ totalNodes?: number;
29
+ currentBatch?: number;
30
+ totalBatches?: number;
31
+ error?: string;
32
+ }
33
+ /**
34
+ * Configuration for the embedding pipeline
35
+ */
36
+ export interface EmbeddingConfig {
37
+ /** Model identifier for transformers.js */
38
+ modelId: string;
39
+ /** Number of nodes to embed in each batch */
40
+ batchSize: number;
41
+ /** Embedding vector dimensions */
42
+ dimensions: number;
43
+ /** Device to use for inference: 'auto' tries GPU first (DirectML on Windows, CUDA on Linux), falls back to CPU */
44
+ device: 'auto' | 'dml' | 'cuda' | 'cpu' | 'wasm';
45
+ /** Maximum characters of code snippet to include */
46
+ maxSnippetLength: number;
47
+ }
48
+ /**
49
+ * Default embedding configuration
50
+ * Uses snowflake-arctic-embed-xs for browser efficiency
51
+ * Tries WebGPU first (fast), user can choose WASM fallback if unavailable
52
+ */
53
+ export declare const DEFAULT_EMBEDDING_CONFIG: EmbeddingConfig;
54
+ /**
55
+ * Result from semantic search
56
+ */
57
+ export interface SemanticSearchResult {
58
+ nodeId: string;
59
+ name: string;
60
+ label: string;
61
+ filePath: string;
62
+ distance: number;
63
+ startLine?: number;
64
+ endLine?: number;
65
+ }
66
+ /**
67
+ * Node data for embedding (minimal structure from KuzuDB query)
68
+ */
69
+ export interface EmbeddableNode {
70
+ id: string;
71
+ name: string;
72
+ label: string;
73
+ filePath: string;
74
+ content: string;
75
+ startLine?: number;
76
+ endLine?: number;
77
+ }
78
+ /**
79
+ * Model download progress from transformers.js
80
+ */
81
+ export interface ModelProgress {
82
+ status: 'initiate' | 'download' | 'progress' | 'done' | 'ready';
83
+ file?: string;
84
+ progress?: number;
85
+ loaded?: number;
86
+ total?: number;
87
+ }
@@ -0,0 +1,32 @@
1
+ /**
2
+ * Embedding Pipeline Types
3
+ *
4
+ * Type definitions for the embedding generation and semantic search system.
5
+ */
/**
 * Node labels that get an embedding for semantic search.
 * The embedding pipeline issues one query per entry, in this order.
 */
export const EMBEDDABLE_LABELS = ['Function', 'Class', 'Method', 'Interface', 'File'];
/**
 * Tell whether `label` is one of the entries of EMBEDDABLE_LABELS.
 *
 * @param label - Label to look up
 * @returns true when the label is embeddable, false otherwise
 */
export const isEmbeddableLabel = (label) => {
    return EMBEDDABLE_LABELS.some((candidate) => candidate === label);
};
21
+ /**
22
+ * Default embedding configuration; callers' overrides are merged on top of these values.
23
+ * Uses the Snowflake/snowflake-arctic-embed-xs model, configured for 384-dimensional vectors.
24
+ * device 'auto': per the EmbeddingConfig.device documentation, tries GPU first (DirectML on Windows, CUDA on Linux) and falls back to CPU.
25
+ */
26
+ export const DEFAULT_EMBEDDING_CONFIG = {
27
+ modelId: 'Snowflake/snowflake-arctic-embed-xs',
28
+ batchSize: 16,
29
+ dimensions: 384,
30
+ device: 'auto',
31
+ maxSnippetLength: 500,
32
+ };
@@ -0,0 +1,2 @@
1
+ import { KnowledgeGraph } from './types.js';
2
+ export declare const createKnowledgeGraph: () => KnowledgeGraph;