gitnexus 1.6.2-rc.2 → 1.6.2-rc.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_shared/lbug/schema-constants.d.ts +1 -1
- package/dist/_shared/lbug/schema-constants.d.ts.map +1 -1
- package/dist/_shared/lbug/schema-constants.js +1 -0
- package/dist/_shared/lbug/schema-constants.js.map +1 -1
- package/dist/cli/analyze.js +3 -0
- package/dist/core/embeddings/ast-utils.d.ts +22 -0
- package/dist/core/embeddings/ast-utils.js +105 -0
- package/dist/core/embeddings/character-chunk.d.ts +12 -0
- package/dist/core/embeddings/character-chunk.js +43 -0
- package/dist/core/embeddings/chunker.d.ts +14 -0
- package/dist/core/embeddings/chunker.js +234 -0
- package/dist/core/embeddings/embedder.js +5 -0
- package/dist/core/embeddings/embedding-pipeline.d.ts +29 -24
- package/dist/core/embeddings/embedding-pipeline.js +244 -125
- package/dist/core/embeddings/line-index.d.ts +7 -0
- package/dist/core/embeddings/line-index.js +42 -0
- package/dist/core/embeddings/server-mapping.d.ts +15 -0
- package/dist/core/embeddings/server-mapping.js +33 -0
- package/dist/core/embeddings/structural-extractor.d.ts +15 -0
- package/dist/core/embeddings/structural-extractor.js +58 -0
- package/dist/core/embeddings/text-generator.d.ts +20 -13
- package/dist/core/embeddings/text-generator.js +151 -119
- package/dist/core/embeddings/types.d.ts +81 -3
- package/dist/core/embeddings/types.js +105 -3
- package/dist/core/group/extractors/http-patterns/node.js +130 -0
- package/dist/core/group/extractors/manifest-extractor.js +20 -5
- package/dist/core/group/sync.js +49 -1
- package/dist/core/ingestion/call-extractors/configs/c-cpp.d.ts +3 -0
- package/dist/core/ingestion/call-extractors/configs/c-cpp.js +8 -0
- package/dist/core/ingestion/call-extractors/configs/csharp.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/csharp.js +6 -0
- package/dist/core/ingestion/call-extractors/configs/dart.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/dart.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/go.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/go.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/jvm.d.ts +3 -0
- package/dist/core/ingestion/call-extractors/configs/jvm.js +51 -0
- package/dist/core/ingestion/call-extractors/configs/php.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/php.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/python.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/python.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/ruby.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/ruby.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/rust.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/rust.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/swift.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/swift.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/typescript-javascript.d.ts +3 -0
- package/dist/core/ingestion/call-extractors/configs/typescript-javascript.js +8 -0
- package/dist/core/ingestion/call-extractors/generic.d.ts +5 -0
- package/dist/core/ingestion/call-extractors/generic.js +59 -0
- package/dist/core/ingestion/call-processor.d.ts +1 -3
- package/dist/core/ingestion/call-processor.js +49 -47
- package/dist/core/ingestion/call-types.d.ts +60 -0
- package/dist/core/ingestion/call-types.js +2 -0
- package/dist/core/ingestion/class-extractors/configs/c-cpp.d.ts +3 -0
- package/dist/core/ingestion/class-extractors/configs/c-cpp.js +11 -0
- package/dist/core/ingestion/class-extractors/configs/csharp.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/csharp.js +21 -0
- package/dist/core/ingestion/class-extractors/configs/dart.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/dart.js +7 -0
- package/dist/core/ingestion/class-extractors/configs/go.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/go.js +20 -0
- package/dist/core/ingestion/class-extractors/configs/jvm.d.ts +3 -0
- package/dist/core/ingestion/class-extractors/configs/jvm.js +35 -0
- package/dist/core/ingestion/class-extractors/configs/php.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/php.js +7 -0
- package/dist/core/ingestion/class-extractors/configs/python.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/python.js +7 -0
- package/dist/core/ingestion/class-extractors/configs/ruby.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/ruby.js +7 -0
- package/dist/core/ingestion/class-extractors/configs/rust.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/rust.js +7 -0
- package/dist/core/ingestion/class-extractors/configs/swift.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/swift.js +18 -0
- package/dist/core/ingestion/class-extractors/configs/typescript-javascript.d.ts +4 -0
- package/dist/core/ingestion/class-extractors/configs/typescript-javascript.js +28 -0
- package/dist/core/ingestion/field-types.d.ts +1 -1
- package/dist/core/ingestion/import-resolvers/configs/c-cpp.d.ts +7 -0
- package/dist/core/ingestion/import-resolvers/configs/c-cpp.js +14 -0
- package/dist/core/ingestion/import-resolvers/configs/csharp.d.ts +8 -0
- package/dist/core/ingestion/import-resolvers/configs/csharp.js +27 -0
- package/dist/core/ingestion/import-resolvers/configs/dart.d.ts +17 -0
- package/dist/core/ingestion/import-resolvers/{dart.js → configs/dart.js} +26 -16
- package/dist/core/ingestion/import-resolvers/configs/go.d.ts +8 -0
- package/dist/core/ingestion/import-resolvers/configs/go.js +26 -0
- package/dist/core/ingestion/import-resolvers/configs/jvm.d.ts +13 -0
- package/dist/core/ingestion/import-resolvers/configs/jvm.js +68 -0
- package/dist/core/ingestion/import-resolvers/configs/php.d.ts +8 -0
- package/dist/core/ingestion/import-resolvers/configs/php.js +15 -0
- package/dist/core/ingestion/import-resolvers/configs/python.d.ts +12 -0
- package/dist/core/ingestion/import-resolvers/configs/python.js +27 -0
- package/dist/core/ingestion/import-resolvers/configs/ruby.d.ts +8 -0
- package/dist/core/ingestion/import-resolvers/configs/ruby.js +16 -0
- package/dist/core/ingestion/import-resolvers/configs/rust.d.ts +8 -0
- package/dist/core/ingestion/import-resolvers/configs/rust.js +54 -0
- package/dist/core/ingestion/import-resolvers/configs/swift.d.ts +8 -0
- package/dist/core/ingestion/import-resolvers/{swift.js → configs/swift.js} +10 -5
- package/dist/core/ingestion/import-resolvers/configs/typescript-javascript.d.ts +9 -0
- package/dist/core/ingestion/import-resolvers/configs/typescript-javascript.js +23 -0
- package/dist/core/ingestion/import-resolvers/csharp.d.ts +4 -5
- package/dist/core/ingestion/import-resolvers/csharp.js +4 -20
- package/dist/core/ingestion/import-resolvers/go.d.ts +4 -5
- package/dist/core/ingestion/import-resolvers/go.js +4 -19
- package/dist/core/ingestion/import-resolvers/jvm.d.ts +5 -10
- package/dist/core/ingestion/import-resolvers/jvm.js +5 -58
- package/dist/core/ingestion/import-resolvers/php.d.ts +4 -5
- package/dist/core/ingestion/import-resolvers/php.js +4 -7
- package/dist/core/ingestion/import-resolvers/python.d.ts +3 -6
- package/dist/core/ingestion/import-resolvers/python.js +3 -18
- package/dist/core/ingestion/import-resolvers/resolver-factory.d.ts +24 -0
- package/dist/core/ingestion/import-resolvers/resolver-factory.js +33 -0
- package/dist/core/ingestion/import-resolvers/ruby.d.ts +4 -5
- package/dist/core/ingestion/import-resolvers/ruby.js +4 -7
- package/dist/core/ingestion/import-resolvers/rust.d.ts +4 -5
- package/dist/core/ingestion/import-resolvers/rust.js +4 -47
- package/dist/core/ingestion/import-resolvers/standard.d.ts +3 -9
- package/dist/core/ingestion/import-resolvers/standard.js +7 -8
- package/dist/core/ingestion/import-resolvers/types.d.ts +24 -0
- package/dist/core/ingestion/language-provider.d.ts +12 -0
- package/dist/core/ingestion/languages/c-cpp.js +15 -12
- package/dist/core/ingestion/languages/csharp.js +11 -21
- package/dist/core/ingestion/languages/dart.js +11 -7
- package/dist/core/ingestion/languages/go.js +11 -20
- package/dist/core/ingestion/languages/java.js +11 -18
- package/dist/core/ingestion/languages/kotlin.js +11 -13
- package/dist/core/ingestion/languages/php.js +11 -7
- package/dist/core/ingestion/languages/python.js +11 -7
- package/dist/core/ingestion/languages/ruby.js +11 -7
- package/dist/core/ingestion/languages/rust.js +11 -7
- package/dist/core/ingestion/languages/swift.js +11 -18
- package/dist/core/ingestion/languages/typescript.js +15 -23
- package/dist/core/ingestion/languages/vue.js +11 -17
- package/dist/core/ingestion/model/index.d.ts +2 -2
- package/dist/core/ingestion/model/index.js +1 -1
- package/dist/core/ingestion/model/resolve.d.ts +3 -0
- package/dist/core/ingestion/model/resolve.js +6 -2
- package/dist/core/ingestion/parsing-processor.d.ts +1 -2
- package/dist/core/ingestion/tree-sitter-queries.d.ts +11 -11
- package/dist/core/ingestion/tree-sitter-queries.js +81 -0
- package/dist/core/ingestion/type-env.d.ts +1 -1
- package/dist/core/ingestion/utils/ast-helpers.d.ts +1 -1
- package/dist/core/ingestion/utils/ast-helpers.js +3 -0
- package/dist/core/ingestion/variable-extractors/configs/c-cpp.d.ts +3 -0
- package/dist/core/ingestion/variable-extractors/configs/c-cpp.js +81 -0
- package/dist/core/ingestion/variable-extractors/configs/csharp.d.ts +9 -0
- package/dist/core/ingestion/variable-extractors/configs/csharp.js +63 -0
- package/dist/core/ingestion/variable-extractors/configs/dart.d.ts +2 -0
- package/dist/core/ingestion/variable-extractors/configs/dart.js +94 -0
- package/dist/core/ingestion/variable-extractors/configs/go.d.ts +2 -0
- package/dist/core/ingestion/variable-extractors/configs/go.js +83 -0
- package/dist/core/ingestion/variable-extractors/configs/jvm.d.ts +18 -0
- package/dist/core/ingestion/variable-extractors/configs/jvm.js +115 -0
- package/dist/core/ingestion/variable-extractors/configs/php.d.ts +14 -0
- package/dist/core/ingestion/variable-extractors/configs/php.js +58 -0
- package/dist/core/ingestion/variable-extractors/configs/python.d.ts +2 -0
- package/dist/core/ingestion/variable-extractors/configs/python.js +101 -0
- package/dist/core/ingestion/variable-extractors/configs/ruby.d.ts +11 -0
- package/dist/core/ingestion/variable-extractors/configs/ruby.js +52 -0
- package/dist/core/ingestion/variable-extractors/configs/rust.d.ts +2 -0
- package/dist/core/ingestion/variable-extractors/configs/rust.js +76 -0
- package/dist/core/ingestion/variable-extractors/configs/swift.d.ts +2 -0
- package/dist/core/ingestion/variable-extractors/configs/swift.js +88 -0
- package/dist/core/ingestion/variable-extractors/configs/typescript-javascript.d.ts +3 -0
- package/dist/core/ingestion/variable-extractors/configs/typescript-javascript.js +83 -0
- package/dist/core/ingestion/variable-extractors/generic.d.ts +5 -0
- package/dist/core/ingestion/variable-extractors/generic.js +80 -0
- package/dist/core/ingestion/variable-types.d.ts +82 -0
- package/dist/core/ingestion/variable-types.js +2 -0
- package/dist/core/ingestion/workers/parse-worker.js +196 -166
- package/dist/core/ingestion/workers/worker-pool.js +3 -0
- package/dist/core/lbug/csv-generator.js +1 -0
- package/dist/core/lbug/lbug-adapter.d.ts +13 -4
- package/dist/core/lbug/lbug-adapter.js +166 -81
- package/dist/core/lbug/schema.d.ts +9 -1
- package/dist/core/lbug/schema.js +19 -2
- package/dist/core/run-analyze.js +17 -4
- package/dist/core/tree-sitter/parser-loader.d.ts +3 -0
- package/dist/core/tree-sitter/parser-loader.js +17 -8
- package/dist/mcp/core/embedder.js +5 -0
- package/dist/mcp/local/local-backend.js +29 -19
- package/dist/server/api.js +10 -21
- package/package.json +5 -3
- package/scripts/build-tree-sitter-proto.cjs +82 -0
- package/vendor/node_modules/node-addon-api/node_addon_api.Makefile +6 -0
- package/vendor/node_modules/node-addon-api/node_addon_api.target.mk +104 -0
- package/vendor/node_modules/node-addon-api/node_addon_api_except.target.mk +108 -0
- package/vendor/node_modules/node-addon-api/node_addon_api_except_all.target.mk +104 -0
- package/vendor/node_modules/node-addon-api/node_addon_api_maybe.target.mk +104 -0
- package/vendor/tree-sitter-proto/package.json +1 -7
- package/dist/core/ingestion/call-sites/extract-language-call-site.d.ts +0 -10
- package/dist/core/ingestion/call-sites/extract-language-call-site.js +0 -22
- package/dist/core/ingestion/call-sites/java.d.ts +0 -9
- package/dist/core/ingestion/call-sites/java.js +0 -30
- package/dist/core/ingestion/import-resolvers/dart.d.ts +0 -7
- package/dist/core/ingestion/import-resolvers/swift.d.ts +0 -7
- package/dist/core/ingestion/import-resolvers/vue.d.ts +0 -8
- package/dist/core/ingestion/import-resolvers/vue.js +0 -9
|
@@ -3,44 +3,76 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Orchestrates the background embedding process:
|
|
5
5
|
* 1. Query embeddable nodes from LadybugDB
|
|
6
|
-
* 2. Generate text representations
|
|
7
|
-
* 3.
|
|
8
|
-
* 4. Update LadybugDB with embeddings
|
|
6
|
+
* 2. Generate text representations with enriched metadata
|
|
7
|
+
* 3. Chunk long nodes, batch embed
|
|
8
|
+
* 4. Update LadybugDB with chunk-aware embeddings
|
|
9
9
|
* 5. Create vector index for semantic search
|
|
10
10
|
*/
|
|
11
|
+
import { createHash } from 'crypto';
|
|
11
12
|
import { initEmbedder, embedBatch, embedText, embeddingToArray, isEmbedderReady, } from './embedder.js';
|
|
12
|
-
import {
|
|
13
|
-
import {
|
|
13
|
+
import { generateEmbeddingText } from './text-generator.js';
|
|
14
|
+
import { chunkNode, characterChunk } from './chunker.js';
|
|
15
|
+
import { extractStructuralNames } from './structural-extractor.js';
|
|
16
|
+
import { DEFAULT_EMBEDDING_CONFIG, EMBEDDABLE_LABELS, isShortLabel, LABELS_WITH_EXPORTED, STRUCTURAL_LABELS, collectBestChunks, } from './types.js';
|
|
17
|
+
import { EMBEDDING_TABLE_NAME, EMBEDDING_INDEX_NAME, CREATE_VECTOR_INDEX_QUERY, STALE_HASH_SENTINEL, } from '../lbug/schema.js';
|
|
18
|
+
import { loadVectorExtension } from '../lbug/lbug-adapter.js';
|
|
14
19
|
const isDev = process.env.NODE_ENV === 'development';
|
|
20
|
+
/**
 * Derive a content fingerprint (hex SHA-1) for an embeddable node.
 *
 * The fingerprint is taken over the embedding text generated from the node's
 * full `content`, so a change in that text yields a different hash and the
 * stored vectors for the node can be recognised as stale and replaced
 * (DELETE-then-INSERT, the Kuzu-sanctioned pattern for vector-indexed rows).
 *
 * @param node - Embeddable node; its `content` is used as the text body.
 * @param config - Optional embedding configuration forwarded to the text generator.
 * @returns Hex-encoded SHA-1 digest of the generated text.
 */
export const contentHashForNode = (node, config = {}) => {
    // methodNames/fieldNames are filled in later, during the batch loop, via
    // AST extraction. Blank them out here so the hash depends only on data
    // that is the same on every run.
    const hashableNode = Object.assign({}, node, {
        methodNames: undefined,
        fieldNames: undefined,
    });
    const fingerprintSource = generateEmbeddingText(hashableNode, node.content, config);
    const hasher = createHash('sha1');
    hasher.update(fingerprintSource);
    return hasher.digest('hex');
};
|
|
15
33
|
/**
|
|
16
34
|
* Query all embeddable nodes from LadybugDB
|
|
17
|
-
* Uses table-specific queries
|
|
35
|
+
* Uses table-specific queries for different label types
|
|
18
36
|
*/
|
|
19
37
|
const queryEmbeddableNodes = async (executeQuery) => {
|
|
20
38
|
const allNodes = [];
|
|
21
|
-
// Query each embeddable table with table-specific columns
|
|
22
39
|
for (const label of EMBEDDABLE_LABELS) {
|
|
23
40
|
try {
|
|
24
41
|
let query;
|
|
25
|
-
if (label === '
|
|
26
|
-
//
|
|
42
|
+
if (label === 'Method') {
|
|
43
|
+
// Method has parameterCount and returnType
|
|
27
44
|
query = `
|
|
28
|
-
MATCH (n:
|
|
29
|
-
RETURN n.id AS id, n.name AS name, '
|
|
30
|
-
n.filePath AS filePath, n.content AS content
|
|
45
|
+
MATCH (n:Method)
|
|
46
|
+
RETURN n.id AS id, n.name AS name, 'Method' AS label,
|
|
47
|
+
n.filePath AS filePath, n.content AS content,
|
|
48
|
+
n.startLine AS startLine, n.endLine AS endLine,
|
|
49
|
+
n.isExported AS isExported, n.description AS description,
|
|
50
|
+
n.parameterCount AS parameterCount, n.returnType AS returnType
|
|
51
|
+
`;
|
|
52
|
+
}
|
|
53
|
+
else if (LABELS_WITH_EXPORTED.has(label)) {
|
|
54
|
+
// Function, Class, Interface have isExported and description
|
|
55
|
+
query = `
|
|
56
|
+
MATCH (n:\`${label}\`)
|
|
57
|
+
RETURN n.id AS id, n.name AS name, '${label}' AS label,
|
|
58
|
+
n.filePath AS filePath, n.content AS content,
|
|
59
|
+
n.startLine AS startLine, n.endLine AS endLine,
|
|
60
|
+
n.isExported AS isExported, n.description AS description
|
|
31
61
|
`;
|
|
32
62
|
}
|
|
33
63
|
else {
|
|
34
|
-
//
|
|
64
|
+
// Multi-language tables (Struct, Enum, etc.) — have description but no isExported
|
|
35
65
|
query = `
|
|
36
|
-
MATCH (n
|
|
37
|
-
RETURN n.id AS id, n.name AS name, '${label}' AS label,
|
|
66
|
+
MATCH (n:\`${label}\`)
|
|
67
|
+
RETURN n.id AS id, n.name AS name, '${label}' AS label,
|
|
38
68
|
n.filePath AS filePath, n.content AS content,
|
|
39
|
-
n.startLine AS startLine, n.endLine AS endLine
|
|
69
|
+
n.startLine AS startLine, n.endLine AS endLine,
|
|
70
|
+
n.description AS description
|
|
40
71
|
`;
|
|
41
72
|
}
|
|
42
73
|
const rows = await executeQuery(query);
|
|
43
74
|
for (const row of rows) {
|
|
75
|
+
const hasExportedColumn = label === 'Method' || LABELS_WITH_EXPORTED.has(label);
|
|
44
76
|
allNodes.push({
|
|
45
77
|
id: row.id ?? row[0],
|
|
46
78
|
name: row.name ?? row[1],
|
|
@@ -49,11 +81,18 @@ const queryEmbeddableNodes = async (executeQuery) => {
|
|
|
49
81
|
content: row.content ?? row[4] ?? '',
|
|
50
82
|
startLine: row.startLine ?? row[5],
|
|
51
83
|
endLine: row.endLine ?? row[6],
|
|
84
|
+
isExported: hasExportedColumn ? (row.isExported ?? row[7]) : undefined,
|
|
85
|
+
description: row.description ?? (hasExportedColumn ? row[8] : row[7]),
|
|
86
|
+
...(label === 'Method'
|
|
87
|
+
? {
|
|
88
|
+
parameterCount: row.parameterCount ?? row[9],
|
|
89
|
+
returnType: row.returnType ?? row[10],
|
|
90
|
+
}
|
|
91
|
+
: {}),
|
|
52
92
|
});
|
|
53
93
|
}
|
|
54
94
|
}
|
|
55
95
|
catch (error) {
|
|
56
|
-
// Table might not exist or be empty, continue
|
|
57
96
|
if (isDev) {
|
|
58
97
|
console.warn(`Query for ${label} nodes failed:`, error);
|
|
59
98
|
}
|
|
@@ -62,42 +101,36 @@ const queryEmbeddableNodes = async (executeQuery) => {
|
|
|
62
101
|
return allNodes;
|
|
63
102
|
};
|
|
64
103
|
/**
|
|
65
|
-
* Batch INSERT embeddings into
|
|
66
|
-
* Using a separate lightweight table avoids copy-on-write overhead
|
|
67
|
-
* that occurs when UPDATEing nodes with large content fields
|
|
104
|
+
* Batch INSERT chunk-aware embeddings into CodeEmbedding table
|
|
68
105
|
*/
|
|
69
|
-
const batchInsertEmbeddings = async (executeWithReusedStatement, updates) => {
|
|
70
|
-
|
|
71
|
-
const
|
|
72
|
-
|
|
106
|
+
/**
 * Insert one row per chunk into the embedding table.
 *
 * Every row gets the id `<nodeId>:<chunkIndex>` and carries the chunk's line
 * range, its embedding vector and a content hash. Updates that arrive without
 * a `contentHash` are stored with STALE_HASH_SENTINEL instead.
 *
 * @param executeWithReusedStatement - Callback invoked with the Cypher
 *   statement and the list of parameter objects (one object per row).
 * @param updates - Chunk records: `nodeId`, `chunkIndex`, `startLine`,
 *   `endLine`, `embedding` and optionally `contentHash`.
 * @returns Promise that settles when the callback has finished; resolves
 *   immediately, without calling the callback, when `updates` is empty.
 */
export const batchInsertEmbeddings = async (executeWithReusedStatement, updates) => {
    // Nothing to insert: avoid handing the executor a statement with zero
    // parameter sets.
    if (updates.length === 0) {
        return;
    }
    const cypher = `CREATE (e:${EMBEDDING_TABLE_NAME} {id: $id, nodeId: $nodeId, chunkIndex: $chunkIndex, startLine: $startLine, endLine: $endLine, embedding: $embedding, contentHash: $contentHash})`;
    const paramsList = updates.map((u) => ({
        // Composite key: one row per (node, chunk) pair.
        id: `${u.nodeId}:${u.chunkIndex}`,
        nodeId: u.nodeId,
        chunkIndex: u.chunkIndex,
        startLine: u.startLine,
        endLine: u.endLine,
        embedding: u.embedding,
        contentHash: u.contentHash ?? STALE_HASH_SENTINEL,
    }));
    await executeWithReusedStatement(cypher, paramsList);
};
|
|
75
119
|
/**
|
|
76
120
|
* Create the vector index for semantic search
|
|
77
|
-
|
|
121
|
+
|
|
122
|
+
* Now indexes the separate CodeEmbedding table.
|
|
123
|
+
* Delegates extension loading to lbug-adapter's loadVectorExtension(),
|
|
124
|
+
* which owns the VECTOR extension lifecycle and state tracking.
|
|
125
|
+
|
|
78
126
|
*/
|
|
79
|
-
let vectorExtensionLoaded = false;
|
|
80
127
|
const createVectorIndex = async (executeQuery) => {
|
|
81
|
-
//
|
|
82
|
-
|
|
83
|
-
try {
|
|
84
|
-
await executeQuery('INSTALL VECTOR');
|
|
85
|
-
await executeQuery('LOAD EXTENSION VECTOR');
|
|
86
|
-
vectorExtensionLoaded = true;
|
|
87
|
-
}
|
|
88
|
-
catch {
|
|
89
|
-
// Extension may already be loaded — CREATE_VECTOR_INDEX will fail clearly if not
|
|
90
|
-
vectorExtensionLoaded = true;
|
|
91
|
-
}
|
|
92
|
-
}
|
|
93
|
-
const cypher = `
|
|
94
|
-
CALL CREATE_VECTOR_INDEX('CodeEmbedding', 'code_embedding_idx', 'embedding', metric := 'cosine')
|
|
95
|
-
`;
|
|
128
|
+
// Delegate to the adapter which tracks loaded state and handles DB reconnect resets
|
|
129
|
+
await loadVectorExtension();
|
|
96
130
|
try {
|
|
97
|
-
await executeQuery(
|
|
131
|
+
await executeQuery(CREATE_VECTOR_INDEX_QUERY);
|
|
98
132
|
}
|
|
99
133
|
catch (error) {
|
|
100
|
-
// Index might already exist
|
|
101
134
|
if (isDev) {
|
|
102
135
|
console.warn('Vector index creation warning:', error);
|
|
103
136
|
}
|
|
@@ -111,8 +144,13 @@ const createVectorIndex = async (executeQuery) => {
|
|
|
111
144
|
* @param onProgress - Callback for progress updates
|
|
112
145
|
* @param config - Optional configuration override
|
|
113
146
|
* @param skipNodeIds - Optional set of node IDs that already have embeddings (incremental mode)
|
|
147
|
+
* @param context - Optional repo/server context for metadata enrichment
|
|
148
|
+
* @param existingEmbeddings - Optional map of nodeId → contentHash for incremental mode.
|
|
149
|
+
* Nodes whose hash matches are skipped; nodes with a changed hash are DELETE'd
|
|
150
|
+
* and re-embedded; nodes not in the map are embedded fresh.
|
|
151
|
+
|
|
114
152
|
*/
|
|
115
|
-
export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatement, onProgress, config = {}, skipNodeIds) => {
|
|
153
|
+
export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatement, onProgress, config = {}, skipNodeIds, context, existingEmbeddings) => {
|
|
116
154
|
const finalConfig = { ...DEFAULT_EMBEDDING_CONFIG, ...config };
|
|
117
155
|
try {
|
|
118
156
|
// Phase 1: Load embedding model
|
|
@@ -141,12 +179,57 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
|
|
|
141
179
|
}
|
|
142
180
|
// Phase 2: Query embeddable nodes
|
|
143
181
|
let nodes = await queryEmbeddableNodes(executeQuery);
|
|
144
|
-
//
|
|
145
|
-
if (
|
|
182
|
+
// Apply context metadata
|
|
183
|
+
if (context?.repoName) {
|
|
184
|
+
for (const node of nodes) {
|
|
185
|
+
node.repoName = context.repoName;
|
|
186
|
+
node.serverName = context.serverName;
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
// Incremental mode: compare content hashes, delete stale rows, skip fresh ones.
|
|
190
|
+
// Computed hashes for stale nodes are cached so batchInsertEmbeddings can reuse them
|
|
191
|
+
// (avoids double computation).
|
|
192
|
+
const computedStaleHashes = new Map();
|
|
193
|
+
if (existingEmbeddings && existingEmbeddings.size > 0) {
|
|
146
194
|
const beforeCount = nodes.length;
|
|
147
|
-
|
|
195
|
+
const staleNodeIds = [];
|
|
196
|
+
nodes = nodes.filter((n) => {
|
|
197
|
+
const existingHash = existingEmbeddings.get(n.id);
|
|
198
|
+
if (existingHash === undefined) {
|
|
199
|
+
// New node — needs embedding
|
|
200
|
+
return true;
|
|
201
|
+
}
|
|
202
|
+
const currentHash = contentHashForNode(n, finalConfig);
|
|
203
|
+
if (currentHash !== existingHash) {
|
|
204
|
+
// Content changed — cache hash for reuse during insert, mark for DELETE + re-embed
|
|
205
|
+
computedStaleHashes.set(n.id, currentHash);
|
|
206
|
+
staleNodeIds.push(n.id);
|
|
207
|
+
return true;
|
|
208
|
+
}
|
|
209
|
+
// Hash matches — skip (fresh); no need to cache hash for skipped nodes
|
|
210
|
+
return false;
|
|
211
|
+
});
|
|
212
|
+
// DELETE stale embedding rows so they can be re-inserted
|
|
213
|
+
// (Kuzu forbids SET on vector-indexed properties; DELETE-then-INSERT is the sanctioned pattern)
|
|
214
|
+
if (staleNodeIds.length > 0) {
|
|
215
|
+
if (isDev) {
|
|
216
|
+
console.log(`🔄 Deleting ${staleNodeIds.length} stale embedding rows for re-embed`);
|
|
217
|
+
}
|
|
218
|
+
try {
|
|
219
|
+
await executeWithReusedStatement(`MATCH (e:${EMBEDDING_TABLE_NAME} {nodeId: $nodeId}) DELETE e`, staleNodeIds.map((nodeId) => ({ nodeId })));
|
|
220
|
+
}
|
|
221
|
+
catch (err) {
|
|
222
|
+
// "does not exist" = rows already gone — safe to proceed.
|
|
223
|
+
// All other errors risk vector-index corruption (Kuzu requires DELETE-before-INSERT
|
|
224
|
+
// for vector-indexed properties) — propagate so the pipeline aborts cleanly.
|
|
225
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
226
|
+
if (!msg.includes('does not exist')) {
|
|
227
|
+
throw new Error(`[embed] Failed to delete stale embedding rows — aborting to prevent vector-index corruption: ${msg}`);
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
}
|
|
148
231
|
if (isDev) {
|
|
149
|
-
console.log(`📦 Incremental embeddings: ${beforeCount} total, ${
|
|
232
|
+
console.log(`📦 Incremental embeddings: ${beforeCount} total, ${existingEmbeddings.size} cached, ${staleNodeIds.length} stale, ${nodes.length} to embed`);
|
|
150
233
|
}
|
|
151
234
|
}
|
|
152
235
|
const totalNodes = nodes.length;
|
|
@@ -154,6 +237,10 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
|
|
|
154
237
|
console.log(`📊 Found ${totalNodes} embeddable nodes`);
|
|
155
238
|
}
|
|
156
239
|
if (totalNodes === 0) {
|
|
240
|
+
// Ensure the vector index exists even when no new nodes need embedding.
|
|
241
|
+
// A prior crash or first-time incremental run may have left CodeEmbedding
|
|
242
|
+
// rows without ever reaching index creation.
|
|
243
|
+
await createVectorIndex(executeQuery);
|
|
157
244
|
onProgress({
|
|
158
245
|
phase: 'ready',
|
|
159
246
|
percent: 100,
|
|
@@ -162,42 +249,99 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
|
|
|
162
249
|
});
|
|
163
250
|
return;
|
|
164
251
|
}
|
|
165
|
-
// Phase 3:
|
|
252
|
+
// Phase 3: Chunk + embed nodes
|
|
166
253
|
const batchSize = finalConfig.batchSize;
|
|
167
|
-
const
|
|
254
|
+
const chunkSize = finalConfig.chunkSize;
|
|
255
|
+
const overlap = finalConfig.overlap;
|
|
168
256
|
let processedNodes = 0;
|
|
257
|
+
let totalChunks = 0;
|
|
169
258
|
onProgress({
|
|
170
259
|
phase: 'embedding',
|
|
171
260
|
percent: 20,
|
|
172
261
|
nodesProcessed: 0,
|
|
173
262
|
totalNodes,
|
|
174
263
|
currentBatch: 0,
|
|
175
|
-
totalBatches,
|
|
264
|
+
totalBatches: Math.ceil(totalNodes / batchSize),
|
|
176
265
|
});
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
const
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
const
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
266
|
+
// Process in batches of nodes
|
|
267
|
+
for (let batchIndex = 0; batchIndex < totalNodes; batchIndex += batchSize) {
|
|
268
|
+
const batch = nodes.slice(batchIndex, batchIndex + batchSize);
|
|
269
|
+
// Chunk each node and generate text
|
|
270
|
+
const allTexts = [];
|
|
271
|
+
const allUpdates = [];
|
|
272
|
+
for (const node of batch) {
|
|
273
|
+
const isShort = isShortLabel(node.label);
|
|
274
|
+
const startLine = node.startLine ?? 0;
|
|
275
|
+
const endLine = node.endLine ?? 0;
|
|
276
|
+
// Extract structural names for class-like nodes via AST extractors
|
|
277
|
+
if (!isShort && STRUCTURAL_LABELS.has(node.label)) {
|
|
278
|
+
try {
|
|
279
|
+
const names = await extractStructuralNames(node.content, node.filePath);
|
|
280
|
+
node.methodNames = names.methodNames;
|
|
281
|
+
node.fieldNames = names.fieldNames;
|
|
282
|
+
}
|
|
283
|
+
catch {
|
|
284
|
+
// AST extraction failed — names stay undefined, text-generator handles gracefully
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
// Compute content hash once per node (re-use cached value for stale nodes)
|
|
288
|
+
const hash = computedStaleHashes.get(node.id) ?? contentHashForNode(node, finalConfig);
|
|
289
|
+
let chunks;
|
|
290
|
+
if (isShort) {
|
|
291
|
+
chunks = [{ text: node.content, chunkIndex: 0, startLine, endLine }];
|
|
292
|
+
}
|
|
293
|
+
else {
|
|
294
|
+
try {
|
|
295
|
+
chunks = await chunkNode(node.label, node.content, node.filePath, startLine, endLine, chunkSize, overlap);
|
|
296
|
+
}
|
|
297
|
+
catch (chunkErr) {
|
|
298
|
+
if (isDev) {
|
|
299
|
+
console.warn(`⚠️ AST chunking failed for ${node.label} "${node.name}" (${node.filePath}), falling back to character-based chunking:`, chunkErr);
|
|
300
|
+
}
|
|
301
|
+
chunks = characterChunk(node.content, startLine, endLine, chunkSize, overlap);
|
|
302
|
+
}
|
|
303
|
+
}
|
|
304
|
+
for (const chunk of chunks) {
|
|
305
|
+
const text = generateEmbeddingText(node, chunk.text, finalConfig);
|
|
306
|
+
allTexts.push(text);
|
|
307
|
+
allUpdates.push({
|
|
308
|
+
nodeId: node.id,
|
|
309
|
+
chunkIndex: chunk.chunkIndex,
|
|
310
|
+
startLine: chunk.startLine,
|
|
311
|
+
endLine: chunk.endLine,
|
|
312
|
+
contentHash: hash,
|
|
313
|
+
});
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
// Embed chunk texts in sub-batches to control memory
|
|
317
|
+
const EMBED_SUB_BATCH = 8;
|
|
318
|
+
for (let si = 0; si < allTexts.length; si += EMBED_SUB_BATCH) {
|
|
319
|
+
const subTexts = allTexts.slice(si, si + EMBED_SUB_BATCH);
|
|
320
|
+
const subUpdates = allUpdates.slice(si, si + EMBED_SUB_BATCH);
|
|
321
|
+
let embeddings;
|
|
322
|
+
try {
|
|
323
|
+
embeddings = await embedBatch(subTexts);
|
|
324
|
+
}
|
|
325
|
+
catch (embedErr) {
|
|
326
|
+
console.error(`❌ embedBatch failed for ${subTexts.length} texts (first: "${subTexts[0]?.substring(0, 80)}..."):`, embedErr);
|
|
327
|
+
throw embedErr;
|
|
328
|
+
}
|
|
329
|
+
const dbUpdates = subUpdates.map((u, i) => ({
|
|
330
|
+
...u,
|
|
331
|
+
embedding: embeddingToArray(embeddings[i]),
|
|
332
|
+
}));
|
|
333
|
+
await batchInsertEmbeddings(executeWithReusedStatement, dbUpdates);
|
|
334
|
+
}
|
|
191
335
|
processedNodes += batch.length;
|
|
192
|
-
|
|
336
|
+
totalChunks += allUpdates.length;
|
|
193
337
|
const embeddingProgress = 20 + (processedNodes / totalNodes) * 70;
|
|
194
338
|
onProgress({
|
|
195
339
|
phase: 'embedding',
|
|
196
340
|
percent: Math.round(embeddingProgress),
|
|
197
341
|
nodesProcessed: processedNodes,
|
|
198
342
|
totalNodes,
|
|
199
|
-
currentBatch: batchIndex + 1,
|
|
200
|
-
totalBatches,
|
|
343
|
+
currentBatch: Math.floor(batchIndex / batchSize) + 1,
|
|
344
|
+
totalBatches: Math.ceil(totalNodes / batchSize),
|
|
201
345
|
});
|
|
202
346
|
}
|
|
203
347
|
// Phase 4: Create vector index
|
|
@@ -211,7 +355,6 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
|
|
|
211
355
|
console.log('📇 Creating vector index...');
|
|
212
356
|
}
|
|
213
357
|
await createVectorIndex(executeQuery);
|
|
214
|
-
// Complete
|
|
215
358
|
onProgress({
|
|
216
359
|
phase: 'ready',
|
|
217
360
|
percent: 100,
|
|
@@ -219,7 +362,7 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
|
|
|
219
362
|
totalNodes,
|
|
220
363
|
});
|
|
221
364
|
if (isDev) {
|
|
222
|
-
console.log(
|
|
365
|
+
console.log(`✅ Embedding pipeline complete! (${totalChunks} chunks from ${totalNodes} nodes)`);
|
|
223
366
|
}
|
|
224
367
|
}
|
|
225
368
|
catch (error) {
|
|
@@ -236,68 +379,57 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
|
|
|
236
379
|
}
|
|
237
380
|
};
|
|
238
381
|
/**
|
|
239
|
-
* Perform semantic search using the vector index
|
|
240
|
-
*
|
|
241
|
-
* Uses CodeEmbedding table and queries each node table to get metadata
|
|
242
|
-
*
|
|
243
|
-
* @param executeQuery - Function to execute Cypher queries
|
|
244
|
-
* @param query - Search query text
|
|
245
|
-
* @param k - Number of results to return (default: 10)
|
|
246
|
-
* @param maxDistance - Maximum distance threshold (default: 0.5)
|
|
247
|
-
* @returns Array of search results ordered by relevance
|
|
382
|
+
* Perform semantic search using the vector index with chunk deduplication
|
|
248
383
|
*/
|
|
249
384
|
export const semanticSearch = async (executeQuery, query, k = 10, maxDistance = 0.5) => {
|
|
250
385
|
if (!isEmbedderReady()) {
|
|
251
386
|
throw new Error('Embedding model not initialized. Run embedding pipeline first.');
|
|
252
387
|
}
|
|
253
|
-
// Embed the query
|
|
254
388
|
const queryEmbedding = await embedText(query);
|
|
255
389
|
const queryVec = embeddingToArray(queryEmbedding);
|
|
256
390
|
const queryVecStr = `[${queryVec.join(',')}]`;
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
391
|
+
const bestChunks = await collectBestChunks(k, async (fetchLimit) => {
|
|
392
|
+
const vectorQuery = `
|
|
393
|
+
CALL QUERY_VECTOR_INDEX('${EMBEDDING_TABLE_NAME}', '${EMBEDDING_INDEX_NAME}',
|
|
394
|
+
CAST(${queryVecStr} AS FLOAT[${queryVec.length}]), ${fetchLimit})
|
|
395
|
+
YIELD node AS emb, distance
|
|
396
|
+
WITH emb, distance
|
|
397
|
+
WHERE distance < ${maxDistance}
|
|
398
|
+
RETURN emb.nodeId AS nodeId, emb.chunkIndex AS chunkIndex,
|
|
399
|
+
emb.startLine AS startLine, emb.endLine AS endLine, distance
|
|
400
|
+
ORDER BY distance
|
|
401
|
+
`;
|
|
402
|
+
const embResults = await executeQuery(vectorQuery);
|
|
403
|
+
return embResults.map((row) => ({
|
|
404
|
+
nodeId: row.nodeId ?? row[0],
|
|
405
|
+
chunkIndex: row.chunkIndex ?? row[1] ?? 0,
|
|
406
|
+
startLine: row.startLine ?? row[2] ?? 0,
|
|
407
|
+
endLine: row.endLine ?? row[3] ?? 0,
|
|
408
|
+
distance: row.distance ?? row[4],
|
|
409
|
+
}));
|
|
410
|
+
});
|
|
411
|
+
if (bestChunks.size === 0) {
|
|
269
412
|
return [];
|
|
270
413
|
}
|
|
271
414
|
// Group results by label for batched metadata queries
|
|
272
415
|
const byLabel = new Map();
|
|
273
|
-
for (const
|
|
274
|
-
const nodeId = embRow.nodeId ?? embRow[0];
|
|
275
|
-
const distance = embRow.distance ?? embRow[1];
|
|
416
|
+
for (const [nodeId, chunk] of Array.from(bestChunks.entries()).slice(0, k)) {
|
|
276
417
|
const labelEndIdx = nodeId.indexOf(':');
|
|
277
418
|
const label = labelEndIdx > 0 ? nodeId.substring(0, labelEndIdx) : 'Unknown';
|
|
278
419
|
if (!byLabel.has(label))
|
|
279
420
|
byLabel.set(label, []);
|
|
280
|
-
byLabel.get(label).push({ nodeId,
|
|
421
|
+
byLabel.get(label).push({ nodeId, ...chunk });
|
|
281
422
|
}
|
|
282
423
|
// Batch-fetch metadata per label
|
|
283
424
|
const results = [];
|
|
284
425
|
for (const [label, items] of byLabel) {
|
|
285
426
|
const idList = items.map((i) => `'${i.nodeId.replace(/'/g, "''")}'`).join(', ');
|
|
286
427
|
try {
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
`;
|
|
293
|
-
}
|
|
294
|
-
else {
|
|
295
|
-
nodeQuery = `
|
|
296
|
-
MATCH (n:${label}) WHERE n.id IN [${idList}]
|
|
297
|
-
RETURN n.id AS id, n.name AS name, n.filePath AS filePath,
|
|
298
|
-
n.startLine AS startLine, n.endLine AS endLine
|
|
299
|
-
`;
|
|
300
|
-
}
|
|
428
|
+
const nodeQuery = `
|
|
429
|
+
MATCH (n:\`${label}\`) WHERE n.id IN [${idList}]
|
|
430
|
+
RETURN n.id AS id, n.name AS name, n.filePath AS filePath,
|
|
431
|
+
n.startLine AS startLine, n.endLine AS endLine
|
|
432
|
+
`;
|
|
301
433
|
const nodeRows = await executeQuery(nodeQuery);
|
|
302
434
|
const rowMap = new Map();
|
|
303
435
|
for (const row of nodeRows) {
|
|
@@ -313,8 +445,8 @@ export const semanticSearch = async (executeQuery, query, k = 10, maxDistance =
|
|
|
313
445
|
label,
|
|
314
446
|
filePath: nodeRow.filePath ?? nodeRow[2] ?? '',
|
|
315
447
|
distance: item.distance,
|
|
316
|
-
startLine:
|
|
317
|
-
endLine:
|
|
448
|
+
startLine: item.startLine,
|
|
449
|
+
endLine: item.endLine,
|
|
318
450
|
});
|
|
319
451
|
}
|
|
320
452
|
}
|
|
@@ -323,26 +455,13 @@ export const semanticSearch = async (executeQuery, query, k = 10, maxDistance =
|
|
|
323
455
|
// Table might not exist, skip
|
|
324
456
|
}
|
|
325
457
|
}
|
|
326
|
-
// Re-sort by distance since batch queries may have mixed order
|
|
327
458
|
results.sort((a, b) => a.distance - b.distance);
|
|
328
459
|
return results;
|
|
329
460
|
};
|
|
330
461
|
/**
|
|
331
462
|
* Semantic search with graph expansion (flattened results)
|
|
332
|
-
*
|
|
333
|
-
* Note: With multi-table schema, graph traversal is simplified.
|
|
334
|
-
* Returns semantic matches with their metadata.
|
|
335
|
-
* For full graph traversal, use execute_vector_cypher tool directly.
|
|
336
|
-
*
|
|
337
|
-
* @param executeQuery - Function to execute Cypher queries
|
|
338
|
-
* @param query - Search query text
|
|
339
|
-
* @param k - Number of initial semantic matches (default: 5)
|
|
340
|
-
* @param _hops - Unused (kept for API compatibility).
|
|
341
|
-
* @returns Semantic matches with metadata
|
|
342
463
|
*/
|
|
343
464
|
export const semanticSearchWithContext = async (executeQuery, query, k = 5, _hops = 1) => {
|
|
344
|
-
// For multi-table schema, just return semantic search results
|
|
345
|
-
// Graph traversal is complex with separate tables - use execute_vector_cypher instead
|
|
346
465
|
const results = await semanticSearch(executeQuery, query, k, 0.5);
|
|
347
466
|
return results.map((r) => ({
|
|
348
467
|
matchId: r.nodeId,
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
/** Line range computed by resolveChunkLines for a character span of some text. */
export interface ResolvedLineRange {
|
|
2
|
+
/** baseStartLine plus the line index (within the indexed text) that contains the span's start offset. */
startLine: number;
|
|
3
|
+
/** baseStartLine plus the line index that contains the span's last character (the end offset is treated as exclusive). */
endLine: number;
|
|
4
|
+
}
|
|
5
|
+
/**
 * Build a table of line-start character offsets for `content`.
 * The first entry is always 0; every further entry is the offset right after a '\n'.
 */
export declare const buildLineIndex: (content: string) => Int32Array;
|
|
6
|
+
/**
 * Return the 0-based index of the line whose start offset is the last one
 * less than or equal to `charOffset`. Negative offsets map to line 0, offsets
 * past the last line start map to the last line, and an empty table yields 0.
 */
export declare const lineFromOffset: (lineOffsets: Int32Array, charOffset: number) => number;
|
|
7
|
+
/**
 * Convert the character span [startOffset, endOffset) into a line range.
 * Both line indices are looked up in `lineOffsets` and then shifted by `baseStartLine`.
 * An empty or inverted span resolves to the line of `startOffset` for both ends.
 */
export declare const resolveChunkLines: (lineOffsets: Int32Array, startOffset: number, endOffset: number, baseStartLine: number) => ResolvedLineRange;
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
 * Build a table of line-start offsets for `content`.
 *
 * Entry `i` is the character offset at which line `i` (0-based) begins.
 * Line 0 always starts at offset 0; each later entry is the offset
 * immediately following a line-feed character ('\n').
 *
 * @param content - Text to index.
 * @returns Int32Array of line-start offsets, in ascending order.
 */
export const buildLineIndex = (content) => {
    const lineStarts = [0];
    // Hop from one '\n' to the next instead of inspecting every character.
    let newlineAt = content.indexOf('\n');
    while (newlineAt !== -1) {
        lineStarts.push(newlineAt + 1);
        newlineAt = content.indexOf('\n', newlineAt + 1);
    }
    return new Int32Array(lineStarts);
};
|
|
9
|
+
/**
 * Restrict `charOffset` to the range covered by the line table.
 * Negative offsets become 0, offsets beyond the start of the last line
 * become that start offset, and an empty table yields 0.
 */
const clampOffset = (lineOffsets, charOffset) => {
    const lineCount = lineOffsets.length;
    if (lineCount === 0 || charOffset < 0) {
        return 0;
    }
    const lastLineStart = lineOffsets[lineCount - 1];
    return charOffset > lastLineStart ? lastLineStart : charOffset;
};
/**
 * Map a character offset to the 0-based index of the line containing it.
 *
 * @param lineOffsets - Ascending line-start offsets (as built by buildLineIndex).
 * @param charOffset - Character offset to locate; clamped into the table's range.
 * @returns Index of the last line whose start offset is <= the clamped offset,
 *          or 0 when the table is empty.
 */
export const lineFromOffset = (lineOffsets, charOffset) => {
    if (lineOffsets.length === 0) {
        return 0;
    }
    const target = clampOffset(lineOffsets, charOffset);
    let low = 0;
    let high = lineOffsets.length - 1;
    // Upper-biased binary search: the midpoint rounds up so that `low = probe`
    // always makes progress and the loop ends on the last entry <= target.
    while (low < high) {
        const probe = (low + high + 1) >> 1;
        const startsAtOrBefore = lineOffsets[probe] <= target;
        if (startsAtOrBefore) {
            low = probe;
        }
        else {
            high = probe - 1;
        }
    }
    return low;
};
|
|
34
|
+
/**
 * Translate a chunk's character span into a line range.
 *
 * @param lineOffsets - Line-start offsets of the text the chunk was cut from.
 * @param startOffset - Offset of the chunk's first character.
 * @param endOffset - Offset just past the chunk's last character (exclusive).
 * @param baseStartLine - Line number added to both resolved line indices.
 * @returns `{ startLine, endLine }`, each being `baseStartLine` plus the
 *          corresponding line index found in `lineOffsets`.
 */
export const resolveChunkLines = (lineOffsets, startOffset, endOffset, baseStartLine) => {
    // The end offset is exclusive, so a non-empty span ends on the line that
    // holds its last character; an empty or inverted span collapses onto the start.
    let lastCharOffset = startOffset;
    if (endOffset > startOffset) {
        lastCharOffset = endOffset - 1;
    }
    const firstLineDelta = lineFromOffset(lineOffsets, startOffset);
    const lastLineDelta = lineFromOffset(lineOffsets, lastCharOffset);
    return {
        startLine: baseStartLine + firstLineDelta,
        endLine: baseStartLine + lastLineDelta,
    };
};
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Server Mapping Configuration
|
|
3
|
+
*
|
|
4
|
+
* Reads ~/.gitnexus/server-mapping.json to map repo names to service names.
|
|
5
|
+
* Used in embedding text to enrich metadata with microservice context.
|
|
6
|
+
*/
|
|
7
|
+
/**
|
|
8
|
+
* Read the server mapping file and return the serverName for a given repoName.
|
|
9
|
+
* Returns undefined if no mapping exists, or if the mapping file cannot be read or parsed.
|
|
10
|
+
*/
|
|
11
|
+
export declare const readServerMapping: (repoName: string) => Promise<string | undefined>;
|
|
12
|
+
/**
|
|
13
|
+
* Clear the cached mapping (useful for testing or after file changes)
|
|
14
|
+
*/
|
|
15
|
+
export declare const clearServerMappingCache: () => void;
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Server Mapping Configuration
|
|
3
|
+
*
|
|
4
|
+
* Reads ~/.gitnexus/server-mapping.json to map repo names to service names.
|
|
5
|
+
* Used in embedding text to enrich metadata with microservice context.
|
|
6
|
+
*/
|
|
7
|
+
import fs from 'fs/promises';
|
|
8
|
+
import path from 'path';
|
|
9
|
+
import os from 'os';
|
|
10
|
+
const MAPPING_FILE = path.join(os.homedir(), '.gitnexus', 'server-mapping.json');
|
|
11
|
+
let cachedMapping = null;
|
|
12
|
+
/**
 * Look up the server name configured for `repoName` in the server mapping file.
 *
 * The file is read and parsed on first use and the parsed value is kept in
 * `cachedMapping` for later calls. This is a best-effort lookup: any failure
 * to read or parse the file results in `undefined` rather than an exception.
 *
 * @param repoName - Repository name used as the key in the mapping.
 * @returns The mapped server name, or `undefined` when the file is missing or
 *          invalid, the mapping is not a plain JSON object, the key is not
 *          present, or the stored value is not a string.
 */
export const readServerMapping = async (repoName) => {
    try {
        if (!cachedMapping) {
            const raw = await fs.readFile(MAPPING_FILE, 'utf-8');
            cachedMapping = JSON.parse(raw);
        }
        // The file content is untrusted JSON: only a plain object is a usable mapping.
        if (cachedMapping === null || typeof cachedMapping !== 'object' || Array.isArray(cachedMapping)) {
            return undefined;
        }
        // Own-property check so names such as "constructor" or "toString" do not
        // resolve to values inherited from Object.prototype.
        if (!Object.prototype.hasOwnProperty.call(cachedMapping, repoName)) {
            return undefined;
        }
        const serverName = cachedMapping[repoName];
        // Honour the declared contract: only string values are server names.
        return typeof serverName === 'string' ? serverName : undefined;
    }
    catch {
        // Missing or malformed mapping file: the mapping is optional, so report "no mapping".
        return undefined;
    }
};
|
|
28
|
+
/**
 * Discard the in-memory copy of the server mapping so that the next lookup
 * reads the mapping file again (handy in tests or after the file was edited).
 */
export const clearServerMappingCache = () => {
    cachedMapping = null;
};
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Structural Extractor Module
|
|
3
|
+
*
|
|
4
|
+
* Reuses ingestion pipeline's AST-based MethodExtractor / FieldExtractor
|
|
5
|
+
* to extract method and field names for embedding text generation.
|
|
6
|
+
*/
|
|
7
|
+
export interface StructuralNames {
|
|
8
|
+
methodNames: string[];
|
|
9
|
+
fieldNames: string[];
|
|
10
|
+
}
|
|
11
|
+
/**
|
|
12
|
+
* Extract method and field names from a class/struct/interface node
|
|
13
|
+
* using the ingestion pipeline's AST extractors.
|
|
14
|
+
*/
|
|
15
|
+
export declare const extractStructuralNames: (content: string, filePath: string) => Promise<StructuralNames>;
|