gitnexus 1.6.2-rc.8 → 1.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_shared/lbug/schema-constants.d.ts +1 -1
- package/dist/_shared/lbug/schema-constants.d.ts.map +1 -1
- package/dist/_shared/lbug/schema-constants.js +1 -0
- package/dist/_shared/lbug/schema-constants.js.map +1 -1
- package/dist/_shared/mro-strategy.d.ts +38 -16
- package/dist/_shared/mro-strategy.d.ts.map +1 -1
- package/dist/cli/ai-context.js +0 -58
- package/dist/cli/analyze.js +3 -0
- package/dist/core/embeddings/ast-utils.d.ts +22 -0
- package/dist/core/embeddings/ast-utils.js +105 -0
- package/dist/core/embeddings/character-chunk.d.ts +12 -0
- package/dist/core/embeddings/character-chunk.js +43 -0
- package/dist/core/embeddings/chunker.d.ts +14 -0
- package/dist/core/embeddings/chunker.js +234 -0
- package/dist/core/embeddings/embedder.js +5 -0
- package/dist/core/embeddings/embedding-pipeline.d.ts +20 -24
- package/dist/core/embeddings/embedding-pipeline.js +176 -107
- package/dist/core/embeddings/line-index.d.ts +7 -0
- package/dist/core/embeddings/line-index.js +42 -0
- package/dist/core/embeddings/server-mapping.d.ts +15 -0
- package/dist/core/embeddings/server-mapping.js +33 -0
- package/dist/core/embeddings/structural-extractor.d.ts +15 -0
- package/dist/core/embeddings/structural-extractor.js +58 -0
- package/dist/core/embeddings/text-generator.d.ts +20 -13
- package/dist/core/embeddings/text-generator.js +151 -119
- package/dist/core/embeddings/types.d.ts +81 -3
- package/dist/core/embeddings/types.js +105 -3
- package/dist/core/group/extractors/http-patterns/node.js +130 -0
- package/dist/core/ingestion/call-extractors/configs/c-cpp.d.ts +3 -0
- package/dist/core/ingestion/call-extractors/configs/c-cpp.js +8 -0
- package/dist/core/ingestion/call-extractors/configs/csharp.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/csharp.js +6 -0
- package/dist/core/ingestion/call-extractors/configs/dart.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/dart.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/go.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/go.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/jvm.d.ts +3 -0
- package/dist/core/ingestion/call-extractors/configs/jvm.js +51 -0
- package/dist/core/ingestion/call-extractors/configs/php.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/php.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/python.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/python.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/ruby.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/ruby.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/rust.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/rust.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/swift.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/swift.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/typescript-javascript.d.ts +3 -0
- package/dist/core/ingestion/call-extractors/configs/typescript-javascript.js +8 -0
- package/dist/core/ingestion/call-extractors/generic.d.ts +5 -0
- package/dist/core/ingestion/call-extractors/generic.js +59 -0
- package/dist/core/ingestion/call-processor.d.ts +2 -4
- package/dist/core/ingestion/call-processor.js +221 -89
- package/dist/core/ingestion/call-routing.d.ts +8 -12
- package/dist/core/ingestion/call-routing.js +13 -34
- package/dist/core/ingestion/call-types.d.ts +135 -0
- package/dist/core/ingestion/call-types.js +2 -0
- package/dist/core/ingestion/class-extractors/configs/c-cpp.d.ts +3 -0
- package/dist/core/ingestion/class-extractors/configs/c-cpp.js +11 -0
- package/dist/core/ingestion/class-extractors/configs/csharp.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/csharp.js +21 -0
- package/dist/core/ingestion/class-extractors/configs/dart.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/dart.js +7 -0
- package/dist/core/ingestion/class-extractors/configs/go.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/go.js +20 -0
- package/dist/core/ingestion/class-extractors/configs/jvm.d.ts +3 -0
- package/dist/core/ingestion/class-extractors/configs/jvm.js +35 -0
- package/dist/core/ingestion/class-extractors/configs/php.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/php.js +7 -0
- package/dist/core/ingestion/class-extractors/configs/python.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/python.js +7 -0
- package/dist/core/ingestion/class-extractors/configs/ruby.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/ruby.js +7 -0
- package/dist/core/ingestion/class-extractors/configs/rust.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/rust.js +7 -0
- package/dist/core/ingestion/class-extractors/configs/swift.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/swift.js +18 -0
- package/dist/core/ingestion/class-extractors/configs/typescript-javascript.d.ts +4 -0
- package/dist/core/ingestion/class-extractors/configs/typescript-javascript.js +28 -0
- package/dist/core/ingestion/field-types.d.ts +1 -1
- package/dist/core/ingestion/heritage-extractors/configs/go.d.ts +13 -0
- package/dist/core/ingestion/heritage-extractors/configs/go.js +20 -0
- package/dist/core/ingestion/heritage-extractors/configs/ruby.d.ts +18 -0
- package/dist/core/ingestion/heritage-extractors/configs/ruby.js +65 -0
- package/dist/core/ingestion/heritage-extractors/generic.d.ts +23 -0
- package/dist/core/ingestion/heritage-extractors/generic.js +47 -0
- package/dist/core/ingestion/heritage-processor.d.ts +9 -0
- package/dist/core/ingestion/heritage-processor.js +120 -85
- package/dist/core/ingestion/heritage-types.d.ts +73 -0
- package/dist/core/ingestion/heritage-types.js +2 -0
- package/dist/core/ingestion/import-resolvers/configs/c-cpp.d.ts +7 -0
- package/dist/core/ingestion/import-resolvers/configs/c-cpp.js +14 -0
- package/dist/core/ingestion/import-resolvers/configs/csharp.d.ts +8 -0
- package/dist/core/ingestion/import-resolvers/configs/csharp.js +27 -0
- package/dist/core/ingestion/import-resolvers/configs/dart.d.ts +17 -0
- package/dist/core/ingestion/import-resolvers/{dart.js → configs/dart.js} +26 -16
- package/dist/core/ingestion/import-resolvers/configs/go.d.ts +8 -0
- package/dist/core/ingestion/import-resolvers/configs/go.js +26 -0
- package/dist/core/ingestion/import-resolvers/configs/jvm.d.ts +13 -0
- package/dist/core/ingestion/import-resolvers/configs/jvm.js +68 -0
- package/dist/core/ingestion/import-resolvers/configs/php.d.ts +8 -0
- package/dist/core/ingestion/import-resolvers/configs/php.js +15 -0
- package/dist/core/ingestion/import-resolvers/configs/python.d.ts +12 -0
- package/dist/core/ingestion/import-resolvers/configs/python.js +41 -0
- package/dist/core/ingestion/import-resolvers/configs/ruby.d.ts +8 -0
- package/dist/core/ingestion/import-resolvers/configs/ruby.js +16 -0
- package/dist/core/ingestion/import-resolvers/configs/rust.d.ts +8 -0
- package/dist/core/ingestion/import-resolvers/configs/rust.js +54 -0
- package/dist/core/ingestion/import-resolvers/configs/swift.d.ts +8 -0
- package/dist/core/ingestion/import-resolvers/{swift.js → configs/swift.js} +10 -5
- package/dist/core/ingestion/import-resolvers/configs/typescript-javascript.d.ts +9 -0
- package/dist/core/ingestion/import-resolvers/configs/typescript-javascript.js +23 -0
- package/dist/core/ingestion/import-resolvers/csharp.d.ts +4 -5
- package/dist/core/ingestion/import-resolvers/csharp.js +4 -20
- package/dist/core/ingestion/import-resolvers/go.d.ts +4 -5
- package/dist/core/ingestion/import-resolvers/go.js +4 -19
- package/dist/core/ingestion/import-resolvers/jvm.d.ts +5 -10
- package/dist/core/ingestion/import-resolvers/jvm.js +5 -58
- package/dist/core/ingestion/import-resolvers/php.d.ts +4 -5
- package/dist/core/ingestion/import-resolvers/php.js +4 -7
- package/dist/core/ingestion/import-resolvers/python.d.ts +3 -6
- package/dist/core/ingestion/import-resolvers/python.js +3 -18
- package/dist/core/ingestion/import-resolvers/resolver-factory.d.ts +24 -0
- package/dist/core/ingestion/import-resolvers/resolver-factory.js +33 -0
- package/dist/core/ingestion/import-resolvers/ruby.d.ts +4 -5
- package/dist/core/ingestion/import-resolvers/ruby.js +4 -7
- package/dist/core/ingestion/import-resolvers/rust.d.ts +4 -5
- package/dist/core/ingestion/import-resolvers/rust.js +4 -47
- package/dist/core/ingestion/import-resolvers/standard.d.ts +3 -9
- package/dist/core/ingestion/import-resolvers/standard.js +7 -8
- package/dist/core/ingestion/import-resolvers/types.d.ts +24 -0
- package/dist/core/ingestion/language-provider.d.ts +80 -0
- package/dist/core/ingestion/languages/c-cpp.js +18 -12
- package/dist/core/ingestion/languages/csharp.js +13 -21
- package/dist/core/ingestion/languages/dart.js +13 -7
- package/dist/core/ingestion/languages/go.js +14 -20
- package/dist/core/ingestion/languages/java.js +13 -18
- package/dist/core/ingestion/languages/kotlin.js +13 -13
- package/dist/core/ingestion/languages/php.js +13 -7
- package/dist/core/ingestion/languages/python.js +13 -7
- package/dist/core/ingestion/languages/ruby.js +103 -22
- package/dist/core/ingestion/languages/rust.js +13 -7
- package/dist/core/ingestion/languages/swift.js +13 -18
- package/dist/core/ingestion/languages/typescript.js +18 -23
- package/dist/core/ingestion/languages/vue.js +13 -17
- package/dist/core/ingestion/model/heritage-map.d.ts +35 -0
- package/dist/core/ingestion/model/heritage-map.js +110 -9
- package/dist/core/ingestion/model/index.d.ts +2 -2
- package/dist/core/ingestion/model/index.js +1 -1
- package/dist/core/ingestion/model/resolve.d.ts +33 -28
- package/dist/core/ingestion/model/resolve.js +111 -27
- package/dist/core/ingestion/parsing-processor.d.ts +1 -2
- package/dist/core/ingestion/pipeline-phases/parse-impl.d.ts +1 -0
- package/dist/core/ingestion/pipeline-phases/parse-impl.js +9 -3
- package/dist/core/ingestion/pipeline-phases/parse.d.ts +7 -0
- package/dist/core/ingestion/pipeline.d.ts +11 -0
- package/dist/core/ingestion/pipeline.js +9 -2
- package/dist/core/ingestion/tree-sitter-queries.d.ts +11 -11
- package/dist/core/ingestion/tree-sitter-queries.js +81 -0
- package/dist/core/ingestion/type-env.d.ts +1 -1
- package/dist/core/ingestion/utils/ast-helpers.d.ts +1 -1
- package/dist/core/ingestion/utils/ast-helpers.js +22 -2
- package/dist/core/ingestion/utils/ruby-self-call.d.ts +52 -0
- package/dist/core/ingestion/utils/ruby-self-call.js +59 -0
- package/dist/core/ingestion/variable-extractors/configs/c-cpp.d.ts +3 -0
- package/dist/core/ingestion/variable-extractors/configs/c-cpp.js +81 -0
- package/dist/core/ingestion/variable-extractors/configs/csharp.d.ts +9 -0
- package/dist/core/ingestion/variable-extractors/configs/csharp.js +63 -0
- package/dist/core/ingestion/variable-extractors/configs/dart.d.ts +2 -0
- package/dist/core/ingestion/variable-extractors/configs/dart.js +94 -0
- package/dist/core/ingestion/variable-extractors/configs/go.d.ts +2 -0
- package/dist/core/ingestion/variable-extractors/configs/go.js +83 -0
- package/dist/core/ingestion/variable-extractors/configs/jvm.d.ts +18 -0
- package/dist/core/ingestion/variable-extractors/configs/jvm.js +115 -0
- package/dist/core/ingestion/variable-extractors/configs/php.d.ts +14 -0
- package/dist/core/ingestion/variable-extractors/configs/php.js +58 -0
- package/dist/core/ingestion/variable-extractors/configs/python.d.ts +2 -0
- package/dist/core/ingestion/variable-extractors/configs/python.js +101 -0
- package/dist/core/ingestion/variable-extractors/configs/ruby.d.ts +11 -0
- package/dist/core/ingestion/variable-extractors/configs/ruby.js +52 -0
- package/dist/core/ingestion/variable-extractors/configs/rust.d.ts +2 -0
- package/dist/core/ingestion/variable-extractors/configs/rust.js +76 -0
- package/dist/core/ingestion/variable-extractors/configs/swift.d.ts +2 -0
- package/dist/core/ingestion/variable-extractors/configs/swift.js +88 -0
- package/dist/core/ingestion/variable-extractors/configs/typescript-javascript.d.ts +3 -0
- package/dist/core/ingestion/variable-extractors/configs/typescript-javascript.js +83 -0
- package/dist/core/ingestion/variable-extractors/generic.d.ts +5 -0
- package/dist/core/ingestion/variable-extractors/generic.js +80 -0
- package/dist/core/ingestion/variable-types.d.ts +82 -0
- package/dist/core/ingestion/variable-types.js +2 -0
- package/dist/core/ingestion/workers/parse-worker.js +244 -217
- package/dist/core/ingestion/workers/worker-pool.js +3 -0
- package/dist/core/lbug/csv-generator.js +1 -0
- package/dist/core/lbug/lbug-adapter.d.ts +4 -5
- package/dist/core/lbug/lbug-adapter.js +38 -14
- package/dist/core/lbug/schema.d.ts +2 -1
- package/dist/core/lbug/schema.js +10 -1
- package/dist/core/run-analyze.js +6 -7
- package/dist/core/tree-sitter/parser-loader.d.ts +3 -0
- package/dist/core/tree-sitter/parser-loader.js +17 -8
- package/dist/mcp/core/embedder.js +5 -0
- package/dist/mcp/local/local-backend.js +29 -19
- package/dist/server/api.js +2 -0
- package/dist/types/pipeline.d.ts +6 -0
- package/package.json +8 -7
- package/scripts/build-tree-sitter-proto.cjs +82 -0
- package/vendor/node_modules/node-addon-api/node_addon_api.Makefile +6 -0
- package/vendor/node_modules/node-addon-api/node_addon_api.target.mk +104 -0
- package/vendor/node_modules/node-addon-api/node_addon_api_except.target.mk +108 -0
- package/vendor/node_modules/node-addon-api/node_addon_api_except_all.target.mk +104 -0
- package/vendor/node_modules/node-addon-api/node_addon_api_maybe.target.mk +104 -0
- package/vendor/tree-sitter-proto/package.json +1 -7
- package/dist/core/ingestion/call-sites/extract-language-call-site.d.ts +0 -10
- package/dist/core/ingestion/call-sites/extract-language-call-site.js +0 -22
- package/dist/core/ingestion/call-sites/java.d.ts +0 -9
- package/dist/core/ingestion/call-sites/java.js +0 -30
- package/dist/core/ingestion/import-resolvers/dart.d.ts +0 -7
- package/dist/core/ingestion/import-resolvers/swift.d.ts +0 -7
- package/dist/core/ingestion/import-resolvers/vue.d.ts +0 -8
- package/dist/core/ingestion/import-resolvers/vue.js +0 -9
- package/scripts/preinstall-cleanup.cjs +0 -34
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Structural Extractor Module
|
|
3
|
+
*
|
|
4
|
+
* Reuses ingestion pipeline's AST-based MethodExtractor / FieldExtractor
|
|
5
|
+
* to extract method and field names for embedding text generation.
|
|
6
|
+
*/
|
|
7
|
+
import { getProviderForFile } from '../ingestion/languages/index.js';
|
|
8
|
+
import { buildTypeEnv } from '../ingestion/type-env.js';
|
|
9
|
+
import { ensureAndParse, findDeclarationNode } from './ast-utils.js';
|
|
10
|
+
/**
 * Symbol-table stand-in whose lookups never find anything.
 * It is handed to the field extractor as `symbolTable` when only a snippet
 * is being parsed.
 */
const NOOP_SYMBOL_TABLE = {
    lookupExactAll() {
        return [];
    },
    lookupExact() {
        return undefined;
    },
    lookupExactFull() {
        return undefined;
    },
};
|
|
15
|
+
/**
 * Collect the method and field names of the declaration found in a snippet,
 * using the language provider's AST extractors.
 *
 * @param content - Source text of the snippet to parse.
 * @param filePath - Path used to select the language provider and parser.
 * @returns Promise of `{ methodNames, fieldNames }`; both arrays are empty
 *   when there is no provider for the file, parsing yields no tree, or no
 *   declaration node is found.
 */
export const extractStructuralNames = async (content, filePath) => {
    // A fresh object per call so callers never share the "nothing found" arrays.
    const nothingFound = () => ({ methodNames: [], fieldNames: [] });
    const provider = getProviderForFile(filePath);
    if (!provider) {
        return nothingFound();
    }
    const tree = await ensureAndParse(content, filePath);
    if (!tree) {
        return nothingFound();
    }
    // The content is a snippet, so the declaration is located directly rather than by range.
    const classNode = findDeclarationNode(tree.rootNode);
    if (!classNode) {
        return nothingFound();
    }
    const { id: language } = provider;
    return {
        methodNames: extractMethodNames(classNode, provider, filePath, language),
        fieldNames: extractFieldNames(classNode, provider, tree, filePath, language),
    };
};
|
|
35
|
+
/**
 * Run the provider's method extractor on a declaration node and return the
 * `name` of every method it reports.
 *
 * @param classNode - Declaration node handed to the extractor.
 * @param provider - Language provider; may lack a `methodExtractor`.
 * @param filePath - Forwarded to the extractor in its context object.
 * @param language - Forwarded to the extractor in its context object.
 * @returns Array of method names; empty when there is no extractor or the
 *   extractor reports no methods.
 */
function extractMethodNames(classNode, provider, filePath, language) {
    const { methodExtractor } = provider;
    if (!methodExtractor) {
        return [];
    }
    const extracted = methodExtractor.extract(classNode, { filePath, language });
    const methods = extracted?.methods;
    if (!methods?.length) {
        return [];
    }
    return methods.map((method) => method.name);
}
|
|
44
|
+
/**
 * Run the provider's field extractor on a declaration node and return the
 * `name` of every field it reports.
 *
 * The extractor context carries a type environment built from the parsed
 * tree and a no-op symbol table.
 *
 * @param classNode - Declaration node handed to the extractor.
 * @param provider - Language provider; may lack a `fieldExtractor`.
 * @param tree - Parsed tree used to build the type environment.
 * @param filePath - Forwarded to the extractor in its context object.
 * @param language - Forwarded to the extractor and to `buildTypeEnv`.
 * @returns Array of field names; empty when there is no extractor or the
 *   extractor reports no fields.
 */
function extractFieldNames(classNode, provider, tree, filePath, language) {
    const { fieldExtractor } = provider;
    if (!fieldExtractor) {
        return [];
    }
    const extracted = fieldExtractor.extract(classNode, {
        typeEnv: buildTypeEnv(tree, language),
        symbolTable: NOOP_SYMBOL_TABLE,
        filePath,
        language,
    });
    const fields = extracted?.fields;
    if (!fields?.length) {
        return [];
    }
    return fields.map((field) => field.name);
}
|
|
@@ -1,24 +1,31 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Text Generator Module
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
4
|
+
* Generates enriched embedding text from code nodes with metadata.
|
|
5
|
+
* Supports chunkable labels (Function/Method with AST chunking),
|
|
6
|
+
* Class-specific structural text, and short-node direct embed.
|
|
7
|
+
*
|
|
8
|
+
* Method/field names for Class nodes are extracted by the ingestion
|
|
9
|
+
* pipeline's AST extractors and passed via node.methodNames/node.fieldNames.
|
|
6
10
|
*/
|
|
7
11
|
import type { EmbeddableNode, EmbeddingConfig } from './types.js';
|
|
12
|
+
/**
|
|
13
|
+
* Truncate description to max length at sentence/word boundary
|
|
14
|
+
*/
|
|
15
|
+
declare const truncateDescription: (text: string, maxLength: number) => string;
|
|
16
|
+
/**
|
|
17
|
+
* Extract class/interface/struct declaration lines, skipping method bodies.
|
|
18
|
+
* - Brace-based languages: detects method signatures (lines with `(` and `{`)
|
|
19
|
+
* and skips until depth returns to class body level.
|
|
20
|
+
* - Non-brace languages (Python/Ruby): returns empty string (patterns handle extraction).
|
|
21
|
+
*/
|
|
22
|
+
export declare const extractDeclarationOnly: (content: string) => string;
|
|
8
23
|
/**
|
|
9
24
|
* Generate embedding text for any embeddable node
|
|
10
25
|
* Dispatches to the appropriate generator based on node label
|
|
11
|
-
*
|
|
12
|
-
* @param node - The node to generate text for
|
|
13
|
-
* @param config - Optional configuration for max snippet length
|
|
14
|
-
* @returns Text suitable for embedding
|
|
15
26
|
*/
|
|
16
|
-
export declare const generateEmbeddingText: (node: EmbeddableNode, config?: Partial<EmbeddingConfig>) => string;
|
|
27
|
+
export declare const generateEmbeddingText: (node: EmbeddableNode, codeBody: string, config?: Partial<EmbeddingConfig>) => string;
|
|
17
28
|
/**
|
|
18
|
-
*
|
|
19
|
-
*
|
|
20
|
-
* @param nodes - Array of nodes to generate text for
|
|
21
|
-
* @param config - Optional configuration
|
|
22
|
-
* @returns Array of texts in the same order as input nodes
|
|
29
|
+
* Export truncation helper for testing
|
|
23
30
|
*/
|
|
24
|
-
export
|
|
31
|
+
export { truncateDescription };
|
|
@@ -1,167 +1,199 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Text Generator Module
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
*
|
|
10
|
-
*/
|
|
11
|
-
const getFileName = (filePath) => {
|
|
12
|
-
const parts = filePath.split('/');
|
|
13
|
-
return parts[parts.length - 1] || filePath;
|
|
14
|
-
};
|
|
15
|
-
/**
|
|
16
|
-
* Extract the directory path from a file path
|
|
4
|
+
* Generates enriched embedding text from code nodes with metadata.
|
|
5
|
+
* Supports chunkable labels (Function/Method with AST chunking),
|
|
6
|
+
* Class-specific structural text, and short-node direct embed.
|
|
7
|
+
*
|
|
8
|
+
* Method/field names for Class nodes are extracted by the ingestion
|
|
9
|
+
* pipeline's AST extractors and passed via node.methodNames/node.fieldNames.
|
|
17
10
|
*/
|
|
18
|
-
|
|
19
|
-
const parts = filePath.split('/');
|
|
20
|
-
parts.pop();
|
|
21
|
-
return parts.join('/') || '';
|
|
22
|
-
};
|
|
11
|
+
import { DEFAULT_EMBEDDING_CONFIG, isShortLabel } from './types.js';
|
|
23
12
|
/**
 * Truncate a description to at most `maxLength` UTF-16 code units, preferring
 * to cut at a sentence boundary and otherwise at a word boundary.
 *
 * A boundary is only used when it lies past the halfway point of the budget,
 * so the result is never shortened by more than half just to end cleanly.
 *
 * @param text - Description text.
 * @param maxLength - Maximum length of the result in UTF-16 code units.
 * @returns `text` unchanged when it already fits, otherwise a prefix of it.
 */
const truncateDescription = (text, maxLength) => {
    if (text.length <= maxLength)
        return text;
    let truncated = text.slice(0, maxLength);
    // slice() counts UTF-16 code units, so the cut can land between the two
    // halves of a surrogate pair. Drop the dangling high surrogate rather
    // than emit an ill-formed string.
    const lastUnit = truncated.charCodeAt(truncated.length - 1);
    const nextUnit = text.charCodeAt(truncated.length);
    const splitsSurrogatePair = lastUnit >= 0xd800 && lastUnit <= 0xdbff && nextUnit >= 0xdc00 && nextUnit <= 0xdfff;
    if (splitsSurrogatePair) {
        truncated = truncated.slice(0, -1);
    }
    // Try sentence boundary (. ! ?) — keep the punctuation, drop the following space.
    const sentenceEnd = Math.max(truncated.lastIndexOf('. '), truncated.lastIndexOf('! '), truncated.lastIndexOf('? '));
    if (sentenceEnd > maxLength * 0.5) {
        return truncated.slice(0, sentenceEnd + 1);
    }
    // Try word boundary
    const lastSpace = truncated.lastIndexOf(' ');
    if (lastSpace > maxLength * 0.5) {
        return truncated.slice(0, lastSpace);
    }
    return truncated;
};
|
|
38
31
|
/**
 * Normalise code text before it is embedded.
 *
 * Steps, in order: convert CRLF to LF, collapse runs of three or more
 * newlines to two, strip trailing whitespace from each line, then trim the
 * whole string.
 *
 * @param content - Raw code text.
 * @returns The cleaned text.
 */
const cleanContent = (content) => {
    const unixNewlines = content.replace(/\r\n/g, '\n');
    const collapsed = unixNewlines.replace(/\n{3,}/g, '\n\n');
    const rightTrimmedLines = [];
    for (const line of collapsed.split('\n')) {
        rightTrimmedLines.push(line.trimEnd());
    }
    return rightTrimmedLines.join('\n').trim();
};
|
|
54
43
|
/**
 * Build the newline-separated metadata header that precedes a node's code
 * in its embedding text.
 *
 * Lines, in order: `Label: name`, `Repo:` (if set), `Server:` (if set),
 * `Path:`, `Export:` (if `isExported` is defined), then the description
 * truncated to the configured maximum length (if any).
 *
 * @param node - Node supplying label, name, filePath and optional metadata.
 * @param config - Embedding config; `maxDescriptionLength` falls back to the default config.
 * @returns The header text.
 */
const buildMetadataHeader = (node, config) => {
    const { label, name, repoName, serverName, filePath, isExported, description } = node;
    const headerLines = [`${label}: ${name}`];
    if (repoName) {
        headerLines.push(`Repo: ${repoName}`);
    }
    if (serverName) {
        headerLines.push(`Server: ${serverName}`);
    }
    headerLines.push(`Path: ${filePath}`);
    if (isExported !== undefined) {
        headerLines.push(`Export: ${isExported}`);
    }
    if (description) {
        const limit = config.maxDescriptionLength ?? DEFAULT_EMBEDDING_CONFIG.maxDescriptionLength;
        const summary = truncateDescription(description, limit);
        if (summary) {
            headerLines.push(summary);
        }
    }
    return headerLines.join('\n');
};
|
|
74
|
+
/**
 * Embedding text for a node with a code body: the metadata header, a blank
 * line, then the cleaned code.
 *
 * @param node - Node described by the header.
 * @param codeBody - Code text to clean and append.
 * @param config - Embedding config forwarded to the header builder.
 * @returns The combined text.
 */
const generateCodeBodyText = (node, codeBody, config) => {
    const sections = [buildMetadataHeader(node, config), cleanContent(codeBody)];
    return sections.join('\n\n');
};
|
|
86
79
|
/**
 * Generate embedding text for Class nodes.
 *
 * Delegates unchanged to the shared structural-type generator, which emits
 * the header, method and property name lists, and the declaration with
 * method bodies removed.
 */
const generateClassText = (node, codeBody, config) => generateStructuralTypeText(node, codeBody, config);
|
|
87
|
+
/**
 * Embedding text for structural types (used for Class and Interface nodes).
 *
 * Sections, joined by newlines:
 *  - the metadata header;
 *  - `Methods: …` and `Properties: …` when the node carries those name lists;
 *  - a blank line plus the declaration with method bodies removed, when non-empty;
 *  - a blank line plus the cleaned chunk, when it is non-empty and differs
 *    from the node's full cleaned content.
 *
 * @param node - Node with `content` and optional `methodNames` / `fieldNames`.
 * @param codeBody - The chunk being embedded.
 * @param config - Embedding config forwarded to the header builder.
 * @returns The combined text.
 */
const generateStructuralTypeText = (node, codeBody, config) => {
    const sections = [buildMetadataHeader(node, config)];
    const { methodNames, fieldNames } = node;
    if (methodNames?.length) {
        sections.push(`Methods: ${methodNames.join(', ')}`);
    }
    if (fieldNames?.length) {
        sections.push(`Properties: ${fieldNames.join(', ')}`);
    }
    // Cleaned once; used for both the declaration extract and the chunk comparison.
    const cleanedFull = cleanContent(node.content);
    const declaration = extractDeclarationOnly(cleanedFull);
    if (declaration) {
        sections.push('', declaration);
    }
    const chunk = cleanContent(codeBody);
    const chunkAddsSomething = Boolean(chunk) && chunk !== cleanedFull;
    if (chunkAddsSomething) {
        sections.push('', chunk);
    }
    return sections.join('\n');
};
|
|
106
|
+
// Matches the first line of a type declaration: optional `export` / `pub` /
// `data` / `abstract` modifiers, then `class` / `struct` / `enum` / `interface`,
// or `type <Name> struct`.
// NOTE(review): other leading modifiers (e.g. `public`, `final`) do not match,
// so such declarations yield an empty result — confirm this is intended.
const DECL_START_RE = /^(?:(?:export|pub|data|abstract)\s+)*(?:type\s+\w+\s+struct|(?:class|struct|enum|interface)\s)/;
/**
 * Extract class/interface/struct declaration lines, skipping method bodies.
 * - Brace-based languages: detects method signatures (lines with `(` and `{`)
 *   and skips until depth returns to class body level. The body's opening
 *   brace may sit on the declaration line or on one of the next three lines.
 * - Non-brace languages (Python/Ruby): returns empty string (patterns handle extraction).
 *
 * @param content - Source text of the declaration (one snippet).
 * @returns The kept lines, each trimmed, joined by newlines; empty string when
 *   no declaration start is found or no `{` follows it.
 */
export const extractDeclarationOnly = (content) => {
    const lines = content.split('\n');
    const declLines = [];
    let depth = 0;
    let started = false;
    let classDepth = 0;
    let skipDepth = 0;
    // True between a declaration line without `{` and the line that opens the body.
    let awaitingBody = false;
    for (const [idx, line] of lines.entries()) {
        const trimmed = line.trim();
        if (!started) {
            if (DECL_START_RE.test(trimmed)) {
                // Non-brace language check: current line or next 3 lines must have `{`
                const nextLines = lines.slice(idx + 1, idx + 4);
                const braceOnDeclLine = trimmed.includes('{');
                if (!braceOnDeclLine && !nextLines.some((l) => l.includes('{'))) {
                    return '';
                }
                started = true;
                awaitingBody = !braceOnDeclLine;
                declLines.push(trimmed);
                for (const ch of trimmed) {
                    if (ch === '{')
                        depth++;
                    else if (ch === '}')
                        depth--;
                }
                if (depth > 0)
                    classDepth = depth;
            }
            continue;
        }
        // Always update depth (even when skipping)
        const opens = (trimmed.match(/{/g) || []).length;
        const closes = (trimmed.match(/}/g) || []).length;
        const prevDepth = depth;
        depth += opens - closes;
        if (awaitingBody) {
            // Lines between the declaration and its opening brace (base lists,
            // constraints, the brace itself) belong to the declaration. Record
            // the body depth here; without it classDepth stays 0 and method
            // skipping never activates.
            declLines.push(trimmed);
            if (opens > 0) {
                awaitingBody = false;
                if (depth <= 0)
                    break; // body opened and closed on the same line
                classDepth = depth;
            }
            continue;
        }
        if (skipDepth > 0) {
            if (depth <= classDepth) {
                skipDepth = 0;
                // Closing brace of class
                if (depth <= 0) {
                    declLines.push(trimmed);
                    break;
                }
            }
            continue;
        }
        // Detect method signature: line has both `(` and `{` and goes deeper than class body
        const hasParens = trimmed.includes('(');
        const hasOpenBrace = opens > 0;
        if (hasParens && hasOpenBrace && prevDepth + opens > classDepth) {
            if (opens === closes && trimmed.endsWith(';')) {
                // Balanced single-line initializer ending in `;` — keep
                declLines.push(trimmed);
            }
            // else: single-line or multi-line method — skip entirely
            if (opens !== closes) {
                skipDepth = classDepth;
            }
            continue;
        }
        declLines.push(trimmed);
        if (depth <= 0 && declLines.length > 1)
            break;
    }
    return declLines.join('\n').trim();
};
|
|
132
178
|
/**
 * Generate embedding text for any embeddable node.
 *
 * Dispatch by `node.label`:
 *  - short labels: metadata header plus the node's own cleaned content
 *    (`codeBody` is not used);
 *  - `Class` and `Interface`: structural text (header, member names,
 *    declaration, chunk);
 *  - everything else: metadata header plus the cleaned `codeBody`.
 *
 * @param node - The node to generate text for.
 * @param codeBody - The code chunk being embedded.
 * @param config - Optional embedding configuration overrides.
 * @returns Text suitable for embedding.
 */
export const generateEmbeddingText = (node, codeBody, config = {}) => {
    const { label } = node;
    if (isShortLabel(label)) {
        const sections = [buildMetadataHeader(node, config), cleanContent(node.content)];
        return sections.join('\n\n');
    }
    switch (label) {
        case 'Class':
            return generateClassText(node, codeBody, config);
        case 'Interface':
            return generateStructuralTypeText(node, codeBody, config);
        default:
            return generateCodeBodyText(node, codeBody, config);
    }
};
|
|
158
196
|
/**
|
|
159
|
-
*
|
|
160
|
-
*
|
|
161
|
-
* @param nodes - Array of nodes to generate text for
|
|
162
|
-
* @param config - Optional configuration
|
|
163
|
-
* @returns Array of texts in the same order as input nodes
|
|
197
|
+
* Export truncation helper for testing
|
|
164
198
|
*/
|
|
165
|
-
export
|
|
166
|
-
return nodes.map((node) => generateEmbeddingText(node, config));
|
|
167
|
-
};
|
|
199
|
+
export { truncateDescription };
|
|
@@ -4,15 +4,38 @@
|
|
|
4
4
|
* Type definitions for the embedding generation and semantic search system.
|
|
5
5
|
*/
|
|
6
6
|
/**
|
|
7
|
-
* Node labels that
|
|
8
|
-
* These are code elements that benefit from semantic matching
|
|
7
|
+
* Node labels that need chunking (have code body, potentially long)
|
|
9
8
|
*/
|
|
10
|
-
export declare const
|
|
9
|
+
export declare const CHUNKABLE_LABELS: readonly ["Function", "Method", "Constructor", "Class", "Interface", "Struct", "Enum", "Trait", "Impl", "Macro", "Namespace"];
|
|
10
|
+
/**
|
|
11
|
+
* Node labels that are short (no chunking needed, embed directly)
|
|
12
|
+
*/
|
|
13
|
+
export declare const SHORT_LABELS: readonly ["TypeAlias", "Typedef", "Const", "Property", "Record", "Union", "Static", "Variable"];
|
|
14
|
+
/**
|
|
15
|
+
* All embeddable labels (union of CHUNKABLE + SHORT)
|
|
16
|
+
*/
|
|
17
|
+
export declare const EMBEDDABLE_LABELS: readonly ["Function", "Method", "Constructor", "Class", "Interface", "Struct", "Enum", "Trait", "Impl", "Macro", "Namespace", "TypeAlias", "Typedef", "Const", "Property", "Record", "Union", "Static", "Variable"];
|
|
11
18
|
export type EmbeddableLabel = (typeof EMBEDDABLE_LABELS)[number];
|
|
12
19
|
/**
|
|
13
20
|
* Check if a label should be embedded
|
|
14
21
|
*/
|
|
15
22
|
export declare const isEmbeddableLabel: (label: string) => label is EmbeddableLabel;
|
|
23
|
+
/**
 * Check if a label needs chunking
 *
 * Declared as a type guard, like isEmbeddableLabel, so a `true` result
 * narrows `label` to one of the CHUNKABLE_LABELS entries.
 */
export declare const isChunkableLabel: (label: string) => label is (typeof CHUNKABLE_LABELS)[number];
/**
 * Check if a label is a short type (no chunking)
 *
 * Declared as a type guard, like isEmbeddableLabel, so a `true` result
 * narrows `label` to one of the SHORT_LABELS entries.
 */
export declare const isShortLabel: (label: string) => label is (typeof SHORT_LABELS)[number];
|
|
31
|
+
/**
|
|
32
|
+
* Node labels that have structural names (methods/fields) extractable via AST
|
|
33
|
+
*/
|
|
34
|
+
export declare const STRUCTURAL_LABELS: ReadonlySet<string>;
|
|
35
|
+
/**
|
|
36
|
+
* Node labels that have isExported column in their schema
|
|
37
|
+
*/
|
|
38
|
+
export declare const LABELS_WITH_EXPORTED: ReadonlySet<string>;
|
|
16
39
|
/**
|
|
17
40
|
* Embedding pipeline phases
|
|
18
41
|
*/
|
|
@@ -44,6 +67,12 @@ export interface EmbeddingConfig {
|
|
|
44
67
|
device: 'auto' | 'dml' | 'cuda' | 'cpu' | 'wasm';
|
|
45
68
|
/** Maximum characters of code snippet to include */
|
|
46
69
|
maxSnippetLength: number;
|
|
70
|
+
/** Maximum code chunk size in characters (for chunking long code) */
|
|
71
|
+
chunkSize: number;
|
|
72
|
+
/** Overlap between chunks in characters */
|
|
73
|
+
overlap: number;
|
|
74
|
+
/** Maximum description length in characters */
|
|
75
|
+
maxDescriptionLength: number;
|
|
47
76
|
}
|
|
48
77
|
/**
|
|
49
78
|
* Default embedding configuration
|
|
@@ -74,6 +103,32 @@ export interface EmbeddableNode {
|
|
|
74
103
|
content: string;
|
|
75
104
|
startLine?: number;
|
|
76
105
|
endLine?: number;
|
|
106
|
+
isExported?: boolean;
|
|
107
|
+
description?: string;
|
|
108
|
+
parameterCount?: number;
|
|
109
|
+
returnType?: string;
|
|
110
|
+
repoName?: string;
|
|
111
|
+
serverName?: string;
|
|
112
|
+
methodNames?: string[];
|
|
113
|
+
fieldNames?: string[];
|
|
114
|
+
}
|
|
115
|
+
/**
 * Cached embedding entry restored from LadybugDB before a graph rebuild
 */
export interface CachedEmbedding {
    /** Identifier of the node this entry belongs to */
    nodeId: string;
    /** Index of the chunk within its node (NOTE(review): base of the index is not visible here; confirm) */
    chunkIndex: number;
    /** First line of the chunk */
    startLine: number;
    /** Last line of the chunk */
    endLine: number;
    /** The stored embedding vector */
    embedding: number[];
    /** Optional hash of the embedded content (presumably used to detect stale entries; confirm against the pipeline) */
    contentHash?: string;
}
|
|
126
|
+
/**
 * Context info for embedding pipeline (repo/server metadata enrichment)
 *
 * Both fields are optional.
 */
export interface EmbeddingContext {
    /** Repository name, when known */
    repoName?: string;
    /** Server name, when known */
    serverName?: string;
}
|
|
78
133
|
/**
|
|
79
134
|
* Model download progress from transformers.js
|
|
@@ -85,3 +140,26 @@ export interface ModelProgress {
|
|
|
85
140
|
loaded?: number;
|
|
86
141
|
total?: number;
|
|
87
142
|
}
|
|
143
|
+
/**
 * One chunk-level row of a vector search result, as accepted by dedupBestChunks.
 */
export interface ChunkSearchRow {
    /** Node the chunk belongs to; rows are deduplicated on this key */
    nodeId: string;
    /** Index of the chunk within its node */
    chunkIndex: number;
    /** First line of the chunk */
    startLine: number;
    /** Last line of the chunk */
    endLine: number;
    /** Search distance; the smallest value per node is the one kept */
    distance: number;
}
|
|
150
|
+
/**
 * Value type of the map returned by dedupBestChunks and collectBestChunks:
 * the chunk fields of a search row, without the nodeId (which is the map key).
 */
export interface BestChunkMatch {
    /** Index of the retained chunk within its node */
    chunkIndex: number;
    /** First line of the retained chunk */
    startLine: number;
    /** Last line of the retained chunk */
    endLine: number;
    /** Search distance of the retained chunk */
    distance: number;
}
|
|
156
|
+
/**
|
|
157
|
+
* Deduplicate vector search chunk results by nodeId,
|
|
158
|
+
* keeping the chunk with smallest distance for each node.
|
|
159
|
+
*/
|
|
160
|
+
export declare const dedupBestChunks: (rows: ChunkSearchRow[], limit?: number) => Map<string, BestChunkMatch>;
|
|
161
|
+
/**
|
|
162
|
+
* Fetch vector-search chunks until we have enough unique nodeIds
|
|
163
|
+
* or can tell the result set is exhausted.
|
|
164
|
+
*/
|
|
165
|
+
export declare const collectBestChunks: (limit: number, fetchRows: (fetchLimit: number) => Promise<ChunkSearchRow[]>, maxFetch?: number) => Promise<Map<string, BestChunkMatch>>;
|
|
@@ -4,14 +4,69 @@
|
|
|
4
4
|
* Type definitions for the embedding generation and semantic search system.
|
|
5
5
|
*/
|
|
6
6
|
/**
|
|
7
|
-
* Node labels that
|
|
8
|
-
* These are code elements that benefit from semantic matching
|
|
7
|
+
* Node labels that need chunking (have code body, potentially long)
|
|
9
8
|
*/
|
|
10
|
-
export const
|
|
9
|
+
export const CHUNKABLE_LABELS = [
    'Function', 'Method', 'Constructor',
    'Class', 'Interface', 'Struct', 'Enum',
    'Trait', 'Impl', 'Macro', 'Namespace',
];
/**
 * Labels of short nodes: embedded directly, never chunked.
 */
export const SHORT_LABELS = [
    'TypeAlias', 'Typedef', 'Const', 'Property',
    'Record', 'Union', 'Static', 'Variable',
];
/**
 * Every embeddable label: the chunkable labels followed by the short ones.
 */
export const EMBEDDABLE_LABELS = CHUNKABLE_LABELS.concat(SHORT_LABELS);
/**
 * Tell whether nodes with this label get embedded at all.
 *
 * @param label - Node label to look up
 * @returns true when the label appears in EMBEDDABLE_LABELS
 */
export const isEmbeddableLabel = (label) => {
    return EMBEDDABLE_LABELS.includes(label);
};
/**
 * Tell whether nodes with this label are chunked before embedding.
 *
 * @param label - Node label to look up
 * @returns true when the label appears in CHUNKABLE_LABELS
 */
export const isChunkableLabel = (label) => {
    return CHUNKABLE_LABELS.includes(label);
};
/**
 * Tell whether nodes with this label are short (embedded without chunking).
 *
 * @param label - Node label to look up
 * @returns true when the label appears in SHORT_LABELS
 */
export const isShortLabel = (label) => {
    return SHORT_LABELS.includes(label);
};
|
|
51
|
+
/**
|
|
52
|
+
* Node labels that have structural names (methods/fields) extractable via AST
|
|
53
|
+
*/
|
|
54
|
+
export const STRUCTURAL_LABELS = new Set(['Class', 'Struct', 'Interface', 'Enum']);
/**
 * Labels whose schema carries an isExported column.
 */
export const LABELS_WITH_EXPORTED = new Set(['Function', 'Class', 'Interface', 'Method', 'CodeElement']);
|
|
15
70
|
/**
|
|
16
71
|
* Default embedding configuration
|
|
17
72
|
* Uses snowflake-arctic-embed-xs for browser efficiency
|
|
@@ -23,4 +78,51 @@ export const DEFAULT_EMBEDDING_CONFIG = {
|
|
|
23
78
|
dimensions: 384,
|
|
24
79
|
device: 'auto',
|
|
25
80
|
maxSnippetLength: 500,
|
|
81
|
+
chunkSize: 1200,
|
|
82
|
+
overlap: 120,
|
|
83
|
+
maxDescriptionLength: 150,
|
|
84
|
+
};
|
|
85
|
+
/**
 * Deduplicate vector search chunk results by nodeId,
 * keeping the chunk with smallest distance for each node.
 *
 * Rows are scanned in the order given. When `limit` is provided, scanning
 * stops as soon as `limit` distinct nodeIds have been collected, so rows
 * after that point are not considered.
 *
 * @param rows - Chunk rows exposing nodeId, chunkIndex, startLine, endLine and distance
 * @param limit - Optional maximum number of distinct nodeIds to collect;
 *                a value <= 0 yields an empty map
 * @returns Map from nodeId to the retained chunk's chunkIndex/startLine/endLine/distance
 */
export const dedupBestChunks = (rows, limit) => {
    const best = new Map();
    for (const row of rows) {
        // Test the cap before inserting: checking only after the insert let
        // one entry through even when limit was 0 or negative.
        if (limit !== undefined && best.size >= limit) {
            break;
        }
        const existing = best.get(row.nodeId);
        if (!existing || row.distance < existing.distance) {
            best.set(row.nodeId, {
                chunkIndex: row.chunkIndex,
                startLine: row.startLine,
                endLine: row.endLine,
                distance: row.distance,
            });
        }
    }
    return best;
};
|
|
106
|
+
const DEFAULT_FETCH_MULTIPLIER = 4;
const DEFAULT_FETCH_BUFFER = 8;
const DEFAULT_MAX_FETCH = 200;
/**
 * Repeatedly query the vector search with a growing row budget until the
 * deduplicated result holds `limit` distinct nodeIds, or a query returns
 * fewer rows than requested (nothing more to fetch).
 *
 * The first request asks for max(limit * 4, limit + 8) rows. Each retry
 * doubles the request; while the request is still below `maxFetch` the
 * doubled value is clamped to `maxFetch`, and once it has reached `maxFetch`
 * it keeps doubling without a cap.
 *
 * @param limit - Number of distinct nodeIds wanted; <= 0 returns an empty map
 * @param fetchRows - Async callback that receives the row budget and resolves to chunk rows
 * @param maxFetch - Intermediate clamp for the growing row budget (default 200)
 * @returns Map from nodeId to its best chunk, as produced by dedupBestChunks
 */
export const collectBestChunks = async (limit, fetchRows, maxFetch = DEFAULT_MAX_FETCH) => {
    if (limit <= 0) {
        return new Map();
    }
    let lastRequested = 0;
    let requestSize = Math.max(limit * DEFAULT_FETCH_MULTIPLIER, limit + DEFAULT_FETCH_BUFFER);
    while (requestSize > lastRequested) {
        const fetched = await fetchRows(requestSize);
        const deduped = dedupBestChunks(fetched, limit);
        const hasEnoughNodes = deduped.size >= limit;
        // A short page means the backend has no further rows to offer.
        if (hasEnoughNodes || fetched.length < requestSize) {
            return deduped;
        }
        lastRequested = requestSize;
        const doubled = requestSize * 2;
        if (requestSize >= maxFetch) {
            requestSize = doubled;
        }
        else {
            requestSize = Math.min(maxFetch, doubled);
        }
    }
    return new Map();
};
|