gitnexus 1.6.2-rc.9 → 1.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_shared/lbug/schema-constants.d.ts +1 -1
- package/dist/_shared/lbug/schema-constants.d.ts.map +1 -1
- package/dist/_shared/lbug/schema-constants.js +1 -0
- package/dist/_shared/lbug/schema-constants.js.map +1 -1
- package/dist/_shared/mro-strategy.d.ts +38 -16
- package/dist/_shared/mro-strategy.d.ts.map +1 -1
- package/dist/cli/ai-context.js +0 -58
- package/dist/cli/analyze.js +3 -0
- package/dist/core/embeddings/ast-utils.d.ts +22 -0
- package/dist/core/embeddings/ast-utils.js +105 -0
- package/dist/core/embeddings/character-chunk.d.ts +12 -0
- package/dist/core/embeddings/character-chunk.js +43 -0
- package/dist/core/embeddings/chunker.d.ts +14 -0
- package/dist/core/embeddings/chunker.js +234 -0
- package/dist/core/embeddings/embedding-pipeline.d.ts +20 -24
- package/dist/core/embeddings/embedding-pipeline.js +176 -107
- package/dist/core/embeddings/line-index.d.ts +7 -0
- package/dist/core/embeddings/line-index.js +42 -0
- package/dist/core/embeddings/server-mapping.d.ts +15 -0
- package/dist/core/embeddings/server-mapping.js +33 -0
- package/dist/core/embeddings/structural-extractor.d.ts +15 -0
- package/dist/core/embeddings/structural-extractor.js +58 -0
- package/dist/core/embeddings/text-generator.d.ts +20 -13
- package/dist/core/embeddings/text-generator.js +151 -119
- package/dist/core/embeddings/types.d.ts +81 -3
- package/dist/core/embeddings/types.js +105 -3
- package/dist/core/group/extractors/http-patterns/node.js +130 -0
- package/dist/core/ingestion/call-extractors/configs/c-cpp.d.ts +3 -0
- package/dist/core/ingestion/call-extractors/configs/c-cpp.js +8 -0
- package/dist/core/ingestion/call-extractors/configs/csharp.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/csharp.js +6 -0
- package/dist/core/ingestion/call-extractors/configs/dart.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/dart.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/go.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/go.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/jvm.d.ts +3 -0
- package/dist/core/ingestion/call-extractors/configs/jvm.js +51 -0
- package/dist/core/ingestion/call-extractors/configs/php.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/php.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/python.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/python.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/ruby.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/ruby.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/rust.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/rust.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/swift.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/swift.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/typescript-javascript.d.ts +3 -0
- package/dist/core/ingestion/call-extractors/configs/typescript-javascript.js +8 -0
- package/dist/core/ingestion/call-extractors/generic.d.ts +5 -0
- package/dist/core/ingestion/call-extractors/generic.js +59 -0
- package/dist/core/ingestion/call-processor.d.ts +2 -4
- package/dist/core/ingestion/call-processor.js +221 -89
- package/dist/core/ingestion/call-routing.d.ts +8 -12
- package/dist/core/ingestion/call-routing.js +13 -34
- package/dist/core/ingestion/call-types.d.ts +135 -0
- package/dist/core/ingestion/call-types.js +2 -0
- package/dist/core/ingestion/class-extractors/configs/c-cpp.d.ts +3 -0
- package/dist/core/ingestion/class-extractors/configs/c-cpp.js +11 -0
- package/dist/core/ingestion/class-extractors/configs/csharp.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/csharp.js +21 -0
- package/dist/core/ingestion/class-extractors/configs/dart.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/dart.js +7 -0
- package/dist/core/ingestion/class-extractors/configs/go.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/go.js +20 -0
- package/dist/core/ingestion/class-extractors/configs/jvm.d.ts +3 -0
- package/dist/core/ingestion/class-extractors/configs/jvm.js +35 -0
- package/dist/core/ingestion/class-extractors/configs/php.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/php.js +7 -0
- package/dist/core/ingestion/class-extractors/configs/python.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/python.js +7 -0
- package/dist/core/ingestion/class-extractors/configs/ruby.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/ruby.js +7 -0
- package/dist/core/ingestion/class-extractors/configs/rust.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/rust.js +7 -0
- package/dist/core/ingestion/class-extractors/configs/swift.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/swift.js +18 -0
- package/dist/core/ingestion/class-extractors/configs/typescript-javascript.d.ts +4 -0
- package/dist/core/ingestion/class-extractors/configs/typescript-javascript.js +28 -0
- package/dist/core/ingestion/field-types.d.ts +1 -1
- package/dist/core/ingestion/heritage-extractors/configs/go.d.ts +13 -0
- package/dist/core/ingestion/heritage-extractors/configs/go.js +20 -0
- package/dist/core/ingestion/heritage-extractors/configs/ruby.d.ts +18 -0
- package/dist/core/ingestion/heritage-extractors/configs/ruby.js +65 -0
- package/dist/core/ingestion/heritage-extractors/generic.d.ts +23 -0
- package/dist/core/ingestion/heritage-extractors/generic.js +47 -0
- package/dist/core/ingestion/heritage-processor.d.ts +9 -0
- package/dist/core/ingestion/heritage-processor.js +120 -85
- package/dist/core/ingestion/heritage-types.d.ts +73 -0
- package/dist/core/ingestion/heritage-types.js +2 -0
- package/dist/core/ingestion/import-resolvers/configs/c-cpp.d.ts +7 -0
- package/dist/core/ingestion/import-resolvers/configs/c-cpp.js +14 -0
- package/dist/core/ingestion/import-resolvers/configs/csharp.d.ts +8 -0
- package/dist/core/ingestion/import-resolvers/configs/csharp.js +27 -0
- package/dist/core/ingestion/import-resolvers/configs/dart.d.ts +17 -0
- package/dist/core/ingestion/import-resolvers/{dart.js → configs/dart.js} +26 -16
- package/dist/core/ingestion/import-resolvers/configs/go.d.ts +8 -0
- package/dist/core/ingestion/import-resolvers/configs/go.js +26 -0
- package/dist/core/ingestion/import-resolvers/configs/jvm.d.ts +13 -0
- package/dist/core/ingestion/import-resolvers/configs/jvm.js +68 -0
- package/dist/core/ingestion/import-resolvers/configs/php.d.ts +8 -0
- package/dist/core/ingestion/import-resolvers/configs/php.js +15 -0
- package/dist/core/ingestion/import-resolvers/configs/python.d.ts +12 -0
- package/dist/core/ingestion/import-resolvers/configs/python.js +41 -0
- package/dist/core/ingestion/import-resolvers/configs/ruby.d.ts +8 -0
- package/dist/core/ingestion/import-resolvers/configs/ruby.js +16 -0
- package/dist/core/ingestion/import-resolvers/configs/rust.d.ts +8 -0
- package/dist/core/ingestion/import-resolvers/configs/rust.js +54 -0
- package/dist/core/ingestion/import-resolvers/configs/swift.d.ts +8 -0
- package/dist/core/ingestion/import-resolvers/{swift.js → configs/swift.js} +10 -5
- package/dist/core/ingestion/import-resolvers/configs/typescript-javascript.d.ts +9 -0
- package/dist/core/ingestion/import-resolvers/configs/typescript-javascript.js +23 -0
- package/dist/core/ingestion/import-resolvers/csharp.d.ts +4 -5
- package/dist/core/ingestion/import-resolvers/csharp.js +4 -20
- package/dist/core/ingestion/import-resolvers/go.d.ts +4 -5
- package/dist/core/ingestion/import-resolvers/go.js +4 -19
- package/dist/core/ingestion/import-resolvers/jvm.d.ts +5 -10
- package/dist/core/ingestion/import-resolvers/jvm.js +5 -58
- package/dist/core/ingestion/import-resolvers/php.d.ts +4 -5
- package/dist/core/ingestion/import-resolvers/php.js +4 -7
- package/dist/core/ingestion/import-resolvers/python.d.ts +3 -6
- package/dist/core/ingestion/import-resolvers/python.js +3 -18
- package/dist/core/ingestion/import-resolvers/resolver-factory.d.ts +24 -0
- package/dist/core/ingestion/import-resolvers/resolver-factory.js +33 -0
- package/dist/core/ingestion/import-resolvers/ruby.d.ts +4 -5
- package/dist/core/ingestion/import-resolvers/ruby.js +4 -7
- package/dist/core/ingestion/import-resolvers/rust.d.ts +4 -5
- package/dist/core/ingestion/import-resolvers/rust.js +4 -47
- package/dist/core/ingestion/import-resolvers/standard.d.ts +3 -9
- package/dist/core/ingestion/import-resolvers/standard.js +7 -8
- package/dist/core/ingestion/import-resolvers/types.d.ts +24 -0
- package/dist/core/ingestion/language-provider.d.ts +80 -0
- package/dist/core/ingestion/languages/c-cpp.js +18 -12
- package/dist/core/ingestion/languages/csharp.js +13 -21
- package/dist/core/ingestion/languages/dart.js +13 -7
- package/dist/core/ingestion/languages/go.js +14 -20
- package/dist/core/ingestion/languages/java.js +13 -18
- package/dist/core/ingestion/languages/kotlin.js +13 -13
- package/dist/core/ingestion/languages/php.js +13 -7
- package/dist/core/ingestion/languages/python.js +13 -7
- package/dist/core/ingestion/languages/ruby.js +103 -22
- package/dist/core/ingestion/languages/rust.js +13 -7
- package/dist/core/ingestion/languages/swift.js +13 -18
- package/dist/core/ingestion/languages/typescript.js +18 -23
- package/dist/core/ingestion/languages/vue.js +13 -17
- package/dist/core/ingestion/model/heritage-map.d.ts +35 -0
- package/dist/core/ingestion/model/heritage-map.js +110 -9
- package/dist/core/ingestion/model/index.d.ts +2 -2
- package/dist/core/ingestion/model/index.js +1 -1
- package/dist/core/ingestion/model/resolve.d.ts +33 -28
- package/dist/core/ingestion/model/resolve.js +111 -27
- package/dist/core/ingestion/parsing-processor.d.ts +1 -2
- package/dist/core/ingestion/pipeline-phases/parse-impl.d.ts +1 -0
- package/dist/core/ingestion/pipeline-phases/parse-impl.js +9 -3
- package/dist/core/ingestion/pipeline-phases/parse.d.ts +7 -0
- package/dist/core/ingestion/pipeline.d.ts +11 -0
- package/dist/core/ingestion/pipeline.js +9 -2
- package/dist/core/ingestion/tree-sitter-queries.d.ts +11 -11
- package/dist/core/ingestion/tree-sitter-queries.js +81 -0
- package/dist/core/ingestion/type-env.d.ts +1 -1
- package/dist/core/ingestion/utils/ast-helpers.d.ts +1 -1
- package/dist/core/ingestion/utils/ast-helpers.js +22 -2
- package/dist/core/ingestion/utils/ruby-self-call.d.ts +52 -0
- package/dist/core/ingestion/utils/ruby-self-call.js +59 -0
- package/dist/core/ingestion/variable-extractors/configs/c-cpp.d.ts +3 -0
- package/dist/core/ingestion/variable-extractors/configs/c-cpp.js +81 -0
- package/dist/core/ingestion/variable-extractors/configs/csharp.d.ts +9 -0
- package/dist/core/ingestion/variable-extractors/configs/csharp.js +63 -0
- package/dist/core/ingestion/variable-extractors/configs/dart.d.ts +2 -0
- package/dist/core/ingestion/variable-extractors/configs/dart.js +94 -0
- package/dist/core/ingestion/variable-extractors/configs/go.d.ts +2 -0
- package/dist/core/ingestion/variable-extractors/configs/go.js +83 -0
- package/dist/core/ingestion/variable-extractors/configs/jvm.d.ts +18 -0
- package/dist/core/ingestion/variable-extractors/configs/jvm.js +115 -0
- package/dist/core/ingestion/variable-extractors/configs/php.d.ts +14 -0
- package/dist/core/ingestion/variable-extractors/configs/php.js +58 -0
- package/dist/core/ingestion/variable-extractors/configs/python.d.ts +2 -0
- package/dist/core/ingestion/variable-extractors/configs/python.js +101 -0
- package/dist/core/ingestion/variable-extractors/configs/ruby.d.ts +11 -0
- package/dist/core/ingestion/variable-extractors/configs/ruby.js +52 -0
- package/dist/core/ingestion/variable-extractors/configs/rust.d.ts +2 -0
- package/dist/core/ingestion/variable-extractors/configs/rust.js +76 -0
- package/dist/core/ingestion/variable-extractors/configs/swift.d.ts +2 -0
- package/dist/core/ingestion/variable-extractors/configs/swift.js +88 -0
- package/dist/core/ingestion/variable-extractors/configs/typescript-javascript.d.ts +3 -0
- package/dist/core/ingestion/variable-extractors/configs/typescript-javascript.js +83 -0
- package/dist/core/ingestion/variable-extractors/generic.d.ts +5 -0
- package/dist/core/ingestion/variable-extractors/generic.js +80 -0
- package/dist/core/ingestion/variable-types.d.ts +82 -0
- package/dist/core/ingestion/variable-types.js +2 -0
- package/dist/core/ingestion/workers/parse-worker.js +244 -217
- package/dist/core/ingestion/workers/worker-pool.js +3 -0
- package/dist/core/lbug/csv-generator.js +1 -0
- package/dist/core/lbug/lbug-adapter.d.ts +4 -5
- package/dist/core/lbug/lbug-adapter.js +38 -14
- package/dist/core/lbug/schema.d.ts +2 -1
- package/dist/core/lbug/schema.js +10 -1
- package/dist/core/run-analyze.js +6 -7
- package/dist/core/tree-sitter/parser-loader.d.ts +3 -0
- package/dist/core/tree-sitter/parser-loader.js +17 -8
- package/dist/mcp/local/local-backend.js +29 -19
- package/dist/server/api.js +2 -0
- package/dist/types/pipeline.d.ts +6 -0
- package/package.json +8 -7
- package/scripts/build-tree-sitter-proto.cjs +82 -0
- package/vendor/node_modules/node-addon-api/node_addon_api.Makefile +6 -0
- package/vendor/node_modules/node-addon-api/node_addon_api.target.mk +104 -0
- package/vendor/node_modules/node-addon-api/node_addon_api_except.target.mk +108 -0
- package/vendor/node_modules/node-addon-api/node_addon_api_except_all.target.mk +104 -0
- package/vendor/node_modules/node-addon-api/node_addon_api_maybe.target.mk +104 -0
- package/vendor/tree-sitter-proto/package.json +1 -7
- package/dist/core/ingestion/call-sites/extract-language-call-site.d.ts +0 -10
- package/dist/core/ingestion/call-sites/extract-language-call-site.js +0 -22
- package/dist/core/ingestion/call-sites/java.d.ts +0 -9
- package/dist/core/ingestion/call-sites/java.js +0 -30
- package/dist/core/ingestion/import-resolvers/dart.d.ts +0 -7
- package/dist/core/ingestion/import-resolvers/swift.d.ts +0 -7
- package/dist/core/ingestion/import-resolvers/vue.d.ts +0 -8
- package/dist/core/ingestion/import-resolvers/vue.js +0 -9
- package/scripts/preinstall-cleanup.cjs +0 -34
|
@@ -3,12 +3,12 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Orchestrates the background embedding process:
|
|
5
5
|
* 1. Query embeddable nodes from LadybugDB
|
|
6
|
-
* 2. Generate text representations
|
|
7
|
-
* 3.
|
|
8
|
-
* 4. Update LadybugDB with embeddings
|
|
6
|
+
* 2. Generate text representations with enriched metadata
|
|
7
|
+
* 3. Chunk long nodes, batch embed
|
|
8
|
+
* 4. Update LadybugDB with chunk-aware embeddings
|
|
9
9
|
* 5. Create vector index for semantic search
|
|
10
10
|
*/
|
|
11
|
-
import { type EmbeddingProgress, type EmbeddingConfig, type EmbeddableNode, type SemanticSearchResult } from './types.js';
|
|
11
|
+
import { type EmbeddingProgress, type EmbeddingConfig, type EmbeddableNode, type SemanticSearchResult, type EmbeddingContext } from './types.js';
|
|
12
12
|
/**
|
|
13
13
|
* Compute a stable content fingerprint for an embeddable node.
|
|
14
14
|
* Used to detect when the underlying text has changed so stale vectors
|
|
@@ -20,6 +20,17 @@ export declare const contentHashForNode: (node: EmbeddableNode, config?: Partial
|
|
|
20
20
|
* Progress callback type
|
|
21
21
|
*/
|
|
22
22
|
export type EmbeddingProgressCallback = (progress: EmbeddingProgress) => void;
|
|
23
|
+
/**
|
|
24
|
+
* Batch INSERT chunk-aware embeddings into CodeEmbedding table
|
|
25
|
+
*/
|
|
26
|
+
export declare const batchInsertEmbeddings: (executeWithReusedStatement: (cypher: string, paramsList: Array<Record<string, any>>) => Promise<void>, updates: Array<{
|
|
27
|
+
nodeId: string;
|
|
28
|
+
chunkIndex: number;
|
|
29
|
+
startLine: number;
|
|
30
|
+
endLine: number;
|
|
31
|
+
embedding: number[];
|
|
32
|
+
contentHash?: string;
|
|
33
|
+
}>) => Promise<void>;
|
|
23
34
|
/**
|
|
24
35
|
* Run the embedding pipeline
|
|
25
36
|
*
|
|
@@ -27,34 +38,19 @@ export type EmbeddingProgressCallback = (progress: EmbeddingProgress) => void;
|
|
|
27
38
|
* @param executeWithReusedStatement - Function to execute with reused prepared statement
|
|
28
39
|
* @param onProgress - Callback for progress updates
|
|
29
40
|
* @param config - Optional configuration override
|
|
41
|
+
* @param skipNodeIds - Optional set of node IDs that already have embeddings (incremental mode)
|
|
42
|
+
* @param context - Optional repo/server context for metadata enrichment
|
|
30
43
|
* @param existingEmbeddings - Optional map of nodeId → contentHash for incremental mode.
|
|
31
44
|
* Nodes whose hash matches are skipped; nodes with a changed hash are DELETE'd
|
|
32
45
|
* and re-embedded; nodes not in the map are embedded fresh.
|
|
46
|
+
|
|
33
47
|
*/
|
|
34
|
-
export declare const runEmbeddingPipeline: (executeQuery: (cypher: string) => Promise<any[]>, executeWithReusedStatement: (cypher: string, paramsList: Array<Record<string, any>>) => Promise<void>, onProgress: EmbeddingProgressCallback, config?: Partial<EmbeddingConfig>, existingEmbeddings?: Map<string, string>) => Promise<void>;
|
|
48
|
+
export declare const runEmbeddingPipeline: (executeQuery: (cypher: string) => Promise<any[]>, executeWithReusedStatement: (cypher: string, paramsList: Array<Record<string, any>>) => Promise<void>, onProgress: EmbeddingProgressCallback, config?: Partial<EmbeddingConfig>, skipNodeIds?: Set<string>, context?: EmbeddingContext, existingEmbeddings?: Map<string, string>) => Promise<void>;
|
|
35
49
|
/**
|
|
36
|
-
* Perform semantic search using the vector index
|
|
37
|
-
*
|
|
38
|
-
* Uses CodeEmbedding table and queries each node table to get metadata
|
|
39
|
-
*
|
|
40
|
-
* @param executeQuery - Function to execute Cypher queries
|
|
41
|
-
* @param query - Search query text
|
|
42
|
-
* @param k - Number of results to return (default: 10)
|
|
43
|
-
* @param maxDistance - Maximum distance threshold (default: 0.5)
|
|
44
|
-
* @returns Array of search results ordered by relevance
|
|
50
|
+
* Perform semantic search using the vector index with chunk deduplication
|
|
45
51
|
*/
|
|
46
52
|
export declare const semanticSearch: (executeQuery: (cypher: string) => Promise<any[]>, query: string, k?: number, maxDistance?: number) => Promise<SemanticSearchResult[]>;
|
|
47
53
|
/**
|
|
48
54
|
* Semantic search with graph expansion (flattened results)
|
|
49
|
-
*
|
|
50
|
-
* Note: With multi-table schema, graph traversal is simplified.
|
|
51
|
-
* Returns semantic matches with their metadata.
|
|
52
|
-
* For full graph traversal, use execute_vector_cypher tool directly.
|
|
53
|
-
*
|
|
54
|
-
* @param executeQuery - Function to execute Cypher queries
|
|
55
|
-
* @param query - Search query text
|
|
56
|
-
* @param k - Number of initial semantic matches (default: 5)
|
|
57
|
-
* @param _hops - Unused (kept for API compatibility).
|
|
58
|
-
* @returns Semantic matches with metadata
|
|
59
55
|
*/
|
|
60
56
|
export declare const semanticSearchWithContext: (executeQuery: (cypher: string) => Promise<any[]>, query: string, k?: number, _hops?: number) => Promise<any[]>;
|
|
@@ -3,16 +3,18 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Orchestrates the background embedding process:
|
|
5
5
|
* 1. Query embeddable nodes from LadybugDB
|
|
6
|
-
* 2. Generate text representations
|
|
7
|
-
* 3.
|
|
8
|
-
* 4. Update LadybugDB with embeddings
|
|
6
|
+
* 2. Generate text representations with enriched metadata
|
|
7
|
+
* 3. Chunk long nodes, batch embed
|
|
8
|
+
* 4. Update LadybugDB with chunk-aware embeddings
|
|
9
9
|
* 5. Create vector index for semantic search
|
|
10
10
|
*/
|
|
11
11
|
import { createHash } from 'crypto';
|
|
12
12
|
import { initEmbedder, embedBatch, embedText, embeddingToArray, isEmbedderReady, } from './embedder.js';
|
|
13
|
-
import { generateEmbeddingText
|
|
14
|
-
import {
|
|
15
|
-
import {
|
|
13
|
+
import { generateEmbeddingText } from './text-generator.js';
|
|
14
|
+
import { chunkNode, characterChunk } from './chunker.js';
|
|
15
|
+
import { extractStructuralNames } from './structural-extractor.js';
|
|
16
|
+
import { DEFAULT_EMBEDDING_CONFIG, EMBEDDABLE_LABELS, isShortLabel, LABELS_WITH_EXPORTED, STRUCTURAL_LABELS, collectBestChunks, } from './types.js';
|
|
17
|
+
import { EMBEDDING_TABLE_NAME, EMBEDDING_INDEX_NAME, CREATE_VECTOR_INDEX_QUERY, STALE_HASH_SENTINEL, } from '../lbug/schema.js';
|
|
16
18
|
import { loadVectorExtension } from '../lbug/lbug-adapter.js';
|
|
17
19
|
const isDev = process.env.NODE_ENV === 'development';
|
|
18
20
|
/**
|
|
@@ -22,38 +24,55 @@ const isDev = process.env.NODE_ENV === 'development';
|
|
|
22
24
|
* vector-indexed rows).
|
|
23
25
|
*/
|
|
24
26
|
export const contentHashForNode = (node, config = {}) => {
|
|
25
|
-
|
|
27
|
+
// Hash must be deterministic across runs, so exclude methodNames/fieldNames
|
|
28
|
+
// which are populated during the batch loop via AST extraction.
|
|
29
|
+
// Using only node.content ensures the hash stays stable.
|
|
30
|
+
const text = generateEmbeddingText({ ...node, methodNames: undefined, fieldNames: undefined }, node.content, config);
|
|
26
31
|
return createHash('sha1').update(text).digest('hex');
|
|
27
32
|
};
|
|
28
33
|
/**
|
|
29
34
|
* Query all embeddable nodes from LadybugDB
|
|
30
|
-
* Uses table-specific queries
|
|
35
|
+
* Uses table-specific queries for different label types
|
|
31
36
|
*/
|
|
32
37
|
const queryEmbeddableNodes = async (executeQuery) => {
|
|
33
38
|
const allNodes = [];
|
|
34
|
-
// Query each embeddable table with table-specific columns
|
|
35
39
|
for (const label of EMBEDDABLE_LABELS) {
|
|
36
40
|
try {
|
|
37
41
|
let query;
|
|
38
|
-
if (label === '
|
|
39
|
-
//
|
|
42
|
+
if (label === 'Method') {
|
|
43
|
+
// Method has parameterCount and returnType
|
|
40
44
|
query = `
|
|
41
|
-
MATCH (n:
|
|
42
|
-
RETURN n.id AS id, n.name AS name, '
|
|
43
|
-
n.filePath AS filePath, n.content AS content
|
|
45
|
+
MATCH (n:Method)
|
|
46
|
+
RETURN n.id AS id, n.name AS name, 'Method' AS label,
|
|
47
|
+
n.filePath AS filePath, n.content AS content,
|
|
48
|
+
n.startLine AS startLine, n.endLine AS endLine,
|
|
49
|
+
n.isExported AS isExported, n.description AS description,
|
|
50
|
+
n.parameterCount AS parameterCount, n.returnType AS returnType
|
|
51
|
+
`;
|
|
52
|
+
}
|
|
53
|
+
else if (LABELS_WITH_EXPORTED.has(label)) {
|
|
54
|
+
// Function, Class, Interface have isExported and description
|
|
55
|
+
query = `
|
|
56
|
+
MATCH (n:\`${label}\`)
|
|
57
|
+
RETURN n.id AS id, n.name AS name, '${label}' AS label,
|
|
58
|
+
n.filePath AS filePath, n.content AS content,
|
|
59
|
+
n.startLine AS startLine, n.endLine AS endLine,
|
|
60
|
+
n.isExported AS isExported, n.description AS description
|
|
44
61
|
`;
|
|
45
62
|
}
|
|
46
63
|
else {
|
|
47
|
-
//
|
|
64
|
+
// Multi-language tables (Struct, Enum, etc.) — have description but no isExported
|
|
48
65
|
query = `
|
|
49
|
-
MATCH (n
|
|
50
|
-
RETURN n.id AS id, n.name AS name, '${label}' AS label,
|
|
66
|
+
MATCH (n:\`${label}\`)
|
|
67
|
+
RETURN n.id AS id, n.name AS name, '${label}' AS label,
|
|
51
68
|
n.filePath AS filePath, n.content AS content,
|
|
52
|
-
n.startLine AS startLine, n.endLine AS endLine
|
|
69
|
+
n.startLine AS startLine, n.endLine AS endLine,
|
|
70
|
+
n.description AS description
|
|
53
71
|
`;
|
|
54
72
|
}
|
|
55
73
|
const rows = await executeQuery(query);
|
|
56
74
|
for (const row of rows) {
|
|
75
|
+
const hasExportedColumn = label === 'Method' || LABELS_WITH_EXPORTED.has(label);
|
|
57
76
|
allNodes.push({
|
|
58
77
|
id: row.id ?? row[0],
|
|
59
78
|
name: row.name ?? row[1],
|
|
@@ -62,11 +81,18 @@ const queryEmbeddableNodes = async (executeQuery) => {
|
|
|
62
81
|
content: row.content ?? row[4] ?? '',
|
|
63
82
|
startLine: row.startLine ?? row[5],
|
|
64
83
|
endLine: row.endLine ?? row[6],
|
|
84
|
+
isExported: hasExportedColumn ? (row.isExported ?? row[7]) : undefined,
|
|
85
|
+
description: row.description ?? (hasExportedColumn ? row[8] : row[7]),
|
|
86
|
+
...(label === 'Method'
|
|
87
|
+
? {
|
|
88
|
+
parameterCount: row.parameterCount ?? row[9],
|
|
89
|
+
returnType: row.returnType ?? row[10],
|
|
90
|
+
}
|
|
91
|
+
: {}),
|
|
65
92
|
});
|
|
66
93
|
}
|
|
67
94
|
}
|
|
68
95
|
catch (error) {
|
|
69
|
-
// Table might not exist or be empty, continue
|
|
70
96
|
if (isDev) {
|
|
71
97
|
console.warn(`Query for ${label} nodes failed:`, error);
|
|
72
98
|
}
|
|
@@ -75,25 +101,28 @@ const queryEmbeddableNodes = async (executeQuery) => {
|
|
|
75
101
|
return allNodes;
|
|
76
102
|
};
|
|
77
103
|
/**
|
|
78
|
-
* Batch INSERT embeddings into
|
|
79
|
-
* Using a separate lightweight table avoids copy-on-write overhead
|
|
80
|
-
* that occurs when UPDATEing nodes with large content fields
|
|
104
|
+
* Batch INSERT chunk-aware embeddings into CodeEmbedding table
|
|
81
105
|
*/
|
|
82
|
-
const batchInsertEmbeddings = async (executeWithReusedStatement, updates) => {
|
|
83
|
-
|
|
84
|
-
const cypher = `MERGE (e:${EMBEDDING_TABLE_NAME} {nodeId: $nodeId}) SET e.embedding = $embedding, e.contentHash = $contentHash`;
|
|
106
|
+
export const batchInsertEmbeddings = async (executeWithReusedStatement, updates) => {
|
|
107
|
+
const cypher = `CREATE (e:${EMBEDDING_TABLE_NAME} {id: $id, nodeId: $nodeId, chunkIndex: $chunkIndex, startLine: $startLine, endLine: $endLine, embedding: $embedding, contentHash: $contentHash})`;
|
|
85
108
|
const paramsList = updates.map((u) => ({
|
|
86
|
-
|
|
109
|
+
id: `${u.nodeId}:${u.chunkIndex}`,
|
|
110
|
+
nodeId: u.nodeId,
|
|
111
|
+
chunkIndex: u.chunkIndex,
|
|
112
|
+
startLine: u.startLine,
|
|
113
|
+
endLine: u.endLine,
|
|
87
114
|
embedding: u.embedding,
|
|
88
|
-
contentHash: u.contentHash,
|
|
115
|
+
contentHash: u.contentHash ?? STALE_HASH_SENTINEL,
|
|
89
116
|
}));
|
|
90
117
|
await executeWithReusedStatement(cypher, paramsList);
|
|
91
118
|
};
|
|
92
119
|
/**
|
|
93
120
|
* Create the vector index for semantic search
|
|
121
|
+
|
|
94
122
|
* Now indexes the separate CodeEmbedding table.
|
|
95
123
|
* Delegates extension loading to lbug-adapter's loadVectorExtension(),
|
|
96
124
|
* which owns the VECTOR extension lifecycle and state tracking.
|
|
125
|
+
|
|
97
126
|
*/
|
|
98
127
|
const createVectorIndex = async (executeQuery) => {
|
|
99
128
|
// Delegate to the adapter which tracks loaded state and handles DB reconnect resets
|
|
@@ -102,7 +131,6 @@ const createVectorIndex = async (executeQuery) => {
|
|
|
102
131
|
await executeQuery(CREATE_VECTOR_INDEX_QUERY);
|
|
103
132
|
}
|
|
104
133
|
catch (error) {
|
|
105
|
-
// Index might already exist
|
|
106
134
|
if (isDev) {
|
|
107
135
|
console.warn('Vector index creation warning:', error);
|
|
108
136
|
}
|
|
@@ -115,11 +143,14 @@ const createVectorIndex = async (executeQuery) => {
|
|
|
115
143
|
* @param executeWithReusedStatement - Function to execute with reused prepared statement
|
|
116
144
|
* @param onProgress - Callback for progress updates
|
|
117
145
|
* @param config - Optional configuration override
|
|
146
|
+
* @param skipNodeIds - Optional set of node IDs that already have embeddings (incremental mode)
|
|
147
|
+
* @param context - Optional repo/server context for metadata enrichment
|
|
118
148
|
* @param existingEmbeddings - Optional map of nodeId → contentHash for incremental mode.
|
|
119
149
|
* Nodes whose hash matches are skipped; nodes with a changed hash are DELETE'd
|
|
120
150
|
* and re-embedded; nodes not in the map are embedded fresh.
|
|
151
|
+
|
|
121
152
|
*/
|
|
122
|
-
export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatement, onProgress, config = {}, existingEmbeddings) => {
|
|
153
|
+
export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatement, onProgress, config = {}, skipNodeIds, context, existingEmbeddings) => {
|
|
123
154
|
const finalConfig = { ...DEFAULT_EMBEDDING_CONFIG, ...config };
|
|
124
155
|
try {
|
|
125
156
|
// Phase 1: Load embedding model
|
|
@@ -148,6 +179,13 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
|
|
|
148
179
|
}
|
|
149
180
|
// Phase 2: Query embeddable nodes
|
|
150
181
|
let nodes = await queryEmbeddableNodes(executeQuery);
|
|
182
|
+
// Apply context metadata
|
|
183
|
+
if (context?.repoName) {
|
|
184
|
+
for (const node of nodes) {
|
|
185
|
+
node.repoName = context.repoName;
|
|
186
|
+
node.serverName = context.serverName;
|
|
187
|
+
}
|
|
188
|
+
}
|
|
151
189
|
// Incremental mode: compare content hashes, delete stale rows, skip fresh ones.
|
|
152
190
|
// Computed hashes for stale nodes are cached so batchInsertEmbeddings can reuse them
|
|
153
191
|
// (avoids double computation).
|
|
@@ -211,43 +249,99 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
|
|
|
211
249
|
});
|
|
212
250
|
return;
|
|
213
251
|
}
|
|
214
|
-
// Phase 3:
|
|
252
|
+
// Phase 3: Chunk + embed nodes
|
|
215
253
|
const batchSize = finalConfig.batchSize;
|
|
216
|
-
const
|
|
254
|
+
const chunkSize = finalConfig.chunkSize;
|
|
255
|
+
const overlap = finalConfig.overlap;
|
|
217
256
|
let processedNodes = 0;
|
|
257
|
+
let totalChunks = 0;
|
|
218
258
|
onProgress({
|
|
219
259
|
phase: 'embedding',
|
|
220
260
|
percent: 20,
|
|
221
261
|
nodesProcessed: 0,
|
|
222
262
|
totalNodes,
|
|
223
263
|
currentBatch: 0,
|
|
224
|
-
totalBatches,
|
|
264
|
+
totalBatches: Math.ceil(totalNodes / batchSize),
|
|
225
265
|
});
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
const
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
const
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
266
|
+
// Process in batches of nodes
|
|
267
|
+
for (let batchIndex = 0; batchIndex < totalNodes; batchIndex += batchSize) {
|
|
268
|
+
const batch = nodes.slice(batchIndex, batchIndex + batchSize);
|
|
269
|
+
// Chunk each node and generate text
|
|
270
|
+
const allTexts = [];
|
|
271
|
+
const allUpdates = [];
|
|
272
|
+
for (const node of batch) {
|
|
273
|
+
const isShort = isShortLabel(node.label);
|
|
274
|
+
const startLine = node.startLine ?? 0;
|
|
275
|
+
const endLine = node.endLine ?? 0;
|
|
276
|
+
// Extract structural names for class-like nodes via AST extractors
|
|
277
|
+
if (!isShort && STRUCTURAL_LABELS.has(node.label)) {
|
|
278
|
+
try {
|
|
279
|
+
const names = await extractStructuralNames(node.content, node.filePath);
|
|
280
|
+
node.methodNames = names.methodNames;
|
|
281
|
+
node.fieldNames = names.fieldNames;
|
|
282
|
+
}
|
|
283
|
+
catch {
|
|
284
|
+
// AST extraction failed — names stay undefined, text-generator handles gracefully
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
// Compute content hash once per node (re-use cached value for stale nodes)
|
|
288
|
+
const hash = computedStaleHashes.get(node.id) ?? contentHashForNode(node, finalConfig);
|
|
289
|
+
let chunks;
|
|
290
|
+
if (isShort) {
|
|
291
|
+
chunks = [{ text: node.content, chunkIndex: 0, startLine, endLine }];
|
|
292
|
+
}
|
|
293
|
+
else {
|
|
294
|
+
try {
|
|
295
|
+
chunks = await chunkNode(node.label, node.content, node.filePath, startLine, endLine, chunkSize, overlap);
|
|
296
|
+
}
|
|
297
|
+
catch (chunkErr) {
|
|
298
|
+
if (isDev) {
|
|
299
|
+
console.warn(`⚠️ AST chunking failed for ${node.label} "${node.name}" (${node.filePath}), falling back to character-based chunking:`, chunkErr);
|
|
300
|
+
}
|
|
301
|
+
chunks = characterChunk(node.content, startLine, endLine, chunkSize, overlap);
|
|
302
|
+
}
|
|
303
|
+
}
|
|
304
|
+
for (const chunk of chunks) {
|
|
305
|
+
const text = generateEmbeddingText(node, chunk.text, finalConfig);
|
|
306
|
+
allTexts.push(text);
|
|
307
|
+
allUpdates.push({
|
|
308
|
+
nodeId: node.id,
|
|
309
|
+
chunkIndex: chunk.chunkIndex,
|
|
310
|
+
startLine: chunk.startLine,
|
|
311
|
+
endLine: chunk.endLine,
|
|
312
|
+
contentHash: hash,
|
|
313
|
+
});
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
// Embed chunk texts in sub-batches to control memory
|
|
317
|
+
const EMBED_SUB_BATCH = 8;
|
|
318
|
+
for (let si = 0; si < allTexts.length; si += EMBED_SUB_BATCH) {
|
|
319
|
+
const subTexts = allTexts.slice(si, si + EMBED_SUB_BATCH);
|
|
320
|
+
const subUpdates = allUpdates.slice(si, si + EMBED_SUB_BATCH);
|
|
321
|
+
let embeddings;
|
|
322
|
+
try {
|
|
323
|
+
embeddings = await embedBatch(subTexts);
|
|
324
|
+
}
|
|
325
|
+
catch (embedErr) {
|
|
326
|
+
console.error(`❌ embedBatch failed for ${subTexts.length} texts (first: "${subTexts[0]?.substring(0, 80)}..."):`, embedErr);
|
|
327
|
+
throw embedErr;
|
|
328
|
+
}
|
|
329
|
+
const dbUpdates = subUpdates.map((u, i) => ({
|
|
330
|
+
...u,
|
|
331
|
+
embedding: embeddingToArray(embeddings[i]),
|
|
332
|
+
}));
|
|
333
|
+
await batchInsertEmbeddings(executeWithReusedStatement, dbUpdates);
|
|
334
|
+
}
|
|
241
335
|
processedNodes += batch.length;
|
|
242
|
-
|
|
336
|
+
totalChunks += allUpdates.length;
|
|
243
337
|
const embeddingProgress = 20 + (processedNodes / totalNodes) * 70;
|
|
244
338
|
onProgress({
|
|
245
339
|
phase: 'embedding',
|
|
246
340
|
percent: Math.round(embeddingProgress),
|
|
247
341
|
nodesProcessed: processedNodes,
|
|
248
342
|
totalNodes,
|
|
249
|
-
currentBatch: batchIndex + 1,
|
|
250
|
-
totalBatches,
|
|
343
|
+
currentBatch: Math.floor(batchIndex / batchSize) + 1,
|
|
344
|
+
totalBatches: Math.ceil(totalNodes / batchSize),
|
|
251
345
|
});
|
|
252
346
|
}
|
|
253
347
|
// Phase 4: Create vector index
|
|
@@ -261,7 +355,6 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
|
|
|
261
355
|
console.log('📇 Creating vector index...');
|
|
262
356
|
}
|
|
263
357
|
await createVectorIndex(executeQuery);
|
|
264
|
-
// Complete
|
|
265
358
|
onProgress({
|
|
266
359
|
phase: 'ready',
|
|
267
360
|
percent: 100,
|
|
@@ -269,7 +362,7 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
|
|
|
269
362
|
totalNodes,
|
|
270
363
|
});
|
|
271
364
|
if (isDev) {
|
|
272
|
-
console.log(
|
|
365
|
+
console.log(`✅ Embedding pipeline complete! (${totalChunks} chunks from ${totalNodes} nodes)`);
|
|
273
366
|
}
|
|
274
367
|
}
|
|
275
368
|
catch (error) {
|
|
@@ -286,68 +379,57 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
|
|
|
286
379
|
}
|
|
287
380
|
};
|
|
288
381
|
/**
|
|
289
|
-
* Perform semantic search using the vector index
|
|
290
|
-
*
|
|
291
|
-
* Uses CodeEmbedding table and queries each node table to get metadata
|
|
292
|
-
*
|
|
293
|
-
* @param executeQuery - Function to execute Cypher queries
|
|
294
|
-
* @param query - Search query text
|
|
295
|
-
* @param k - Number of results to return (default: 10)
|
|
296
|
-
* @param maxDistance - Maximum distance threshold (default: 0.5)
|
|
297
|
-
* @returns Array of search results ordered by relevance
|
|
382
|
+
* Perform semantic search using the vector index with chunk deduplication
|
|
298
383
|
*/
|
|
299
384
|
export const semanticSearch = async (executeQuery, query, k = 10, maxDistance = 0.5) => {
|
|
300
385
|
if (!isEmbedderReady()) {
|
|
301
386
|
throw new Error('Embedding model not initialized. Run embedding pipeline first.');
|
|
302
387
|
}
|
|
303
|
-
// Embed the query
|
|
304
388
|
const queryEmbedding = await embedText(query);
|
|
305
389
|
const queryVec = embeddingToArray(queryEmbedding);
|
|
306
390
|
const queryVecStr = `[${queryVec.join(',')}]`;
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
391
|
+
const bestChunks = await collectBestChunks(k, async (fetchLimit) => {
|
|
392
|
+
const vectorQuery = `
|
|
393
|
+
CALL QUERY_VECTOR_INDEX('${EMBEDDING_TABLE_NAME}', '${EMBEDDING_INDEX_NAME}',
|
|
394
|
+
CAST(${queryVecStr} AS FLOAT[${queryVec.length}]), ${fetchLimit})
|
|
395
|
+
YIELD node AS emb, distance
|
|
396
|
+
WITH emb, distance
|
|
397
|
+
WHERE distance < ${maxDistance}
|
|
398
|
+
RETURN emb.nodeId AS nodeId, emb.chunkIndex AS chunkIndex,
|
|
399
|
+
emb.startLine AS startLine, emb.endLine AS endLine, distance
|
|
400
|
+
ORDER BY distance
|
|
401
|
+
`;
|
|
402
|
+
const embResults = await executeQuery(vectorQuery);
|
|
403
|
+
return embResults.map((row) => ({
|
|
404
|
+
nodeId: row.nodeId ?? row[0],
|
|
405
|
+
chunkIndex: row.chunkIndex ?? row[1] ?? 0,
|
|
406
|
+
startLine: row.startLine ?? row[2] ?? 0,
|
|
407
|
+
endLine: row.endLine ?? row[3] ?? 0,
|
|
408
|
+
distance: row.distance ?? row[4],
|
|
409
|
+
}));
|
|
410
|
+
});
|
|
411
|
+
if (bestChunks.size === 0) {
|
|
319
412
|
return [];
|
|
320
413
|
}
|
|
321
414
|
// Group results by label for batched metadata queries
|
|
322
415
|
const byLabel = new Map();
|
|
323
|
-
for (const
|
|
324
|
-
const nodeId = embRow.nodeId ?? embRow[0];
|
|
325
|
-
const distance = embRow.distance ?? embRow[1];
|
|
416
|
+
for (const [nodeId, chunk] of Array.from(bestChunks.entries()).slice(0, k)) {
|
|
326
417
|
const labelEndIdx = nodeId.indexOf(':');
|
|
327
418
|
const label = labelEndIdx > 0 ? nodeId.substring(0, labelEndIdx) : 'Unknown';
|
|
328
419
|
if (!byLabel.has(label))
|
|
329
420
|
byLabel.set(label, []);
|
|
330
|
-
byLabel.get(label).push({ nodeId,
|
|
421
|
+
byLabel.get(label).push({ nodeId, ...chunk });
|
|
331
422
|
}
|
|
332
423
|
// Batch-fetch metadata per label
|
|
333
424
|
const results = [];
|
|
334
425
|
for (const [label, items] of byLabel) {
|
|
335
426
|
const idList = items.map((i) => `'${i.nodeId.replace(/'/g, "''")}'`).join(', ');
|
|
336
427
|
try {
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
`;
|
|
343
|
-
}
|
|
344
|
-
else {
|
|
345
|
-
nodeQuery = `
|
|
346
|
-
MATCH (n:${label}) WHERE n.id IN [${idList}]
|
|
347
|
-
RETURN n.id AS id, n.name AS name, n.filePath AS filePath,
|
|
348
|
-
n.startLine AS startLine, n.endLine AS endLine
|
|
349
|
-
`;
|
|
350
|
-
}
|
|
428
|
+
const nodeQuery = `
|
|
429
|
+
MATCH (n:\`${label}\`) WHERE n.id IN [${idList}]
|
|
430
|
+
RETURN n.id AS id, n.name AS name, n.filePath AS filePath,
|
|
431
|
+
n.startLine AS startLine, n.endLine AS endLine
|
|
432
|
+
`;
|
|
351
433
|
const nodeRows = await executeQuery(nodeQuery);
|
|
352
434
|
const rowMap = new Map();
|
|
353
435
|
for (const row of nodeRows) {
|
|
@@ -363,8 +445,8 @@ export const semanticSearch = async (executeQuery, query, k = 10, maxDistance =
|
|
|
363
445
|
label,
|
|
364
446
|
filePath: nodeRow.filePath ?? nodeRow[2] ?? '',
|
|
365
447
|
distance: item.distance,
|
|
366
|
-
startLine:
|
|
367
|
-
endLine:
|
|
448
|
+
startLine: item.startLine,
|
|
449
|
+
endLine: item.endLine,
|
|
368
450
|
});
|
|
369
451
|
}
|
|
370
452
|
}
|
|
@@ -373,26 +455,13 @@ export const semanticSearch = async (executeQuery, query, k = 10, maxDistance =
|
|
|
373
455
|
// Table might not exist, skip
|
|
374
456
|
}
|
|
375
457
|
}
|
|
376
|
-
// Re-sort by distance since batch queries may have mixed order
|
|
377
458
|
results.sort((a, b) => a.distance - b.distance);
|
|
378
459
|
return results;
|
|
379
460
|
};
|
|
380
461
|
/**
|
|
381
462
|
* Semantic search with graph expansion (flattened results)
|
|
382
|
-
*
|
|
383
|
-
* Note: With multi-table schema, graph traversal is simplified.
|
|
384
|
-
* Returns semantic matches with their metadata.
|
|
385
|
-
* For full graph traversal, use execute_vector_cypher tool directly.
|
|
386
|
-
*
|
|
387
|
-
* @param executeQuery - Function to execute Cypher queries
|
|
388
|
-
* @param query - Search query text
|
|
389
|
-
* @param k - Number of initial semantic matches (default: 5)
|
|
390
|
-
* @param _hops - Unused (kept for API compatibility).
|
|
391
|
-
* @returns Semantic matches with metadata
|
|
392
463
|
*/
|
|
393
464
|
export const semanticSearchWithContext = async (executeQuery, query, k = 5, _hops = 1) => {
|
|
394
|
-
// For multi-table schema, just return semantic search results
|
|
395
|
-
// Graph traversal is complex with separate tables - use execute_vector_cypher instead
|
|
396
465
|
const results = await semanticSearch(executeQuery, query, k, 0.5);
|
|
397
466
|
return results.map((r) => ({
|
|
398
467
|
matchId: r.nodeId,
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
/**
 * Line range returned by `resolveChunkLines`.
 *
 * Both values are the caller-supplied base start line plus a zero-based line
 * index relative to the indexed content.
 */
export interface ResolvedLineRange {
    /** Line on which the chunk's start offset falls. */
    startLine: number;
    /** Line on which the chunk's last character falls. */
    endLine: number;
}
|
|
5
|
+
/**
 * Build a table of line-start character offsets for `content`.
 * Entry 0 is always 0; one further entry is added after every "\n".
 */
export declare const buildLineIndex: (content: string) => Int32Array;
|
|
6
|
+
/**
 * Map a character offset to the zero-based index of the line containing it,
 * using a table produced by `buildLineIndex`. Out-of-range offsets resolve to
 * the first or last line; an empty table yields 0.
 */
export declare const lineFromOffset: (lineOffsets: Int32Array, charOffset: number) => number;
|
|
7
|
+
/**
 * Convert a chunk's character span into a line range.
 * The end line is taken from the character at `endOffset - 1` (or from
 * `startOffset` when the span is empty or inverted), and `baseStartLine` is
 * added to both resulting line indices.
 */
export declare const resolveChunkLines: (lineOffsets: Int32Array, startOffset: number, endOffset: number, baseStartLine: number) => ResolvedLineRange;
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
 * Build a table of line-start character offsets for `content`.
 *
 * Entry `i` is the offset at which zero-based line `i` begins. The table
 * always starts with 0 and gains one entry per line feed ("\n"), pointing at
 * the character right after it.
 *
 * @param content - Text to index.
 * @returns Line-start offsets in ascending order.
 */
export const buildLineIndex = (content) => {
    const lineStarts = [0];
    // Jump from one "\n" to the next instead of inspecting every character.
    let newlineAt = content.indexOf('\n');
    while (newlineAt !== -1) {
        lineStarts.push(newlineAt + 1);
        newlineAt = content.indexOf('\n', newlineAt + 1);
    }
    return new Int32Array(lineStarts);
};
|
|
9
|
+
/**
 * Clamp a character offset to the range [0, start offset of the last line].
 *
 * @param lineOffsets - Line-start offsets (see buildLineIndex).
 * @param charOffset - Offset to clamp.
 * @returns 0 for an empty table or a negative offset, the last line's start
 *          offset when `charOffset` exceeds it, otherwise `charOffset` itself.
 */
const clampOffset = (lineOffsets, charOffset) => {
    const lineCount = lineOffsets.length;
    if (lineCount === 0 || charOffset < 0) {
        return 0;
    }
    const lastLineStart = lineOffsets[lineCount - 1];
    return charOffset > lastLineStart ? lastLineStart : charOffset;
};
|
|
19
|
+
export const lineFromOffset = (lineOffsets, charOffset) => {
|
|
20
|
+
if (lineOffsets.length === 0)
|
|
21
|
+
return 0;
|
|
22
|
+
const clamped = clampOffset(lineOffsets, charOffset);
|
|
23
|
+
let lo = 0;
|
|
24
|
+
let hi = lineOffsets.length - 1;
|
|
25
|
+
while (lo < hi) {
|
|
26
|
+
const mid = (lo + hi + 1) >> 1;
|
|
27
|
+
if (lineOffsets[mid] <= clamped)
|
|
28
|
+
lo = mid;
|
|
29
|
+
else
|
|
30
|
+
hi = mid - 1;
|
|
31
|
+
}
|
|
32
|
+
return lo;
|
|
33
|
+
};
|
|
34
|
+
export const resolveChunkLines = (lineOffsets, startOffset, endOffset, baseStartLine) => {
|
|
35
|
+
const relativeStartLine = lineFromOffset(lineOffsets, startOffset);
|
|
36
|
+
const effectiveEndOffset = endOffset > startOffset ? endOffset - 1 : startOffset;
|
|
37
|
+
const relativeEndLine = lineFromOffset(lineOffsets, effectiveEndOffset);
|
|
38
|
+
return {
|
|
39
|
+
startLine: baseStartLine + relativeStartLine,
|
|
40
|
+
endLine: baseStartLine + relativeEndLine,
|
|
41
|
+
};
|
|
42
|
+
};
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Server Mapping Configuration
|
|
3
|
+
*
|
|
4
|
+
* Reads ~/.gitnexus/server-mapping.json to map repo names to service names.
|
|
5
|
+
* Used in embedding text to enrich metadata with microservice context.
|
|
6
|
+
*/
|
|
7
|
+
/**
|
|
8
|
+
* Read the server mapping file and return the serverName for a given repoName.
|
|
9
|
+
* Returns undefined if no mapping exists.
|
|
10
|
+
*/
|
|
11
|
+
export declare const readServerMapping: (repoName: string) => Promise<string | undefined>;
|
|
12
|
+
/**
|
|
13
|
+
* Clear the cached mapping (useful for testing or after file changes)
|
|
14
|
+
*/
|
|
15
|
+
export declare const clearServerMappingCache: () => void;
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Server Mapping Configuration
|
|
3
|
+
*
|
|
4
|
+
* Reads ~/.gitnexus/server-mapping.json to map repo names to service names.
|
|
5
|
+
* Used in embedding text to enrich metadata with microservice context.
|
|
6
|
+
*/
|
|
7
|
+
import fs from 'fs/promises';
|
|
8
|
+
import path from 'path';
|
|
9
|
+
import os from 'os';
|
|
10
|
+
const MAPPING_FILE = path.join(os.homedir(), '.gitnexus', 'server-mapping.json');
|
|
11
|
+
let cachedMapping = null;
|
|
12
|
+
/**
|
|
13
|
+
* Read the server mapping file and return the serverName for a given repoName.
|
|
14
|
+
* Returns undefined if no mapping exists.
|
|
15
|
+
*/
|
|
16
|
+
export const readServerMapping = async (repoName) => {
|
|
17
|
+
try {
|
|
18
|
+
if (!cachedMapping) {
|
|
19
|
+
const raw = await fs.readFile(MAPPING_FILE, 'utf-8');
|
|
20
|
+
cachedMapping = JSON.parse(raw);
|
|
21
|
+
}
|
|
22
|
+
return cachedMapping[repoName];
|
|
23
|
+
}
|
|
24
|
+
catch {
|
|
25
|
+
return undefined;
|
|
26
|
+
}
|
|
27
|
+
};
|
|
28
|
+
/**
|
|
29
|
+
* Clear the cached mapping (useful for testing or after file changes)
|
|
30
|
+
*/
|
|
31
|
+
export const clearServerMappingCache = () => {
|
|
32
|
+
cachedMapping = null;
|
|
33
|
+
};
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Structural Extractor Module
|
|
3
|
+
*
|
|
4
|
+
* Reuses ingestion pipeline's AST-based MethodExtractor / FieldExtractor
|
|
5
|
+
* to extract method and field names for embedding text generation.
|
|
6
|
+
*/
|
|
7
|
+
export interface StructuralNames {
|
|
8
|
+
methodNames: string[];
|
|
9
|
+
fieldNames: string[];
|
|
10
|
+
}
|
|
11
|
+
/**
|
|
12
|
+
* Extract method and field names from a class/struct/interface node
|
|
13
|
+
* using the ingestion pipeline's AST extractors.
|
|
14
|
+
*/
|
|
15
|
+
export declare const extractStructuralNames: (content: string, filePath: string) => Promise<StructuralNames>;
|