gitnexus 1.6.2-rc.8 → 1.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (222)
  1. package/dist/_shared/lbug/schema-constants.d.ts +1 -1
  2. package/dist/_shared/lbug/schema-constants.d.ts.map +1 -1
  3. package/dist/_shared/lbug/schema-constants.js +1 -0
  4. package/dist/_shared/lbug/schema-constants.js.map +1 -1
  5. package/dist/_shared/mro-strategy.d.ts +38 -16
  6. package/dist/_shared/mro-strategy.d.ts.map +1 -1
  7. package/dist/cli/ai-context.js +0 -58
  8. package/dist/cli/analyze.js +3 -0
  9. package/dist/core/embeddings/ast-utils.d.ts +22 -0
  10. package/dist/core/embeddings/ast-utils.js +105 -0
  11. package/dist/core/embeddings/character-chunk.d.ts +12 -0
  12. package/dist/core/embeddings/character-chunk.js +43 -0
  13. package/dist/core/embeddings/chunker.d.ts +14 -0
  14. package/dist/core/embeddings/chunker.js +234 -0
  15. package/dist/core/embeddings/embedder.js +5 -0
  16. package/dist/core/embeddings/embedding-pipeline.d.ts +20 -24
  17. package/dist/core/embeddings/embedding-pipeline.js +176 -107
  18. package/dist/core/embeddings/line-index.d.ts +7 -0
  19. package/dist/core/embeddings/line-index.js +42 -0
  20. package/dist/core/embeddings/server-mapping.d.ts +15 -0
  21. package/dist/core/embeddings/server-mapping.js +33 -0
  22. package/dist/core/embeddings/structural-extractor.d.ts +15 -0
  23. package/dist/core/embeddings/structural-extractor.js +58 -0
  24. package/dist/core/embeddings/text-generator.d.ts +20 -13
  25. package/dist/core/embeddings/text-generator.js +151 -119
  26. package/dist/core/embeddings/types.d.ts +81 -3
  27. package/dist/core/embeddings/types.js +105 -3
  28. package/dist/core/group/extractors/http-patterns/node.js +130 -0
  29. package/dist/core/ingestion/call-extractors/configs/c-cpp.d.ts +3 -0
  30. package/dist/core/ingestion/call-extractors/configs/c-cpp.js +8 -0
  31. package/dist/core/ingestion/call-extractors/configs/csharp.d.ts +2 -0
  32. package/dist/core/ingestion/call-extractors/configs/csharp.js +6 -0
  33. package/dist/core/ingestion/call-extractors/configs/dart.d.ts +2 -0
  34. package/dist/core/ingestion/call-extractors/configs/dart.js +5 -0
  35. package/dist/core/ingestion/call-extractors/configs/go.d.ts +2 -0
  36. package/dist/core/ingestion/call-extractors/configs/go.js +5 -0
  37. package/dist/core/ingestion/call-extractors/configs/jvm.d.ts +3 -0
  38. package/dist/core/ingestion/call-extractors/configs/jvm.js +51 -0
  39. package/dist/core/ingestion/call-extractors/configs/php.d.ts +2 -0
  40. package/dist/core/ingestion/call-extractors/configs/php.js +5 -0
  41. package/dist/core/ingestion/call-extractors/configs/python.d.ts +2 -0
  42. package/dist/core/ingestion/call-extractors/configs/python.js +5 -0
  43. package/dist/core/ingestion/call-extractors/configs/ruby.d.ts +2 -0
  44. package/dist/core/ingestion/call-extractors/configs/ruby.js +5 -0
  45. package/dist/core/ingestion/call-extractors/configs/rust.d.ts +2 -0
  46. package/dist/core/ingestion/call-extractors/configs/rust.js +5 -0
  47. package/dist/core/ingestion/call-extractors/configs/swift.d.ts +2 -0
  48. package/dist/core/ingestion/call-extractors/configs/swift.js +5 -0
  49. package/dist/core/ingestion/call-extractors/configs/typescript-javascript.d.ts +3 -0
  50. package/dist/core/ingestion/call-extractors/configs/typescript-javascript.js +8 -0
  51. package/dist/core/ingestion/call-extractors/generic.d.ts +5 -0
  52. package/dist/core/ingestion/call-extractors/generic.js +59 -0
  53. package/dist/core/ingestion/call-processor.d.ts +2 -4
  54. package/dist/core/ingestion/call-processor.js +221 -89
  55. package/dist/core/ingestion/call-routing.d.ts +8 -12
  56. package/dist/core/ingestion/call-routing.js +13 -34
  57. package/dist/core/ingestion/call-types.d.ts +135 -0
  58. package/dist/core/ingestion/call-types.js +2 -0
  59. package/dist/core/ingestion/class-extractors/configs/c-cpp.d.ts +3 -0
  60. package/dist/core/ingestion/class-extractors/configs/c-cpp.js +11 -0
  61. package/dist/core/ingestion/class-extractors/configs/csharp.d.ts +2 -0
  62. package/dist/core/ingestion/class-extractors/configs/csharp.js +21 -0
  63. package/dist/core/ingestion/class-extractors/configs/dart.d.ts +2 -0
  64. package/dist/core/ingestion/class-extractors/configs/dart.js +7 -0
  65. package/dist/core/ingestion/class-extractors/configs/go.d.ts +2 -0
  66. package/dist/core/ingestion/class-extractors/configs/go.js +20 -0
  67. package/dist/core/ingestion/class-extractors/configs/jvm.d.ts +3 -0
  68. package/dist/core/ingestion/class-extractors/configs/jvm.js +35 -0
  69. package/dist/core/ingestion/class-extractors/configs/php.d.ts +2 -0
  70. package/dist/core/ingestion/class-extractors/configs/php.js +7 -0
  71. package/dist/core/ingestion/class-extractors/configs/python.d.ts +2 -0
  72. package/dist/core/ingestion/class-extractors/configs/python.js +7 -0
  73. package/dist/core/ingestion/class-extractors/configs/ruby.d.ts +2 -0
  74. package/dist/core/ingestion/class-extractors/configs/ruby.js +7 -0
  75. package/dist/core/ingestion/class-extractors/configs/rust.d.ts +2 -0
  76. package/dist/core/ingestion/class-extractors/configs/rust.js +7 -0
  77. package/dist/core/ingestion/class-extractors/configs/swift.d.ts +2 -0
  78. package/dist/core/ingestion/class-extractors/configs/swift.js +18 -0
  79. package/dist/core/ingestion/class-extractors/configs/typescript-javascript.d.ts +4 -0
  80. package/dist/core/ingestion/class-extractors/configs/typescript-javascript.js +28 -0
  81. package/dist/core/ingestion/field-types.d.ts +1 -1
  82. package/dist/core/ingestion/heritage-extractors/configs/go.d.ts +13 -0
  83. package/dist/core/ingestion/heritage-extractors/configs/go.js +20 -0
  84. package/dist/core/ingestion/heritage-extractors/configs/ruby.d.ts +18 -0
  85. package/dist/core/ingestion/heritage-extractors/configs/ruby.js +65 -0
  86. package/dist/core/ingestion/heritage-extractors/generic.d.ts +23 -0
  87. package/dist/core/ingestion/heritage-extractors/generic.js +47 -0
  88. package/dist/core/ingestion/heritage-processor.d.ts +9 -0
  89. package/dist/core/ingestion/heritage-processor.js +120 -85
  90. package/dist/core/ingestion/heritage-types.d.ts +73 -0
  91. package/dist/core/ingestion/heritage-types.js +2 -0
  92. package/dist/core/ingestion/import-resolvers/configs/c-cpp.d.ts +7 -0
  93. package/dist/core/ingestion/import-resolvers/configs/c-cpp.js +14 -0
  94. package/dist/core/ingestion/import-resolvers/configs/csharp.d.ts +8 -0
  95. package/dist/core/ingestion/import-resolvers/configs/csharp.js +27 -0
  96. package/dist/core/ingestion/import-resolvers/configs/dart.d.ts +17 -0
  97. package/dist/core/ingestion/import-resolvers/{dart.js → configs/dart.js} +26 -16
  98. package/dist/core/ingestion/import-resolvers/configs/go.d.ts +8 -0
  99. package/dist/core/ingestion/import-resolvers/configs/go.js +26 -0
  100. package/dist/core/ingestion/import-resolvers/configs/jvm.d.ts +13 -0
  101. package/dist/core/ingestion/import-resolvers/configs/jvm.js +68 -0
  102. package/dist/core/ingestion/import-resolvers/configs/php.d.ts +8 -0
  103. package/dist/core/ingestion/import-resolvers/configs/php.js +15 -0
  104. package/dist/core/ingestion/import-resolvers/configs/python.d.ts +12 -0
  105. package/dist/core/ingestion/import-resolvers/configs/python.js +41 -0
  106. package/dist/core/ingestion/import-resolvers/configs/ruby.d.ts +8 -0
  107. package/dist/core/ingestion/import-resolvers/configs/ruby.js +16 -0
  108. package/dist/core/ingestion/import-resolvers/configs/rust.d.ts +8 -0
  109. package/dist/core/ingestion/import-resolvers/configs/rust.js +54 -0
  110. package/dist/core/ingestion/import-resolvers/configs/swift.d.ts +8 -0
  111. package/dist/core/ingestion/import-resolvers/{swift.js → configs/swift.js} +10 -5
  112. package/dist/core/ingestion/import-resolvers/configs/typescript-javascript.d.ts +9 -0
  113. package/dist/core/ingestion/import-resolvers/configs/typescript-javascript.js +23 -0
  114. package/dist/core/ingestion/import-resolvers/csharp.d.ts +4 -5
  115. package/dist/core/ingestion/import-resolvers/csharp.js +4 -20
  116. package/dist/core/ingestion/import-resolvers/go.d.ts +4 -5
  117. package/dist/core/ingestion/import-resolvers/go.js +4 -19
  118. package/dist/core/ingestion/import-resolvers/jvm.d.ts +5 -10
  119. package/dist/core/ingestion/import-resolvers/jvm.js +5 -58
  120. package/dist/core/ingestion/import-resolvers/php.d.ts +4 -5
  121. package/dist/core/ingestion/import-resolvers/php.js +4 -7
  122. package/dist/core/ingestion/import-resolvers/python.d.ts +3 -6
  123. package/dist/core/ingestion/import-resolvers/python.js +3 -18
  124. package/dist/core/ingestion/import-resolvers/resolver-factory.d.ts +24 -0
  125. package/dist/core/ingestion/import-resolvers/resolver-factory.js +33 -0
  126. package/dist/core/ingestion/import-resolvers/ruby.d.ts +4 -5
  127. package/dist/core/ingestion/import-resolvers/ruby.js +4 -7
  128. package/dist/core/ingestion/import-resolvers/rust.d.ts +4 -5
  129. package/dist/core/ingestion/import-resolvers/rust.js +4 -47
  130. package/dist/core/ingestion/import-resolvers/standard.d.ts +3 -9
  131. package/dist/core/ingestion/import-resolvers/standard.js +7 -8
  132. package/dist/core/ingestion/import-resolvers/types.d.ts +24 -0
  133. package/dist/core/ingestion/language-provider.d.ts +80 -0
  134. package/dist/core/ingestion/languages/c-cpp.js +18 -12
  135. package/dist/core/ingestion/languages/csharp.js +13 -21
  136. package/dist/core/ingestion/languages/dart.js +13 -7
  137. package/dist/core/ingestion/languages/go.js +14 -20
  138. package/dist/core/ingestion/languages/java.js +13 -18
  139. package/dist/core/ingestion/languages/kotlin.js +13 -13
  140. package/dist/core/ingestion/languages/php.js +13 -7
  141. package/dist/core/ingestion/languages/python.js +13 -7
  142. package/dist/core/ingestion/languages/ruby.js +103 -22
  143. package/dist/core/ingestion/languages/rust.js +13 -7
  144. package/dist/core/ingestion/languages/swift.js +13 -18
  145. package/dist/core/ingestion/languages/typescript.js +18 -23
  146. package/dist/core/ingestion/languages/vue.js +13 -17
  147. package/dist/core/ingestion/model/heritage-map.d.ts +35 -0
  148. package/dist/core/ingestion/model/heritage-map.js +110 -9
  149. package/dist/core/ingestion/model/index.d.ts +2 -2
  150. package/dist/core/ingestion/model/index.js +1 -1
  151. package/dist/core/ingestion/model/resolve.d.ts +33 -28
  152. package/dist/core/ingestion/model/resolve.js +111 -27
  153. package/dist/core/ingestion/parsing-processor.d.ts +1 -2
  154. package/dist/core/ingestion/pipeline-phases/parse-impl.d.ts +1 -0
  155. package/dist/core/ingestion/pipeline-phases/parse-impl.js +9 -3
  156. package/dist/core/ingestion/pipeline-phases/parse.d.ts +7 -0
  157. package/dist/core/ingestion/pipeline.d.ts +11 -0
  158. package/dist/core/ingestion/pipeline.js +9 -2
  159. package/dist/core/ingestion/tree-sitter-queries.d.ts +11 -11
  160. package/dist/core/ingestion/tree-sitter-queries.js +81 -0
  161. package/dist/core/ingestion/type-env.d.ts +1 -1
  162. package/dist/core/ingestion/utils/ast-helpers.d.ts +1 -1
  163. package/dist/core/ingestion/utils/ast-helpers.js +22 -2
  164. package/dist/core/ingestion/utils/ruby-self-call.d.ts +52 -0
  165. package/dist/core/ingestion/utils/ruby-self-call.js +59 -0
  166. package/dist/core/ingestion/variable-extractors/configs/c-cpp.d.ts +3 -0
  167. package/dist/core/ingestion/variable-extractors/configs/c-cpp.js +81 -0
  168. package/dist/core/ingestion/variable-extractors/configs/csharp.d.ts +9 -0
  169. package/dist/core/ingestion/variable-extractors/configs/csharp.js +63 -0
  170. package/dist/core/ingestion/variable-extractors/configs/dart.d.ts +2 -0
  171. package/dist/core/ingestion/variable-extractors/configs/dart.js +94 -0
  172. package/dist/core/ingestion/variable-extractors/configs/go.d.ts +2 -0
  173. package/dist/core/ingestion/variable-extractors/configs/go.js +83 -0
  174. package/dist/core/ingestion/variable-extractors/configs/jvm.d.ts +18 -0
  175. package/dist/core/ingestion/variable-extractors/configs/jvm.js +115 -0
  176. package/dist/core/ingestion/variable-extractors/configs/php.d.ts +14 -0
  177. package/dist/core/ingestion/variable-extractors/configs/php.js +58 -0
  178. package/dist/core/ingestion/variable-extractors/configs/python.d.ts +2 -0
  179. package/dist/core/ingestion/variable-extractors/configs/python.js +101 -0
  180. package/dist/core/ingestion/variable-extractors/configs/ruby.d.ts +11 -0
  181. package/dist/core/ingestion/variable-extractors/configs/ruby.js +52 -0
  182. package/dist/core/ingestion/variable-extractors/configs/rust.d.ts +2 -0
  183. package/dist/core/ingestion/variable-extractors/configs/rust.js +76 -0
  184. package/dist/core/ingestion/variable-extractors/configs/swift.d.ts +2 -0
  185. package/dist/core/ingestion/variable-extractors/configs/swift.js +88 -0
  186. package/dist/core/ingestion/variable-extractors/configs/typescript-javascript.d.ts +3 -0
  187. package/dist/core/ingestion/variable-extractors/configs/typescript-javascript.js +83 -0
  188. package/dist/core/ingestion/variable-extractors/generic.d.ts +5 -0
  189. package/dist/core/ingestion/variable-extractors/generic.js +80 -0
  190. package/dist/core/ingestion/variable-types.d.ts +82 -0
  191. package/dist/core/ingestion/variable-types.js +2 -0
  192. package/dist/core/ingestion/workers/parse-worker.js +244 -217
  193. package/dist/core/ingestion/workers/worker-pool.js +3 -0
  194. package/dist/core/lbug/csv-generator.js +1 -0
  195. package/dist/core/lbug/lbug-adapter.d.ts +4 -5
  196. package/dist/core/lbug/lbug-adapter.js +38 -14
  197. package/dist/core/lbug/schema.d.ts +2 -1
  198. package/dist/core/lbug/schema.js +10 -1
  199. package/dist/core/run-analyze.js +6 -7
  200. package/dist/core/tree-sitter/parser-loader.d.ts +3 -0
  201. package/dist/core/tree-sitter/parser-loader.js +17 -8
  202. package/dist/mcp/core/embedder.js +5 -0
  203. package/dist/mcp/local/local-backend.js +29 -19
  204. package/dist/server/api.js +2 -0
  205. package/dist/types/pipeline.d.ts +6 -0
  206. package/package.json +8 -7
  207. package/scripts/build-tree-sitter-proto.cjs +82 -0
  208. package/vendor/node_modules/node-addon-api/node_addon_api.Makefile +6 -0
  209. package/vendor/node_modules/node-addon-api/node_addon_api.target.mk +104 -0
  210. package/vendor/node_modules/node-addon-api/node_addon_api_except.target.mk +108 -0
  211. package/vendor/node_modules/node-addon-api/node_addon_api_except_all.target.mk +104 -0
  212. package/vendor/node_modules/node-addon-api/node_addon_api_maybe.target.mk +104 -0
  213. package/vendor/tree-sitter-proto/package.json +1 -7
  214. package/dist/core/ingestion/call-sites/extract-language-call-site.d.ts +0 -10
  215. package/dist/core/ingestion/call-sites/extract-language-call-site.js +0 -22
  216. package/dist/core/ingestion/call-sites/java.d.ts +0 -9
  217. package/dist/core/ingestion/call-sites/java.js +0 -30
  218. package/dist/core/ingestion/import-resolvers/dart.d.ts +0 -7
  219. package/dist/core/ingestion/import-resolvers/swift.d.ts +0 -7
  220. package/dist/core/ingestion/import-resolvers/vue.d.ts +0 -8
  221. package/dist/core/ingestion/import-resolvers/vue.js +0 -9
  222. package/scripts/preinstall-cleanup.cjs +0 -34
@@ -3,12 +3,12 @@
3
3
  *
4
4
  * Orchestrates the background embedding process:
5
5
  * 1. Query embeddable nodes from LadybugDB
6
- * 2. Generate text representations
7
- * 3. Batch embed using transformers.js
8
- * 4. Update LadybugDB with embeddings
6
+ * 2. Generate text representations with enriched metadata
7
+ * 3. Chunk long nodes, batch embed
8
+ * 4. Update LadybugDB with chunk-aware embeddings
9
9
  * 5. Create vector index for semantic search
10
10
  */
11
- import { type EmbeddingProgress, type EmbeddingConfig, type EmbeddableNode, type SemanticSearchResult } from './types.js';
11
+ import { type EmbeddingProgress, type EmbeddingConfig, type EmbeddableNode, type SemanticSearchResult, type EmbeddingContext } from './types.js';
12
12
  /**
13
13
  * Compute a stable content fingerprint for an embeddable node.
14
14
  * Used to detect when the underlying text has changed so stale vectors
@@ -20,6 +20,17 @@ export declare const contentHashForNode: (node: EmbeddableNode, config?: Partial
20
20
  * Progress callback type
21
21
  */
22
22
  export type EmbeddingProgressCallback = (progress: EmbeddingProgress) => void;
23
+ /**
24
+ * Batch INSERT chunk-aware embeddings into CodeEmbedding table
25
+ */
26
+ export declare const batchInsertEmbeddings: (executeWithReusedStatement: (cypher: string, paramsList: Array<Record<string, any>>) => Promise<void>, updates: Array<{
27
+ nodeId: string;
28
+ chunkIndex: number;
29
+ startLine: number;
30
+ endLine: number;
31
+ embedding: number[];
32
+ contentHash?: string;
33
+ }>) => Promise<void>;
23
34
  /**
24
35
  * Run the embedding pipeline
25
36
  *
@@ -27,34 +38,19 @@ export type EmbeddingProgressCallback = (progress: EmbeddingProgress) => void;
27
38
  * @param executeWithReusedStatement - Function to execute with reused prepared statement
28
39
  * @param onProgress - Callback for progress updates
29
40
  * @param config - Optional configuration override
41
+ * @param skipNodeIds - Optional set of node IDs that already have embeddings (incremental mode)
42
+ * @param context - Optional repo/server context for metadata enrichment
30
43
  * @param existingEmbeddings - Optional map of nodeId → contentHash for incremental mode.
31
44
  * Nodes whose hash matches are skipped; nodes with a changed hash are DELETE'd
32
45
  * and re-embedded; nodes not in the map are embedded fresh.
46
+
33
47
  */
34
- export declare const runEmbeddingPipeline: (executeQuery: (cypher: string) => Promise<any[]>, executeWithReusedStatement: (cypher: string, paramsList: Array<Record<string, any>>) => Promise<void>, onProgress: EmbeddingProgressCallback, config?: Partial<EmbeddingConfig>, existingEmbeddings?: Map<string, string>) => Promise<void>;
48
+ export declare const runEmbeddingPipeline: (executeQuery: (cypher: string) => Promise<any[]>, executeWithReusedStatement: (cypher: string, paramsList: Array<Record<string, any>>) => Promise<void>, onProgress: EmbeddingProgressCallback, config?: Partial<EmbeddingConfig>, skipNodeIds?: Set<string>, context?: EmbeddingContext, existingEmbeddings?: Map<string, string>) => Promise<void>;
35
49
  /**
36
- * Perform semantic search using the vector index
37
- *
38
- * Uses CodeEmbedding table and queries each node table to get metadata
39
- *
40
- * @param executeQuery - Function to execute Cypher queries
41
- * @param query - Search query text
42
- * @param k - Number of results to return (default: 10)
43
- * @param maxDistance - Maximum distance threshold (default: 0.5)
44
- * @returns Array of search results ordered by relevance
50
+ * Perform semantic search using the vector index with chunk deduplication
45
51
  */
46
52
  export declare const semanticSearch: (executeQuery: (cypher: string) => Promise<any[]>, query: string, k?: number, maxDistance?: number) => Promise<SemanticSearchResult[]>;
47
53
  /**
48
54
  * Semantic search with graph expansion (flattened results)
49
- *
50
- * Note: With multi-table schema, graph traversal is simplified.
51
- * Returns semantic matches with their metadata.
52
- * For full graph traversal, use execute_vector_cypher tool directly.
53
- *
54
- * @param executeQuery - Function to execute Cypher queries
55
- * @param query - Search query text
56
- * @param k - Number of initial semantic matches (default: 5)
57
- * @param _hops - Unused (kept for API compatibility).
58
- * @returns Semantic matches with metadata
59
55
  */
60
56
  export declare const semanticSearchWithContext: (executeQuery: (cypher: string) => Promise<any[]>, query: string, k?: number, _hops?: number) => Promise<any[]>;
@@ -3,16 +3,18 @@
3
3
  *
4
4
  * Orchestrates the background embedding process:
5
5
  * 1. Query embeddable nodes from LadybugDB
6
- * 2. Generate text representations
7
- * 3. Batch embed using transformers.js
8
- * 4. Update LadybugDB with embeddings
6
+ * 2. Generate text representations with enriched metadata
7
+ * 3. Chunk long nodes, batch embed
8
+ * 4. Update LadybugDB with chunk-aware embeddings
9
9
  * 5. Create vector index for semantic search
10
10
  */
11
11
  import { createHash } from 'crypto';
12
12
  import { initEmbedder, embedBatch, embedText, embeddingToArray, isEmbedderReady, } from './embedder.js';
13
- import { generateEmbeddingText, generateBatchEmbeddingTexts } from './text-generator.js';
14
- import { DEFAULT_EMBEDDING_CONFIG, EMBEDDABLE_LABELS, } from './types.js';
15
- import { EMBEDDING_TABLE_NAME, EMBEDDING_INDEX_NAME, CREATE_VECTOR_INDEX_QUERY, } from '../lbug/schema.js';
13
+ import { generateEmbeddingText } from './text-generator.js';
14
+ import { chunkNode, characterChunk } from './chunker.js';
15
+ import { extractStructuralNames } from './structural-extractor.js';
16
+ import { DEFAULT_EMBEDDING_CONFIG, EMBEDDABLE_LABELS, isShortLabel, LABELS_WITH_EXPORTED, STRUCTURAL_LABELS, collectBestChunks, } from './types.js';
17
+ import { EMBEDDING_TABLE_NAME, EMBEDDING_INDEX_NAME, CREATE_VECTOR_INDEX_QUERY, STALE_HASH_SENTINEL, } from '../lbug/schema.js';
16
18
  import { loadVectorExtension } from '../lbug/lbug-adapter.js';
17
19
  const isDev = process.env.NODE_ENV === 'development';
18
20
  /**
@@ -22,38 +24,55 @@ const isDev = process.env.NODE_ENV === 'development';
22
24
  * vector-indexed rows).
23
25
  */
24
26
  export const contentHashForNode = (node, config = {}) => {
25
- const text = generateEmbeddingText(node, config);
27
+ // Hash must be deterministic across runs, so exclude methodNames/fieldNames
28
+ // which are populated during the batch loop via AST extraction.
29
+ // Using only node.content ensures the hash stays stable.
30
+ const text = generateEmbeddingText({ ...node, methodNames: undefined, fieldNames: undefined }, node.content, config);
26
31
  return createHash('sha1').update(text).digest('hex');
27
32
  };
28
33
  /**
29
34
  * Query all embeddable nodes from LadybugDB
30
- * Uses table-specific queries (File has different schema than code elements)
35
+ * Uses table-specific queries for different label types
31
36
  */
32
37
  const queryEmbeddableNodes = async (executeQuery) => {
33
38
  const allNodes = [];
34
- // Query each embeddable table with table-specific columns
35
39
  for (const label of EMBEDDABLE_LABELS) {
36
40
  try {
37
41
  let query;
38
- if (label === 'File') {
39
- // File nodes don't have startLine/endLine
42
+ if (label === 'Method') {
43
+ // Method has parameterCount and returnType
40
44
  query = `
41
- MATCH (n:File)
42
- RETURN n.id AS id, n.name AS name, 'File' AS label,
43
- n.filePath AS filePath, n.content AS content
45
+ MATCH (n:Method)
46
+ RETURN n.id AS id, n.name AS name, 'Method' AS label,
47
+ n.filePath AS filePath, n.content AS content,
48
+ n.startLine AS startLine, n.endLine AS endLine,
49
+ n.isExported AS isExported, n.description AS description,
50
+ n.parameterCount AS parameterCount, n.returnType AS returnType
51
+ `;
52
+ }
53
+ else if (LABELS_WITH_EXPORTED.has(label)) {
54
+ // Function, Class, Interface have isExported and description
55
+ query = `
56
+ MATCH (n:\`${label}\`)
57
+ RETURN n.id AS id, n.name AS name, '${label}' AS label,
58
+ n.filePath AS filePath, n.content AS content,
59
+ n.startLine AS startLine, n.endLine AS endLine,
60
+ n.isExported AS isExported, n.description AS description
44
61
  `;
45
62
  }
46
63
  else {
47
- // Code elements have startLine/endLine
64
+ // Multi-language tables (Struct, Enum, etc.) — have description but no isExported
48
65
  query = `
49
- MATCH (n:${label})
50
- RETURN n.id AS id, n.name AS name, '${label}' AS label,
66
+ MATCH (n:\`${label}\`)
67
+ RETURN n.id AS id, n.name AS name, '${label}' AS label,
51
68
  n.filePath AS filePath, n.content AS content,
52
- n.startLine AS startLine, n.endLine AS endLine
69
+ n.startLine AS startLine, n.endLine AS endLine,
70
+ n.description AS description
53
71
  `;
54
72
  }
55
73
  const rows = await executeQuery(query);
56
74
  for (const row of rows) {
75
+ const hasExportedColumn = label === 'Method' || LABELS_WITH_EXPORTED.has(label);
57
76
  allNodes.push({
58
77
  id: row.id ?? row[0],
59
78
  name: row.name ?? row[1],
@@ -62,11 +81,18 @@ const queryEmbeddableNodes = async (executeQuery) => {
62
81
  content: row.content ?? row[4] ?? '',
63
82
  startLine: row.startLine ?? row[5],
64
83
  endLine: row.endLine ?? row[6],
84
+ isExported: hasExportedColumn ? (row.isExported ?? row[7]) : undefined,
85
+ description: row.description ?? (hasExportedColumn ? row[8] : row[7]),
86
+ ...(label === 'Method'
87
+ ? {
88
+ parameterCount: row.parameterCount ?? row[9],
89
+ returnType: row.returnType ?? row[10],
90
+ }
91
+ : {}),
65
92
  });
66
93
  }
67
94
  }
68
95
  catch (error) {
69
- // Table might not exist or be empty, continue
70
96
  if (isDev) {
71
97
  console.warn(`Query for ${label} nodes failed:`, error);
72
98
  }
@@ -75,25 +101,28 @@ const queryEmbeddableNodes = async (executeQuery) => {
75
101
  return allNodes;
76
102
  };
77
103
  /**
78
- * Batch INSERT embeddings into separate CodeEmbedding table
79
- * Using a separate lightweight table avoids copy-on-write overhead
80
- * that occurs when UPDATEing nodes with large content fields
104
+ * Batch INSERT chunk-aware embeddings into CodeEmbedding table
81
105
  */
82
- const batchInsertEmbeddings = async (executeWithReusedStatement, updates) => {
83
- // MERGE instead of CREATE idempotent, handles concurrent analyzes and partial prior runs
84
- const cypher = `MERGE (e:${EMBEDDING_TABLE_NAME} {nodeId: $nodeId}) SET e.embedding = $embedding, e.contentHash = $contentHash`;
106
+ export const batchInsertEmbeddings = async (executeWithReusedStatement, updates) => {
107
+ const cypher = `CREATE (e:${EMBEDDING_TABLE_NAME} {id: $id, nodeId: $nodeId, chunkIndex: $chunkIndex, startLine: $startLine, endLine: $endLine, embedding: $embedding, contentHash: $contentHash})`;
85
108
  const paramsList = updates.map((u) => ({
86
- nodeId: u.id,
109
+ id: `${u.nodeId}:${u.chunkIndex}`,
110
+ nodeId: u.nodeId,
111
+ chunkIndex: u.chunkIndex,
112
+ startLine: u.startLine,
113
+ endLine: u.endLine,
87
114
  embedding: u.embedding,
88
- contentHash: u.contentHash,
115
+ contentHash: u.contentHash ?? STALE_HASH_SENTINEL,
89
116
  }));
90
117
  await executeWithReusedStatement(cypher, paramsList);
91
118
  };
92
119
  /**
93
120
  * Create the vector index for semantic search
121
+
94
122
  * Now indexes the separate CodeEmbedding table.
95
123
  * Delegates extension loading to lbug-adapter's loadVectorExtension(),
96
124
  * which owns the VECTOR extension lifecycle and state tracking.
125
+
97
126
  */
98
127
  const createVectorIndex = async (executeQuery) => {
99
128
  // Delegate to the adapter which tracks loaded state and handles DB reconnect resets
@@ -102,7 +131,6 @@ const createVectorIndex = async (executeQuery) => {
102
131
  await executeQuery(CREATE_VECTOR_INDEX_QUERY);
103
132
  }
104
133
  catch (error) {
105
- // Index might already exist
106
134
  if (isDev) {
107
135
  console.warn('Vector index creation warning:', error);
108
136
  }
@@ -115,11 +143,14 @@ const createVectorIndex = async (executeQuery) => {
115
143
  * @param executeWithReusedStatement - Function to execute with reused prepared statement
116
144
  * @param onProgress - Callback for progress updates
117
145
  * @param config - Optional configuration override
146
+ * @param skipNodeIds - Optional set of node IDs that already have embeddings (incremental mode)
147
+ * @param context - Optional repo/server context for metadata enrichment
118
148
  * @param existingEmbeddings - Optional map of nodeId → contentHash for incremental mode.
119
149
  * Nodes whose hash matches are skipped; nodes with a changed hash are DELETE'd
120
150
  * and re-embedded; nodes not in the map are embedded fresh.
151
+
121
152
  */
122
- export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatement, onProgress, config = {}, existingEmbeddings) => {
153
+ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatement, onProgress, config = {}, skipNodeIds, context, existingEmbeddings) => {
123
154
  const finalConfig = { ...DEFAULT_EMBEDDING_CONFIG, ...config };
124
155
  try {
125
156
  // Phase 1: Load embedding model
@@ -148,6 +179,13 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
148
179
  }
149
180
  // Phase 2: Query embeddable nodes
150
181
  let nodes = await queryEmbeddableNodes(executeQuery);
182
+ // Apply context metadata
183
+ if (context?.repoName) {
184
+ for (const node of nodes) {
185
+ node.repoName = context.repoName;
186
+ node.serverName = context.serverName;
187
+ }
188
+ }
151
189
  // Incremental mode: compare content hashes, delete stale rows, skip fresh ones.
152
190
  // Computed hashes for stale nodes are cached so batchInsertEmbeddings can reuse them
153
191
  // (avoids double computation).
@@ -211,43 +249,99 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
211
249
  });
212
250
  return;
213
251
  }
214
- // Phase 3: Batch embed nodes
252
+ // Phase 3: Chunk + embed nodes
215
253
  const batchSize = finalConfig.batchSize;
216
- const totalBatches = Math.ceil(totalNodes / batchSize);
254
+ const chunkSize = finalConfig.chunkSize;
255
+ const overlap = finalConfig.overlap;
217
256
  let processedNodes = 0;
257
+ let totalChunks = 0;
218
258
  onProgress({
219
259
  phase: 'embedding',
220
260
  percent: 20,
221
261
  nodesProcessed: 0,
222
262
  totalNodes,
223
263
  currentBatch: 0,
224
- totalBatches,
264
+ totalBatches: Math.ceil(totalNodes / batchSize),
225
265
  });
226
- for (let batchIndex = 0; batchIndex < totalBatches; batchIndex++) {
227
- const start = batchIndex * batchSize;
228
- const end = Math.min(start + batchSize, totalNodes);
229
- const batch = nodes.slice(start, end);
230
- // Generate texts for this batch
231
- const texts = generateBatchEmbeddingTexts(batch, finalConfig);
232
- // Embed the batch
233
- const embeddings = await embedBatch(texts);
234
- // Update LadybugDB with embeddings
235
- const updates = batch.map((node, i) => ({
236
- id: node.id,
237
- embedding: embeddingToArray(embeddings[i]),
238
- contentHash: computedStaleHashes.get(node.id) ?? contentHashForNode(node, finalConfig),
239
- }));
240
- await batchInsertEmbeddings(executeWithReusedStatement, updates);
266
+ // Process in batches of nodes
267
+ for (let batchIndex = 0; batchIndex < totalNodes; batchIndex += batchSize) {
268
+ const batch = nodes.slice(batchIndex, batchIndex + batchSize);
269
+ // Chunk each node and generate text
270
+ const allTexts = [];
271
+ const allUpdates = [];
272
+ for (const node of batch) {
273
+ const isShort = isShortLabel(node.label);
274
+ const startLine = node.startLine ?? 0;
275
+ const endLine = node.endLine ?? 0;
276
+ // Extract structural names for class-like nodes via AST extractors
277
+ if (!isShort && STRUCTURAL_LABELS.has(node.label)) {
278
+ try {
279
+ const names = await extractStructuralNames(node.content, node.filePath);
280
+ node.methodNames = names.methodNames;
281
+ node.fieldNames = names.fieldNames;
282
+ }
283
+ catch {
284
+ // AST extraction failed — names stay undefined, text-generator handles gracefully
285
+ }
286
+ }
287
+ // Compute content hash once per node (re-use cached value for stale nodes)
288
+ const hash = computedStaleHashes.get(node.id) ?? contentHashForNode(node, finalConfig);
289
+ let chunks;
290
+ if (isShort) {
291
+ chunks = [{ text: node.content, chunkIndex: 0, startLine, endLine }];
292
+ }
293
+ else {
294
+ try {
295
+ chunks = await chunkNode(node.label, node.content, node.filePath, startLine, endLine, chunkSize, overlap);
296
+ }
297
+ catch (chunkErr) {
298
+ if (isDev) {
299
+ console.warn(`⚠️ AST chunking failed for ${node.label} "${node.name}" (${node.filePath}), falling back to character-based chunking:`, chunkErr);
300
+ }
301
+ chunks = characterChunk(node.content, startLine, endLine, chunkSize, overlap);
302
+ }
303
+ }
304
+ for (const chunk of chunks) {
305
+ const text = generateEmbeddingText(node, chunk.text, finalConfig);
306
+ allTexts.push(text);
307
+ allUpdates.push({
308
+ nodeId: node.id,
309
+ chunkIndex: chunk.chunkIndex,
310
+ startLine: chunk.startLine,
311
+ endLine: chunk.endLine,
312
+ contentHash: hash,
313
+ });
314
+ }
315
+ }
316
+ // Embed chunk texts in sub-batches to control memory
317
+ const EMBED_SUB_BATCH = 8;
318
+ for (let si = 0; si < allTexts.length; si += EMBED_SUB_BATCH) {
319
+ const subTexts = allTexts.slice(si, si + EMBED_SUB_BATCH);
320
+ const subUpdates = allUpdates.slice(si, si + EMBED_SUB_BATCH);
321
+ let embeddings;
322
+ try {
323
+ embeddings = await embedBatch(subTexts);
324
+ }
325
+ catch (embedErr) {
326
+ console.error(`❌ embedBatch failed for ${subTexts.length} texts (first: "${subTexts[0]?.substring(0, 80)}..."):`, embedErr);
327
+ throw embedErr;
328
+ }
329
+ const dbUpdates = subUpdates.map((u, i) => ({
330
+ ...u,
331
+ embedding: embeddingToArray(embeddings[i]),
332
+ }));
333
+ await batchInsertEmbeddings(executeWithReusedStatement, dbUpdates);
334
+ }
241
335
  processedNodes += batch.length;
242
- // Report progress (20-90% for embedding phase)
336
+ totalChunks += allUpdates.length;
243
337
  const embeddingProgress = 20 + (processedNodes / totalNodes) * 70;
244
338
  onProgress({
245
339
  phase: 'embedding',
246
340
  percent: Math.round(embeddingProgress),
247
341
  nodesProcessed: processedNodes,
248
342
  totalNodes,
249
- currentBatch: batchIndex + 1,
250
- totalBatches,
343
+ currentBatch: Math.floor(batchIndex / batchSize) + 1,
344
+ totalBatches: Math.ceil(totalNodes / batchSize),
251
345
  });
252
346
  }
253
347
  // Phase 4: Create vector index
@@ -261,7 +355,6 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
261
355
  console.log('📇 Creating vector index...');
262
356
  }
263
357
  await createVectorIndex(executeQuery);
264
- // Complete
265
358
  onProgress({
266
359
  phase: 'ready',
267
360
  percent: 100,
@@ -269,7 +362,7 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
269
362
  totalNodes,
270
363
  });
271
364
  if (isDev) {
272
- console.log('✅ Embedding pipeline complete!');
365
+ console.log(`✅ Embedding pipeline complete! (${totalChunks} chunks from ${totalNodes} nodes)`);
273
366
  }
274
367
  }
275
368
  catch (error) {
@@ -286,68 +379,57 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
286
379
  }
287
380
  };
288
381
  /**
289
- * Perform semantic search using the vector index
290
- *
291
- * Uses CodeEmbedding table and queries each node table to get metadata
292
- *
293
- * @param executeQuery - Function to execute Cypher queries
294
- * @param query - Search query text
295
- * @param k - Number of results to return (default: 10)
296
- * @param maxDistance - Maximum distance threshold (default: 0.5)
297
- * @returns Array of search results ordered by relevance
382
+ * Perform semantic search using the vector index with chunk deduplication
298
383
  */
299
384
  export const semanticSearch = async (executeQuery, query, k = 10, maxDistance = 0.5) => {
300
385
  if (!isEmbedderReady()) {
301
386
  throw new Error('Embedding model not initialized. Run embedding pipeline first.');
302
387
  }
303
- // Embed the query
304
388
  const queryEmbedding = await embedText(query);
305
389
  const queryVec = embeddingToArray(queryEmbedding);
306
390
  const queryVecStr = `[${queryVec.join(',')}]`;
307
- // Query the vector index on CodeEmbedding to get nodeIds and distances
308
- const vectorQuery = `
309
- CALL QUERY_VECTOR_INDEX('${EMBEDDING_TABLE_NAME}', '${EMBEDDING_INDEX_NAME}',
310
- CAST(${queryVecStr} AS FLOAT[${queryVec.length}]), ${k})
311
- YIELD node AS emb, distance
312
- WITH emb, distance
313
- WHERE distance < ${maxDistance}
314
- RETURN emb.nodeId AS nodeId, distance
315
- ORDER BY distance
316
- `;
317
- const embResults = await executeQuery(vectorQuery);
318
- if (embResults.length === 0) {
391
+ const bestChunks = await collectBestChunks(k, async (fetchLimit) => {
392
+ const vectorQuery = `
393
+ CALL QUERY_VECTOR_INDEX('${EMBEDDING_TABLE_NAME}', '${EMBEDDING_INDEX_NAME}',
394
+ CAST(${queryVecStr} AS FLOAT[${queryVec.length}]), ${fetchLimit})
395
+ YIELD node AS emb, distance
396
+ WITH emb, distance
397
+ WHERE distance < ${maxDistance}
398
+ RETURN emb.nodeId AS nodeId, emb.chunkIndex AS chunkIndex,
399
+ emb.startLine AS startLine, emb.endLine AS endLine, distance
400
+ ORDER BY distance
401
+ `;
402
+ const embResults = await executeQuery(vectorQuery);
403
+ return embResults.map((row) => ({
404
+ nodeId: row.nodeId ?? row[0],
405
+ chunkIndex: row.chunkIndex ?? row[1] ?? 0,
406
+ startLine: row.startLine ?? row[2] ?? 0,
407
+ endLine: row.endLine ?? row[3] ?? 0,
408
+ distance: row.distance ?? row[4],
409
+ }));
410
+ });
411
+ if (bestChunks.size === 0) {
319
412
  return [];
320
413
  }
321
414
  // Group results by label for batched metadata queries
322
415
  const byLabel = new Map();
323
- for (const embRow of embResults) {
324
- const nodeId = embRow.nodeId ?? embRow[0];
325
- const distance = embRow.distance ?? embRow[1];
416
+ for (const [nodeId, chunk] of Array.from(bestChunks.entries()).slice(0, k)) {
326
417
  const labelEndIdx = nodeId.indexOf(':');
327
418
  const label = labelEndIdx > 0 ? nodeId.substring(0, labelEndIdx) : 'Unknown';
328
419
  if (!byLabel.has(label))
329
420
  byLabel.set(label, []);
330
- byLabel.get(label).push({ nodeId, distance });
421
+ byLabel.get(label).push({ nodeId, ...chunk });
331
422
  }
332
423
  // Batch-fetch metadata per label
333
424
  const results = [];
334
425
  for (const [label, items] of byLabel) {
335
426
  const idList = items.map((i) => `'${i.nodeId.replace(/'/g, "''")}'`).join(', ');
336
427
  try {
337
- let nodeQuery;
338
- if (label === 'File') {
339
- nodeQuery = `
340
- MATCH (n:File) WHERE n.id IN [${idList}]
341
- RETURN n.id AS id, n.name AS name, n.filePath AS filePath
342
- `;
343
- }
344
- else {
345
- nodeQuery = `
346
- MATCH (n:${label}) WHERE n.id IN [${idList}]
347
- RETURN n.id AS id, n.name AS name, n.filePath AS filePath,
348
- n.startLine AS startLine, n.endLine AS endLine
349
- `;
350
- }
428
+ const nodeQuery = `
429
+ MATCH (n:\`${label}\`) WHERE n.id IN [${idList}]
430
+ RETURN n.id AS id, n.name AS name, n.filePath AS filePath,
431
+ n.startLine AS startLine, n.endLine AS endLine
432
+ `;
351
433
  const nodeRows = await executeQuery(nodeQuery);
352
434
  const rowMap = new Map();
353
435
  for (const row of nodeRows) {
@@ -363,8 +445,8 @@ export const semanticSearch = async (executeQuery, query, k = 10, maxDistance =
363
445
  label,
364
446
  filePath: nodeRow.filePath ?? nodeRow[2] ?? '',
365
447
  distance: item.distance,
366
- startLine: label !== 'File' ? (nodeRow.startLine ?? nodeRow[3]) : undefined,
367
- endLine: label !== 'File' ? (nodeRow.endLine ?? nodeRow[4]) : undefined,
448
+ startLine: item.startLine,
449
+ endLine: item.endLine,
368
450
  });
369
451
  }
370
452
  }
@@ -373,26 +455,13 @@ export const semanticSearch = async (executeQuery, query, k = 10, maxDistance =
373
455
  // Table might not exist, skip
374
456
  }
375
457
  }
376
- // Re-sort by distance since batch queries may have mixed order
377
458
  results.sort((a, b) => a.distance - b.distance);
378
459
  return results;
379
460
  };
380
461
  /**
381
462
  * Semantic search with graph expansion (flattened results)
382
- *
383
- * Note: With multi-table schema, graph traversal is simplified.
384
- * Returns semantic matches with their metadata.
385
- * For full graph traversal, use execute_vector_cypher tool directly.
386
- *
387
- * @param executeQuery - Function to execute Cypher queries
388
- * @param query - Search query text
389
- * @param k - Number of initial semantic matches (default: 5)
390
- * @param _hops - Unused (kept for API compatibility).
391
- * @returns Semantic matches with metadata
392
463
  */
393
464
  export const semanticSearchWithContext = async (executeQuery, query, k = 5, _hops = 1) => {
394
- // For multi-table schema, just return semantic search results
395
- // Graph traversal is complex with separate tables - use execute_vector_cypher instead
396
465
  const results = await semanticSearch(executeQuery, query, k, 0.5);
397
466
  return results.map((r) => ({
398
467
  matchId: r.nodeId,
@@ -0,0 +1,7 @@
1
+ export interface ResolvedLineRange {
2
+ startLine: number;
3
+ endLine: number;
4
+ }
5
+ export declare const buildLineIndex: (content: string) => Int32Array;
6
+ export declare const lineFromOffset: (lineOffsets: Int32Array, charOffset: number) => number;
7
+ export declare const resolveChunkLines: (lineOffsets: Int32Array, startOffset: number, endOffset: number, baseStartLine: number) => ResolvedLineRange;
@@ -0,0 +1,42 @@
1
/**
 * Build a table of line-start offsets for `content`.
 *
 * Entry 0 is always 0 (the first line starts at the beginning of the string);
 * every following entry is the offset of the character immediately after a
 * line-feed (`\n`, char code 10). Only `\n` is treated as a line terminator,
 * so a `\r\n` pair contributes a single entry.
 *
 * @param content - Text to index
 * @returns Int32Array holding one start offset per line, in ascending order
 */
export const buildLineIndex = (content) => {
    const lineStarts = [0];
    // Hop from one line-feed to the next instead of inspecting every character.
    let newlineAt = content.indexOf('\n');
    while (newlineAt !== -1) {
        lineStarts.push(newlineAt + 1);
        newlineAt = content.indexOf('\n', newlineAt + 1);
    }
    return Int32Array.from(lineStarts);
};
9
/**
 * Clamp a character offset into the range covered by a line-offset table.
 *
 * Negative offsets (and any offset when the table is empty) become 0.
 * Offsets greater than the last entry of the table are pulled back to that
 * last entry, i.e. to the start offset of the final line.
 *
 * @param lineOffsets - Ascending line-start offsets (as produced by buildLineIndex)
 * @param charOffset - Offset to clamp
 * @returns The clamped offset
 */
const clampOffset = (lineOffsets, charOffset) => {
    const lineCount = lineOffsets.length;
    if (lineCount === 0 || charOffset < 0) {
        return 0;
    }
    const lastLineStart = lineOffsets[lineCount - 1];
    return charOffset > lastLineStart ? lastLineStart : charOffset;
};
19
/**
 * Map a character offset to the zero-based index of the line containing it.
 *
 * The offset is first clamped with clampOffset, then a binary search picks
 * the last table entry that is less than or equal to the clamped offset.
 * An empty table yields 0.
 *
 * @param lineOffsets - Ascending line-start offsets (as produced by buildLineIndex)
 * @param charOffset - Character offset to resolve
 * @returns Zero-based line index within the indexed content
 */
export const lineFromOffset = (lineOffsets, charOffset) => {
    const lineCount = lineOffsets.length;
    if (lineCount === 0) {
        return 0;
    }
    const target = clampOffset(lineOffsets, charOffset);
    let low = 0;
    let high = lineCount - 1;
    while (low < high) {
        // Round the midpoint up so that `low = probe` always makes progress.
        const probe = (low + high + 1) >> 1;
        if (lineOffsets[probe] <= target) {
            low = probe;
        }
        else {
            high = probe - 1;
        }
    }
    return low;
};
34
/**
 * Convert a chunk's character span into a line range.
 *
 * `endOffset` is treated as exclusive: when it lies past `startOffset`, the
 * end line is resolved from the character at `endOffset - 1`. Otherwise the
 * span is considered empty and both lines are resolved from `startOffset`.
 * `baseStartLine` is added to both relative line indices.
 *
 * @param lineOffsets - Ascending line-start offsets (as produced by buildLineIndex)
 * @param startOffset - Offset of the chunk's first character
 * @param endOffset - Offset just past the chunk's last character
 * @param baseStartLine - Line number added to the relative start/end lines
 * @returns `{ startLine, endLine }` for the chunk
 */
export const resolveChunkLines = (lineOffsets, startOffset, endOffset, baseStartLine) => {
    let lastCharOffset = startOffset;
    if (endOffset > startOffset) {
        lastCharOffset = endOffset - 1;
    }
    const firstLine = lineFromOffset(lineOffsets, startOffset);
    const lastLine = lineFromOffset(lineOffsets, lastCharOffset);
    return {
        startLine: baseStartLine + firstLine,
        endLine: baseStartLine + lastLine,
    };
};
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Server Mapping Configuration
3
+ *
4
+ * Reads ~/.gitnexus/server-mapping.json to map repo names to service names.
5
+ * Used in embedding text to enrich metadata with microservice context.
6
+ */
7
+ /**
8
+ * Read the server mapping file and return the serverName for a given repoName.
9
+ * Returns undefined if no mapping exists.
10
+ */
11
+ export declare const readServerMapping: (repoName: string) => Promise<string | undefined>;
12
+ /**
13
+ * Clear the cached mapping (useful for testing or after file changes)
14
+ */
15
+ export declare const clearServerMappingCache: () => void;
@@ -0,0 +1,33 @@
1
+ /**
2
+ * Server Mapping Configuration
3
+ *
4
+ * Reads ~/.gitnexus/server-mapping.json to map repo names to service names.
5
+ * Used in embedding text to enrich metadata with microservice context.
6
+ */
7
+ import fs from 'fs/promises';
8
+ import path from 'path';
9
+ import os from 'os';
10
+ const MAPPING_FILE = path.join(os.homedir(), '.gitnexus', 'server-mapping.json');
11
+ let cachedMapping = null;
12
/**
 * Read the server mapping file and return the serverName for a given repoName.
 *
 * The parsed file is cached in `cachedMapping` after the first successful
 * read; later calls reuse it until the cache is cleared. Only a JSON object
 * is accepted as a mapping, only the object's own keys are consulted, and
 * only string values are returned.
 *
 * @param repoName - Repository name to look up
 * @returns The mapped server name, or undefined when the file cannot be read
 *          or parsed, is not a JSON object, or has no string entry for
 *          `repoName`
 */
export const readServerMapping = async (repoName) => {
    try {
        if (!cachedMapping) {
            const raw = await fs.readFile(MAPPING_FILE, 'utf-8');
            const parsed = JSON.parse(raw);
            // Arrays, strings, numbers and null are valid JSON but not a mapping.
            // Do not cache them, so a corrected file is picked up on the next call.
            if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
                return undefined;
            }
            cachedMapping = parsed;
        }
        // Own-property check: a repo named "constructor" or "toString" must not
        // resolve to an inherited Object.prototype member.
        if (!Object.prototype.hasOwnProperty.call(cachedMapping, repoName)) {
            return undefined;
        }
        const serverName = cachedMapping[repoName];
        return typeof serverName === 'string' ? serverName : undefined;
    }
    catch {
        // Missing or unreadable file, or malformed JSON: treat as "no mapping".
        return undefined;
    }
};
28
/**
 * Drop the in-memory copy of the server mapping.
 *
 * After this call `cachedMapping` is null, so the next readServerMapping
 * call reads the mapping file again. Useful in tests or after the file has
 * been edited.
 */
export const clearServerMappingCache = () => {
    // Resetting to null is what readServerMapping checks before re-reading.
    cachedMapping = null;
};
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Structural Extractor Module
3
+ *
4
+ * Reuses ingestion pipeline's AST-based MethodExtractor / FieldExtractor
5
+ * to extract method and field names for embedding text generation.
6
+ */
7
+ export interface StructuralNames {
8
+ methodNames: string[];
9
+ fieldNames: string[];
10
+ }
11
+ /**
12
+ * Extract method and field names from a class/struct/interface node
13
+ * using the ingestion pipeline's AST extractors.
14
+ */
15
+ export declare const extractStructuralNames: (content: string, filePath: string) => Promise<StructuralNames>;