gitnexus 1.6.2-rc.8 → 1.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (222) hide show
  1. package/dist/_shared/lbug/schema-constants.d.ts +1 -1
  2. package/dist/_shared/lbug/schema-constants.d.ts.map +1 -1
  3. package/dist/_shared/lbug/schema-constants.js +1 -0
  4. package/dist/_shared/lbug/schema-constants.js.map +1 -1
  5. package/dist/_shared/mro-strategy.d.ts +38 -16
  6. package/dist/_shared/mro-strategy.d.ts.map +1 -1
  7. package/dist/cli/ai-context.js +0 -58
  8. package/dist/cli/analyze.js +3 -0
  9. package/dist/core/embeddings/ast-utils.d.ts +22 -0
  10. package/dist/core/embeddings/ast-utils.js +105 -0
  11. package/dist/core/embeddings/character-chunk.d.ts +12 -0
  12. package/dist/core/embeddings/character-chunk.js +43 -0
  13. package/dist/core/embeddings/chunker.d.ts +14 -0
  14. package/dist/core/embeddings/chunker.js +234 -0
  15. package/dist/core/embeddings/embedder.js +5 -0
  16. package/dist/core/embeddings/embedding-pipeline.d.ts +20 -24
  17. package/dist/core/embeddings/embedding-pipeline.js +176 -107
  18. package/dist/core/embeddings/line-index.d.ts +7 -0
  19. package/dist/core/embeddings/line-index.js +42 -0
  20. package/dist/core/embeddings/server-mapping.d.ts +15 -0
  21. package/dist/core/embeddings/server-mapping.js +33 -0
  22. package/dist/core/embeddings/structural-extractor.d.ts +15 -0
  23. package/dist/core/embeddings/structural-extractor.js +58 -0
  24. package/dist/core/embeddings/text-generator.d.ts +20 -13
  25. package/dist/core/embeddings/text-generator.js +151 -119
  26. package/dist/core/embeddings/types.d.ts +81 -3
  27. package/dist/core/embeddings/types.js +105 -3
  28. package/dist/core/group/extractors/http-patterns/node.js +130 -0
  29. package/dist/core/ingestion/call-extractors/configs/c-cpp.d.ts +3 -0
  30. package/dist/core/ingestion/call-extractors/configs/c-cpp.js +8 -0
  31. package/dist/core/ingestion/call-extractors/configs/csharp.d.ts +2 -0
  32. package/dist/core/ingestion/call-extractors/configs/csharp.js +6 -0
  33. package/dist/core/ingestion/call-extractors/configs/dart.d.ts +2 -0
  34. package/dist/core/ingestion/call-extractors/configs/dart.js +5 -0
  35. package/dist/core/ingestion/call-extractors/configs/go.d.ts +2 -0
  36. package/dist/core/ingestion/call-extractors/configs/go.js +5 -0
  37. package/dist/core/ingestion/call-extractors/configs/jvm.d.ts +3 -0
  38. package/dist/core/ingestion/call-extractors/configs/jvm.js +51 -0
  39. package/dist/core/ingestion/call-extractors/configs/php.d.ts +2 -0
  40. package/dist/core/ingestion/call-extractors/configs/php.js +5 -0
  41. package/dist/core/ingestion/call-extractors/configs/python.d.ts +2 -0
  42. package/dist/core/ingestion/call-extractors/configs/python.js +5 -0
  43. package/dist/core/ingestion/call-extractors/configs/ruby.d.ts +2 -0
  44. package/dist/core/ingestion/call-extractors/configs/ruby.js +5 -0
  45. package/dist/core/ingestion/call-extractors/configs/rust.d.ts +2 -0
  46. package/dist/core/ingestion/call-extractors/configs/rust.js +5 -0
  47. package/dist/core/ingestion/call-extractors/configs/swift.d.ts +2 -0
  48. package/dist/core/ingestion/call-extractors/configs/swift.js +5 -0
  49. package/dist/core/ingestion/call-extractors/configs/typescript-javascript.d.ts +3 -0
  50. package/dist/core/ingestion/call-extractors/configs/typescript-javascript.js +8 -0
  51. package/dist/core/ingestion/call-extractors/generic.d.ts +5 -0
  52. package/dist/core/ingestion/call-extractors/generic.js +59 -0
  53. package/dist/core/ingestion/call-processor.d.ts +2 -4
  54. package/dist/core/ingestion/call-processor.js +221 -89
  55. package/dist/core/ingestion/call-routing.d.ts +8 -12
  56. package/dist/core/ingestion/call-routing.js +13 -34
  57. package/dist/core/ingestion/call-types.d.ts +135 -0
  58. package/dist/core/ingestion/call-types.js +2 -0
  59. package/dist/core/ingestion/class-extractors/configs/c-cpp.d.ts +3 -0
  60. package/dist/core/ingestion/class-extractors/configs/c-cpp.js +11 -0
  61. package/dist/core/ingestion/class-extractors/configs/csharp.d.ts +2 -0
  62. package/dist/core/ingestion/class-extractors/configs/csharp.js +21 -0
  63. package/dist/core/ingestion/class-extractors/configs/dart.d.ts +2 -0
  64. package/dist/core/ingestion/class-extractors/configs/dart.js +7 -0
  65. package/dist/core/ingestion/class-extractors/configs/go.d.ts +2 -0
  66. package/dist/core/ingestion/class-extractors/configs/go.js +20 -0
  67. package/dist/core/ingestion/class-extractors/configs/jvm.d.ts +3 -0
  68. package/dist/core/ingestion/class-extractors/configs/jvm.js +35 -0
  69. package/dist/core/ingestion/class-extractors/configs/php.d.ts +2 -0
  70. package/dist/core/ingestion/class-extractors/configs/php.js +7 -0
  71. package/dist/core/ingestion/class-extractors/configs/python.d.ts +2 -0
  72. package/dist/core/ingestion/class-extractors/configs/python.js +7 -0
  73. package/dist/core/ingestion/class-extractors/configs/ruby.d.ts +2 -0
  74. package/dist/core/ingestion/class-extractors/configs/ruby.js +7 -0
  75. package/dist/core/ingestion/class-extractors/configs/rust.d.ts +2 -0
  76. package/dist/core/ingestion/class-extractors/configs/rust.js +7 -0
  77. package/dist/core/ingestion/class-extractors/configs/swift.d.ts +2 -0
  78. package/dist/core/ingestion/class-extractors/configs/swift.js +18 -0
  79. package/dist/core/ingestion/class-extractors/configs/typescript-javascript.d.ts +4 -0
  80. package/dist/core/ingestion/class-extractors/configs/typescript-javascript.js +28 -0
  81. package/dist/core/ingestion/field-types.d.ts +1 -1
  82. package/dist/core/ingestion/heritage-extractors/configs/go.d.ts +13 -0
  83. package/dist/core/ingestion/heritage-extractors/configs/go.js +20 -0
  84. package/dist/core/ingestion/heritage-extractors/configs/ruby.d.ts +18 -0
  85. package/dist/core/ingestion/heritage-extractors/configs/ruby.js +65 -0
  86. package/dist/core/ingestion/heritage-extractors/generic.d.ts +23 -0
  87. package/dist/core/ingestion/heritage-extractors/generic.js +47 -0
  88. package/dist/core/ingestion/heritage-processor.d.ts +9 -0
  89. package/dist/core/ingestion/heritage-processor.js +120 -85
  90. package/dist/core/ingestion/heritage-types.d.ts +73 -0
  91. package/dist/core/ingestion/heritage-types.js +2 -0
  92. package/dist/core/ingestion/import-resolvers/configs/c-cpp.d.ts +7 -0
  93. package/dist/core/ingestion/import-resolvers/configs/c-cpp.js +14 -0
  94. package/dist/core/ingestion/import-resolvers/configs/csharp.d.ts +8 -0
  95. package/dist/core/ingestion/import-resolvers/configs/csharp.js +27 -0
  96. package/dist/core/ingestion/import-resolvers/configs/dart.d.ts +17 -0
  97. package/dist/core/ingestion/import-resolvers/{dart.js → configs/dart.js} +26 -16
  98. package/dist/core/ingestion/import-resolvers/configs/go.d.ts +8 -0
  99. package/dist/core/ingestion/import-resolvers/configs/go.js +26 -0
  100. package/dist/core/ingestion/import-resolvers/configs/jvm.d.ts +13 -0
  101. package/dist/core/ingestion/import-resolvers/configs/jvm.js +68 -0
  102. package/dist/core/ingestion/import-resolvers/configs/php.d.ts +8 -0
  103. package/dist/core/ingestion/import-resolvers/configs/php.js +15 -0
  104. package/dist/core/ingestion/import-resolvers/configs/python.d.ts +12 -0
  105. package/dist/core/ingestion/import-resolvers/configs/python.js +41 -0
  106. package/dist/core/ingestion/import-resolvers/configs/ruby.d.ts +8 -0
  107. package/dist/core/ingestion/import-resolvers/configs/ruby.js +16 -0
  108. package/dist/core/ingestion/import-resolvers/configs/rust.d.ts +8 -0
  109. package/dist/core/ingestion/import-resolvers/configs/rust.js +54 -0
  110. package/dist/core/ingestion/import-resolvers/configs/swift.d.ts +8 -0
  111. package/dist/core/ingestion/import-resolvers/{swift.js → configs/swift.js} +10 -5
  112. package/dist/core/ingestion/import-resolvers/configs/typescript-javascript.d.ts +9 -0
  113. package/dist/core/ingestion/import-resolvers/configs/typescript-javascript.js +23 -0
  114. package/dist/core/ingestion/import-resolvers/csharp.d.ts +4 -5
  115. package/dist/core/ingestion/import-resolvers/csharp.js +4 -20
  116. package/dist/core/ingestion/import-resolvers/go.d.ts +4 -5
  117. package/dist/core/ingestion/import-resolvers/go.js +4 -19
  118. package/dist/core/ingestion/import-resolvers/jvm.d.ts +5 -10
  119. package/dist/core/ingestion/import-resolvers/jvm.js +5 -58
  120. package/dist/core/ingestion/import-resolvers/php.d.ts +4 -5
  121. package/dist/core/ingestion/import-resolvers/php.js +4 -7
  122. package/dist/core/ingestion/import-resolvers/python.d.ts +3 -6
  123. package/dist/core/ingestion/import-resolvers/python.js +3 -18
  124. package/dist/core/ingestion/import-resolvers/resolver-factory.d.ts +24 -0
  125. package/dist/core/ingestion/import-resolvers/resolver-factory.js +33 -0
  126. package/dist/core/ingestion/import-resolvers/ruby.d.ts +4 -5
  127. package/dist/core/ingestion/import-resolvers/ruby.js +4 -7
  128. package/dist/core/ingestion/import-resolvers/rust.d.ts +4 -5
  129. package/dist/core/ingestion/import-resolvers/rust.js +4 -47
  130. package/dist/core/ingestion/import-resolvers/standard.d.ts +3 -9
  131. package/dist/core/ingestion/import-resolvers/standard.js +7 -8
  132. package/dist/core/ingestion/import-resolvers/types.d.ts +24 -0
  133. package/dist/core/ingestion/language-provider.d.ts +80 -0
  134. package/dist/core/ingestion/languages/c-cpp.js +18 -12
  135. package/dist/core/ingestion/languages/csharp.js +13 -21
  136. package/dist/core/ingestion/languages/dart.js +13 -7
  137. package/dist/core/ingestion/languages/go.js +14 -20
  138. package/dist/core/ingestion/languages/java.js +13 -18
  139. package/dist/core/ingestion/languages/kotlin.js +13 -13
  140. package/dist/core/ingestion/languages/php.js +13 -7
  141. package/dist/core/ingestion/languages/python.js +13 -7
  142. package/dist/core/ingestion/languages/ruby.js +103 -22
  143. package/dist/core/ingestion/languages/rust.js +13 -7
  144. package/dist/core/ingestion/languages/swift.js +13 -18
  145. package/dist/core/ingestion/languages/typescript.js +18 -23
  146. package/dist/core/ingestion/languages/vue.js +13 -17
  147. package/dist/core/ingestion/model/heritage-map.d.ts +35 -0
  148. package/dist/core/ingestion/model/heritage-map.js +110 -9
  149. package/dist/core/ingestion/model/index.d.ts +2 -2
  150. package/dist/core/ingestion/model/index.js +1 -1
  151. package/dist/core/ingestion/model/resolve.d.ts +33 -28
  152. package/dist/core/ingestion/model/resolve.js +111 -27
  153. package/dist/core/ingestion/parsing-processor.d.ts +1 -2
  154. package/dist/core/ingestion/pipeline-phases/parse-impl.d.ts +1 -0
  155. package/dist/core/ingestion/pipeline-phases/parse-impl.js +9 -3
  156. package/dist/core/ingestion/pipeline-phases/parse.d.ts +7 -0
  157. package/dist/core/ingestion/pipeline.d.ts +11 -0
  158. package/dist/core/ingestion/pipeline.js +9 -2
  159. package/dist/core/ingestion/tree-sitter-queries.d.ts +11 -11
  160. package/dist/core/ingestion/tree-sitter-queries.js +81 -0
  161. package/dist/core/ingestion/type-env.d.ts +1 -1
  162. package/dist/core/ingestion/utils/ast-helpers.d.ts +1 -1
  163. package/dist/core/ingestion/utils/ast-helpers.js +22 -2
  164. package/dist/core/ingestion/utils/ruby-self-call.d.ts +52 -0
  165. package/dist/core/ingestion/utils/ruby-self-call.js +59 -0
  166. package/dist/core/ingestion/variable-extractors/configs/c-cpp.d.ts +3 -0
  167. package/dist/core/ingestion/variable-extractors/configs/c-cpp.js +81 -0
  168. package/dist/core/ingestion/variable-extractors/configs/csharp.d.ts +9 -0
  169. package/dist/core/ingestion/variable-extractors/configs/csharp.js +63 -0
  170. package/dist/core/ingestion/variable-extractors/configs/dart.d.ts +2 -0
  171. package/dist/core/ingestion/variable-extractors/configs/dart.js +94 -0
  172. package/dist/core/ingestion/variable-extractors/configs/go.d.ts +2 -0
  173. package/dist/core/ingestion/variable-extractors/configs/go.js +83 -0
  174. package/dist/core/ingestion/variable-extractors/configs/jvm.d.ts +18 -0
  175. package/dist/core/ingestion/variable-extractors/configs/jvm.js +115 -0
  176. package/dist/core/ingestion/variable-extractors/configs/php.d.ts +14 -0
  177. package/dist/core/ingestion/variable-extractors/configs/php.js +58 -0
  178. package/dist/core/ingestion/variable-extractors/configs/python.d.ts +2 -0
  179. package/dist/core/ingestion/variable-extractors/configs/python.js +101 -0
  180. package/dist/core/ingestion/variable-extractors/configs/ruby.d.ts +11 -0
  181. package/dist/core/ingestion/variable-extractors/configs/ruby.js +52 -0
  182. package/dist/core/ingestion/variable-extractors/configs/rust.d.ts +2 -0
  183. package/dist/core/ingestion/variable-extractors/configs/rust.js +76 -0
  184. package/dist/core/ingestion/variable-extractors/configs/swift.d.ts +2 -0
  185. package/dist/core/ingestion/variable-extractors/configs/swift.js +88 -0
  186. package/dist/core/ingestion/variable-extractors/configs/typescript-javascript.d.ts +3 -0
  187. package/dist/core/ingestion/variable-extractors/configs/typescript-javascript.js +83 -0
  188. package/dist/core/ingestion/variable-extractors/generic.d.ts +5 -0
  189. package/dist/core/ingestion/variable-extractors/generic.js +80 -0
  190. package/dist/core/ingestion/variable-types.d.ts +82 -0
  191. package/dist/core/ingestion/variable-types.js +2 -0
  192. package/dist/core/ingestion/workers/parse-worker.js +244 -217
  193. package/dist/core/ingestion/workers/worker-pool.js +3 -0
  194. package/dist/core/lbug/csv-generator.js +1 -0
  195. package/dist/core/lbug/lbug-adapter.d.ts +4 -5
  196. package/dist/core/lbug/lbug-adapter.js +38 -14
  197. package/dist/core/lbug/schema.d.ts +2 -1
  198. package/dist/core/lbug/schema.js +10 -1
  199. package/dist/core/run-analyze.js +6 -7
  200. package/dist/core/tree-sitter/parser-loader.d.ts +3 -0
  201. package/dist/core/tree-sitter/parser-loader.js +17 -8
  202. package/dist/mcp/core/embedder.js +5 -0
  203. package/dist/mcp/local/local-backend.js +29 -19
  204. package/dist/server/api.js +2 -0
  205. package/dist/types/pipeline.d.ts +6 -0
  206. package/package.json +8 -7
  207. package/scripts/build-tree-sitter-proto.cjs +82 -0
  208. package/vendor/node_modules/node-addon-api/node_addon_api.Makefile +6 -0
  209. package/vendor/node_modules/node-addon-api/node_addon_api.target.mk +104 -0
  210. package/vendor/node_modules/node-addon-api/node_addon_api_except.target.mk +108 -0
  211. package/vendor/node_modules/node-addon-api/node_addon_api_except_all.target.mk +104 -0
  212. package/vendor/node_modules/node-addon-api/node_addon_api_maybe.target.mk +104 -0
  213. package/vendor/tree-sitter-proto/package.json +1 -7
  214. package/dist/core/ingestion/call-sites/extract-language-call-site.d.ts +0 -10
  215. package/dist/core/ingestion/call-sites/extract-language-call-site.js +0 -22
  216. package/dist/core/ingestion/call-sites/java.d.ts +0 -9
  217. package/dist/core/ingestion/call-sites/java.js +0 -30
  218. package/dist/core/ingestion/import-resolvers/dart.d.ts +0 -7
  219. package/dist/core/ingestion/import-resolvers/swift.d.ts +0 -7
  220. package/dist/core/ingestion/import-resolvers/vue.d.ts +0 -8
  221. package/dist/core/ingestion/import-resolvers/vue.js +0 -9
  222. package/scripts/preinstall-cleanup.cjs +0 -34
@@ -0,0 +1,58 @@
1
+ /**
2
+ * Structural Extractor Module
3
+ *
4
+ * Reuses ingestion pipeline's AST-based MethodExtractor / FieldExtractor
5
+ * to extract method and field names for embedding text generation.
6
+ */
7
+ import { getProviderForFile } from '../ingestion/languages/index.js';
8
+ import { buildTypeEnv } from '../ingestion/type-env.js';
9
+ import { ensureAndParse, findDeclarationNode } from './ast-utils.js';
10
+ const NOOP_SYMBOL_TABLE = {
11
+ lookupExactAll: () => [],
12
+ lookupExact: () => undefined,
13
+ lookupExactFull: () => undefined,
14
+ };
15
+ /**
16
+ * Extract method and field names from a class/struct/interface node
17
+ * using the ingestion pipeline's AST extractors.
18
+ */
19
+ export const extractStructuralNames = async (content, filePath) => {
20
+ const provider = getProviderForFile(filePath);
21
+ if (!provider)
22
+ return { methodNames: [], fieldNames: [] };
23
+ const tree = await ensureAndParse(content, filePath);
24
+ if (!tree)
25
+ return { methodNames: [], fieldNames: [] };
26
+ // Parse node.content (a snippet) — find declaration directly, not by range
27
+ const classNode = findDeclarationNode(tree.rootNode);
28
+ if (!classNode)
29
+ return { methodNames: [], fieldNames: [] };
30
+ const language = provider.id;
31
+ const methodNames = extractMethodNames(classNode, provider, filePath, language);
32
+ const fieldNames = extractFieldNames(classNode, provider, tree, filePath, language);
33
+ return { methodNames, fieldNames };
34
+ };
35
+ function extractMethodNames(classNode, provider, filePath, language) {
36
+ if (!provider.methodExtractor)
37
+ return [];
38
+ const context = { filePath, language };
39
+ const result = provider.methodExtractor.extract(classNode, context);
40
+ if (!result?.methods?.length)
41
+ return [];
42
+ return result.methods.map((m) => m.name);
43
+ }
44
+ function extractFieldNames(classNode, provider, tree, filePath, language) {
45
+ if (!provider.fieldExtractor)
46
+ return [];
47
+ const typeEnv = buildTypeEnv(tree, language);
48
+ const context = {
49
+ typeEnv,
50
+ symbolTable: NOOP_SYMBOL_TABLE,
51
+ filePath,
52
+ language,
53
+ };
54
+ const result = provider.fieldExtractor.extract(classNode, context);
55
+ if (!result?.fields?.length)
56
+ return [];
57
+ return result.fields.map((f) => f.name);
58
+ }
@@ -1,24 +1,31 @@
1
1
  /**
2
2
  * Text Generator Module
3
3
  *
4
- * Pure functions to generate embedding text from code nodes.
5
- * Combines node metadata with code snippets for semantic matching.
4
+ * Generates enriched embedding text from code nodes with metadata.
5
+ * Supports chunkable labels (Function/Method with AST chunking),
6
+ * Class-specific structural text, and short-node direct embed.
7
+ *
8
+ * Method/field names for Class nodes are extracted by the ingestion
9
+ * pipeline's AST extractors and passed via node.methodNames/node.fieldNames.
6
10
  */
7
11
  import type { EmbeddableNode, EmbeddingConfig } from './types.js';
12
+ /**
13
+ * Truncate description to max length at sentence/word boundary
14
+ */
15
+ declare const truncateDescription: (text: string, maxLength: number) => string;
16
+ /**
17
+ * Extract class/interface/struct declaration lines, skipping method bodies.
18
+ * - Brace-based languages: detects method signatures (lines with `(` and `{`)
19
+ * and skips until depth returns to class body level.
20
+ * - Non-brace languages (Python/Ruby): returns empty string (patterns handle extraction).
21
+ */
22
+ export declare const extractDeclarationOnly: (content: string) => string;
8
23
  /**
9
24
  * Generate embedding text for any embeddable node
10
25
  * Dispatches to the appropriate generator based on node label
11
- *
12
- * @param node - The node to generate text for
13
- * @param config - Optional configuration for max snippet length
14
- * @returns Text suitable for embedding
15
26
  */
16
- export declare const generateEmbeddingText: (node: EmbeddableNode, config?: Partial<EmbeddingConfig>) => string;
27
+ export declare const generateEmbeddingText: (node: EmbeddableNode, codeBody: string, config?: Partial<EmbeddingConfig>) => string;
17
28
  /**
18
- * Generate embedding texts for a batch of nodes
19
- *
20
- * @param nodes - Array of nodes to generate text for
21
- * @param config - Optional configuration
22
- * @returns Array of texts in the same order as input nodes
29
+ * Export truncation helper for testing
23
30
  */
24
- export declare const generateBatchEmbeddingTexts: (nodes: EmbeddableNode[], config?: Partial<EmbeddingConfig>) => string[];
31
+ export { truncateDescription };
@@ -1,167 +1,199 @@
1
1
  /**
2
2
  * Text Generator Module
3
3
  *
4
- * Pure functions to generate embedding text from code nodes.
5
- * Combines node metadata with code snippets for semantic matching.
6
- */
7
- import { DEFAULT_EMBEDDING_CONFIG } from './types.js';
8
- /**
9
- * Extract the filename from a file path
10
- */
11
- const getFileName = (filePath) => {
12
- const parts = filePath.split('/');
13
- return parts[parts.length - 1] || filePath;
14
- };
15
- /**
16
- * Extract the directory path from a file path
4
+ * Generates enriched embedding text from code nodes with metadata.
5
+ * Supports chunkable labels (Function/Method with AST chunking),
6
+ * Class-specific structural text, and short-node direct embed.
7
+ *
8
+ * Method/field names for Class nodes are extracted by the ingestion
9
+ * pipeline's AST extractors and passed via node.methodNames/node.fieldNames.
17
10
  */
18
- const getDirectory = (filePath) => {
19
- const parts = filePath.split('/');
20
- parts.pop();
21
- return parts.join('/') || '';
22
- };
11
+ import { DEFAULT_EMBEDDING_CONFIG, isShortLabel } from './types.js';
23
12
  /**
24
- * Truncate content to max length, preserving word boundaries
13
+ * Truncate description to max length at sentence/word boundary
25
14
  */
26
- const truncateContent = (content, maxLength) => {
27
- if (content.length <= maxLength) {
28
- return content;
15
+ const truncateDescription = (text, maxLength) => {
16
+ if (text.length <= maxLength)
17
+ return text;
18
+ const truncated = text.slice(0, maxLength);
19
+ // Try sentence boundary (. ! ?)
20
+ const sentenceEnd = Math.max(truncated.lastIndexOf('. '), truncated.lastIndexOf('! '), truncated.lastIndexOf('? '));
21
+ if (sentenceEnd > maxLength * 0.5) {
22
+ return truncated.slice(0, sentenceEnd + 1);
29
23
  }
30
- // Find last space before maxLength to avoid cutting words
31
- const truncated = content.slice(0, maxLength);
24
+ // Try word boundary
32
25
  const lastSpace = truncated.lastIndexOf(' ');
33
- if (lastSpace > maxLength * 0.8) {
34
- return truncated.slice(0, lastSpace) + '...';
26
+ if (lastSpace > maxLength * 0.5) {
27
+ return truncated.slice(0, lastSpace);
35
28
  }
36
- return truncated + '...';
29
+ return truncated;
37
30
  };
38
31
  /**
39
32
  * Clean code content for embedding
40
- * Removes excessive whitespace while preserving structure
41
33
  */
42
34
  const cleanContent = (content) => {
43
- return (content
44
- // Normalize line endings
35
+ return content
45
36
  .replace(/\r\n/g, '\n')
46
- // Remove excessive blank lines (more than 2)
47
37
  .replace(/\n{3,}/g, '\n\n')
48
- // Trim each line
49
38
  .split('\n')
50
39
  .map((line) => line.trimEnd())
51
40
  .join('\n')
52
- .trim());
41
+ .trim();
53
42
  };
54
43
  /**
55
- * Generate embedding text for a Function node
44
+ * Build metadata header for a node
56
45
  */
57
- const generateFunctionText = (node, maxSnippetLength) => {
58
- const parts = [`Function: ${node.name}`, `File: ${getFileName(node.filePath)}`];
59
- const dir = getDirectory(node.filePath);
60
- if (dir) {
61
- parts.push(`Directory: ${dir}`);
46
+ const buildMetadataHeader = (node, config) => {
47
+ const parts = [];
48
+ // Label + name
49
+ parts.push(`${node.label}: ${node.name}`);
50
+ // Repo name
51
+ if (node.repoName) {
52
+ parts.push(`Repo: ${node.repoName}`);
62
53
  }
63
- if (node.content) {
64
- const cleanedContent = cleanContent(node.content);
65
- const snippet = truncateContent(cleanedContent, maxSnippetLength);
66
- parts.push('', snippet);
54
+ // Server name (optional)
55
+ if (node.serverName) {
56
+ parts.push(`Server: ${node.serverName}`);
67
57
  }
68
- return parts.join('\n');
69
- };
70
- /**
71
- * Generate embedding text for a Class node
72
- */
73
- const generateClassText = (node, maxSnippetLength) => {
74
- const parts = [`Class: ${node.name}`, `File: ${getFileName(node.filePath)}`];
75
- const dir = getDirectory(node.filePath);
76
- if (dir) {
77
- parts.push(`Directory: ${dir}`);
58
+ // Full file path
59
+ parts.push(`Path: ${node.filePath}`);
60
+ // Export status
61
+ if (node.isExported !== undefined) {
62
+ parts.push(`Export: ${node.isExported}`);
78
63
  }
79
- if (node.content) {
80
- const cleanedContent = cleanContent(node.content);
81
- const snippet = truncateContent(cleanedContent, maxSnippetLength);
82
- parts.push('', snippet);
64
+ // Description (truncated)
65
+ if (node.description) {
66
+ const maxLen = config.maxDescriptionLength ?? DEFAULT_EMBEDDING_CONFIG.maxDescriptionLength;
67
+ const truncated = truncateDescription(node.description, maxLen);
68
+ if (truncated) {
69
+ parts.push(truncated);
70
+ }
83
71
  }
84
72
  return parts.join('\n');
85
73
  };
74
+ const generateCodeBodyText = (node, codeBody, config) => {
75
+ const header = buildMetadataHeader(node, config);
76
+ const cleaned = cleanContent(codeBody);
77
+ return `${header}\n\n${cleaned}`;
78
+ };
86
79
  /**
87
- * Generate embedding text for a Method node
80
+ * Generate embedding text for Class nodes
81
+ * Signature + properties + method name list only (no method bodies)
82
+ * Method/field names come from AST extractors via node.methodNames/node.fieldNames.
88
83
  */
89
- const generateMethodText = (node, maxSnippetLength) => {
90
- const parts = [`Method: ${node.name}`, `File: ${getFileName(node.filePath)}`];
91
- const dir = getDirectory(node.filePath);
92
- if (dir) {
93
- parts.push(`Directory: ${dir}`);
84
+ const generateClassText = (node, codeBody, config) => {
85
+ return generateStructuralTypeText(node, codeBody, config);
86
+ };
87
+ const generateStructuralTypeText = (node, codeBody, config) => {
88
+ const header = buildMetadataHeader(node, config);
89
+ const parts = [header];
90
+ if (node.methodNames?.length) {
91
+ parts.push(`Methods: ${node.methodNames.join(', ')}`);
94
92
  }
95
- if (node.content) {
96
- const cleanedContent = cleanContent(node.content);
97
- const snippet = truncateContent(cleanedContent, maxSnippetLength);
98
- parts.push('', snippet);
93
+ if (node.fieldNames?.length) {
94
+ parts.push(`Properties: ${node.fieldNames.join(', ')}`);
99
95
  }
100
- return parts.join('\n');
101
- };
102
- /**
103
- * Generate embedding text for an Interface node
104
- */
105
- const generateInterfaceText = (node, maxSnippetLength) => {
106
- const parts = [`Interface: ${node.name}`, `File: ${getFileName(node.filePath)}`];
107
- const dir = getDirectory(node.filePath);
108
- if (dir) {
109
- parts.push(`Directory: ${dir}`);
96
+ const declarationOnly = extractDeclarationOnly(cleanContent(node.content));
97
+ if (declarationOnly) {
98
+ parts.push('', declarationOnly);
110
99
  }
111
- if (node.content) {
112
- const cleanedContent = cleanContent(node.content);
113
- const snippet = truncateContent(cleanedContent, maxSnippetLength);
114
- parts.push('', snippet);
100
+ const cleanedChunk = cleanContent(codeBody);
101
+ if (cleanedChunk && cleanedChunk !== cleanContent(node.content)) {
102
+ parts.push('', cleanedChunk);
115
103
  }
116
104
  return parts.join('\n');
117
105
  };
106
+ const DECL_START_RE = /^(?:(?:export|pub|data|abstract)\s+)*(?:type\s+\w+\s+struct|(?:class|struct|enum|interface)\s)/;
118
107
  /**
119
- * Generate embedding text for a File node
120
- * Uses file name and first N characters of content
108
+ * Extract class/interface/struct declaration lines, skipping method bodies.
109
+ * - Brace-based languages: detects method signatures (lines with `(` and `{`)
110
+ * and skips until depth returns to class body level.
111
+ * - Non-brace languages (Python/Ruby): returns empty string (patterns handle extraction).
121
112
  */
122
- const generateFileText = (node, maxSnippetLength) => {
123
- const parts = [`File: ${node.name}`, `Path: ${node.filePath}`];
124
- if (node.content) {
125
- const cleanedContent = cleanContent(node.content);
126
- // For files, use a shorter snippet since they can be very long
127
- const snippet = truncateContent(cleanedContent, Math.min(maxSnippetLength, 300));
128
- parts.push('', snippet);
113
+ export const extractDeclarationOnly = (content) => {
114
+ const lines = content.split('\n');
115
+ const declLines = [];
116
+ let depth = 0;
117
+ let started = false;
118
+ let classDepth = 0;
119
+ let skipDepth = 0;
120
+ for (const [idx, line] of lines.entries()) {
121
+ const trimmed = line.trim();
122
+ if (!started) {
123
+ if (DECL_START_RE.test(trimmed)) {
124
+ // Non-brace language check: current line or next 3 lines must have `{`
125
+ const nextLines = lines.slice(idx + 1, idx + 4);
126
+ if (!trimmed.includes('{') && !nextLines.some((l) => l.includes('{'))) {
127
+ return '';
128
+ }
129
+ started = true;
130
+ declLines.push(trimmed);
131
+ for (const ch of trimmed) {
132
+ if (ch === '{')
133
+ depth++;
134
+ else if (ch === '}')
135
+ depth--;
136
+ }
137
+ if (depth > 0)
138
+ classDepth = depth;
139
+ }
140
+ continue;
141
+ }
142
+ // Always update depth (even when skipping)
143
+ const opens = (trimmed.match(/{/g) || []).length;
144
+ const closes = (trimmed.match(/}/g) || []).length;
145
+ const prevDepth = depth;
146
+ depth += opens - closes;
147
+ if (skipDepth > 0) {
148
+ if (depth <= classDepth) {
149
+ skipDepth = 0;
150
+ // Closing brace of class
151
+ if (depth <= 0) {
152
+ declLines.push(trimmed);
153
+ break;
154
+ }
155
+ }
156
+ continue;
157
+ }
158
+ // Detect method signature: line has both `(` and `{` and goes deeper than class body
159
+ const hasParens = trimmed.includes('(');
160
+ const hasOpenBrace = opens > 0;
161
+ if (hasParens && hasOpenBrace && prevDepth + opens > classDepth) {
162
+ if (opens === closes && trimmed.endsWith(';')) {
163
+ // Property with function/object initializer like `config = { timeout: 5000 };` — keep
164
+ declLines.push(trimmed);
165
+ }
166
+ // else: single-line or multi-line method — skip entirely
167
+ if (opens !== closes) {
168
+ skipDepth = classDepth;
169
+ }
170
+ continue;
171
+ }
172
+ declLines.push(trimmed);
173
+ if (depth <= 0 && declLines.length > 1)
174
+ break;
129
175
  }
130
- return parts.join('\n');
176
+ return declLines.join('\n').trim();
131
177
  };
132
178
  /**
133
179
  * Generate embedding text for any embeddable node
134
180
  * Dispatches to the appropriate generator based on node label
135
- *
136
- * @param node - The node to generate text for
137
- * @param config - Optional configuration for max snippet length
138
- * @returns Text suitable for embedding
139
181
  */
140
- export const generateEmbeddingText = (node, config = {}) => {
141
- const maxSnippetLength = config.maxSnippetLength ?? DEFAULT_EMBEDDING_CONFIG.maxSnippetLength;
142
- switch (node.label) {
143
- case 'Function':
144
- return generateFunctionText(node, maxSnippetLength);
145
- case 'Class':
146
- return generateClassText(node, maxSnippetLength);
147
- case 'Method':
148
- return generateMethodText(node, maxSnippetLength);
149
- case 'Interface':
150
- return generateInterfaceText(node, maxSnippetLength);
151
- case 'File':
152
- return generateFileText(node, maxSnippetLength);
153
- default:
154
- // Fallback for any other embeddable type
155
- return `${node.label}: ${node.name}\nPath: ${node.filePath}`;
182
+ export const generateEmbeddingText = (node, codeBody, config = {}) => {
183
+ if (isShortLabel(node.label)) {
184
+ const header = buildMetadataHeader(node, config);
185
+ const cleaned = cleanContent(node.content);
186
+ return `${header}\n\n${cleaned}`;
187
+ }
188
+ if (node.label === 'Class') {
189
+ return generateClassText(node, codeBody, config);
190
+ }
191
+ if (node.label === 'Interface') {
192
+ return generateStructuralTypeText(node, codeBody, config);
156
193
  }
194
+ return generateCodeBodyText(node, codeBody, config);
157
195
  };
158
196
  /**
159
- * Generate embedding texts for a batch of nodes
160
- *
161
- * @param nodes - Array of nodes to generate text for
162
- * @param config - Optional configuration
163
- * @returns Array of texts in the same order as input nodes
197
+ * Export truncation helper for testing
164
198
  */
165
- export const generateBatchEmbeddingTexts = (nodes, config = {}) => {
166
- return nodes.map((node) => generateEmbeddingText(node, config));
167
- };
199
+ export { truncateDescription };
@@ -4,15 +4,38 @@
4
4
  * Type definitions for the embedding generation and semantic search system.
5
5
  */
6
6
  /**
7
- * Node labels that should be embedded for semantic search
8
- * These are code elements that benefit from semantic matching
7
+ * Node labels that need chunking (have code body, potentially long)
9
8
  */
10
- export declare const EMBEDDABLE_LABELS: readonly ["Function", "Class", "Method", "Interface", "File"];
9
+ export declare const CHUNKABLE_LABELS: readonly ["Function", "Method", "Constructor", "Class", "Interface", "Struct", "Enum", "Trait", "Impl", "Macro", "Namespace"];
10
+ /**
11
+ * Node labels that are short (no chunking needed, embed directly)
12
+ */
13
+ export declare const SHORT_LABELS: readonly ["TypeAlias", "Typedef", "Const", "Property", "Record", "Union", "Static", "Variable"];
14
+ /**
15
+ * All embeddable labels (union of CHUNKABLE + SHORT)
16
+ */
17
+ export declare const EMBEDDABLE_LABELS: readonly ["Function", "Method", "Constructor", "Class", "Interface", "Struct", "Enum", "Trait", "Impl", "Macro", "Namespace", "TypeAlias", "Typedef", "Const", "Property", "Record", "Union", "Static", "Variable"];
11
18
  export type EmbeddableLabel = (typeof EMBEDDABLE_LABELS)[number];
12
19
  /**
13
20
  * Check if a label should be embedded
14
21
  */
15
22
  export declare const isEmbeddableLabel: (label: string) => label is EmbeddableLabel;
23
+ /**
24
+ * Check if a label needs chunking
25
+ */
26
+ export declare const isChunkableLabel: (label: string) => boolean;
27
+ /**
28
+ * Check if a label is a short type (no chunking)
29
+ */
30
+ export declare const isShortLabel: (label: string) => boolean;
31
+ /**
32
+ * Node labels that have structural names (methods/fields) extractable via AST
33
+ */
34
+ export declare const STRUCTURAL_LABELS: ReadonlySet<string>;
35
+ /**
36
+ * Node labels that have isExported column in their schema
37
+ */
38
+ export declare const LABELS_WITH_EXPORTED: ReadonlySet<string>;
16
39
  /**
17
40
  * Embedding pipeline phases
18
41
  */
@@ -44,6 +67,12 @@ export interface EmbeddingConfig {
44
67
  device: 'auto' | 'dml' | 'cuda' | 'cpu' | 'wasm';
45
68
  /** Maximum characters of code snippet to include */
46
69
  maxSnippetLength: number;
70
+ /** Maximum code chunk size in characters (for chunking long code) */
71
+ chunkSize: number;
72
+ /** Overlap between chunks in characters */
73
+ overlap: number;
74
+ /** Maximum description length in characters */
75
+ maxDescriptionLength: number;
47
76
  }
48
77
  /**
49
78
  * Default embedding configuration
@@ -74,6 +103,32 @@ export interface EmbeddableNode {
74
103
  content: string;
75
104
  startLine?: number;
76
105
  endLine?: number;
106
+ isExported?: boolean;
107
+ description?: string;
108
+ parameterCount?: number;
109
+ returnType?: string;
110
+ repoName?: string;
111
+ serverName?: string;
112
+ methodNames?: string[];
113
+ fieldNames?: string[];
114
+ }
115
+ /**
116
+ * Cached embedding entry restored from LadybugDB before a graph rebuild
117
+ */
118
+ export interface CachedEmbedding {
119
+ nodeId: string;
120
+ chunkIndex: number;
121
+ startLine: number;
122
+ endLine: number;
123
+ embedding: number[];
124
+ contentHash?: string;
125
+ }
126
+ /**
127
+ * Context info for embedding pipeline (repo/server metadata enrichment)
128
+ */
129
+ export interface EmbeddingContext {
130
+ repoName?: string;
131
+ serverName?: string;
77
132
  }
78
133
  /**
79
134
  * Model download progress from transformers.js
@@ -85,3 +140,26 @@ export interface ModelProgress {
85
140
  loaded?: number;
86
141
  total?: number;
87
142
  }
143
+ export interface ChunkSearchRow {
144
+ nodeId: string;
145
+ chunkIndex: number;
146
+ startLine: number;
147
+ endLine: number;
148
+ distance: number;
149
+ }
150
+ export interface BestChunkMatch {
151
+ chunkIndex: number;
152
+ startLine: number;
153
+ endLine: number;
154
+ distance: number;
155
+ }
156
+ /**
157
+ * Deduplicate vector search chunk results by nodeId,
158
+ * keeping the chunk with smallest distance for each node.
159
+ */
160
+ export declare const dedupBestChunks: (rows: ChunkSearchRow[], limit?: number) => Map<string, BestChunkMatch>;
161
+ /**
162
+ * Fetch vector-search chunks until we have enough unique nodeIds
163
+ * or can tell the result set is exhausted.
164
+ */
165
+ export declare const collectBestChunks: (limit: number, fetchRows: (fetchLimit: number) => Promise<ChunkSearchRow[]>, maxFetch?: number) => Promise<Map<string, BestChunkMatch>>;
@@ -4,14 +4,69 @@
4
4
  * Type definitions for the embedding generation and semantic search system.
5
5
  */
6
6
  /**
7
- * Node labels that should be embedded for semantic search
8
- * These are code elements that benefit from semantic matching
7
+ * Node labels that need chunking (have code body, potentially long)
9
8
  */
10
- export const EMBEDDABLE_LABELS = ['Function', 'Class', 'Method', 'Interface', 'File'];
9
+ export const CHUNKABLE_LABELS = [
10
+ 'Function',
11
+ 'Method',
12
+ 'Constructor',
13
+ 'Class',
14
+ 'Interface',
15
+ 'Struct',
16
+ 'Enum',
17
+ 'Trait',
18
+ 'Impl',
19
+ 'Macro',
20
+ 'Namespace',
21
+ ];
22
+ /**
23
+ * Node labels that are short (no chunking needed, embed directly)
24
+ */
25
+ export const SHORT_LABELS = [
26
+ 'TypeAlias',
27
+ 'Typedef',
28
+ 'Const',
29
+ 'Property',
30
+ 'Record',
31
+ 'Union',
32
+ 'Static',
33
+ 'Variable',
34
+ ];
35
+ /**
36
+ * All embeddable labels (union of CHUNKABLE + SHORT)
37
+ */
38
+ export const EMBEDDABLE_LABELS = [...CHUNKABLE_LABELS, ...SHORT_LABELS];
11
39
  /**
12
40
  * Check if a label should be embedded
13
41
  */
14
42
  export const isEmbeddableLabel = (label) => EMBEDDABLE_LABELS.includes(label);
43
+ /**
44
+ * Check if a label needs chunking
45
+ */
46
+ export const isChunkableLabel = (label) => CHUNKABLE_LABELS.includes(label);
47
+ /**
48
+ * Check if a label is a short type (no chunking)
49
+ */
50
+ export const isShortLabel = (label) => SHORT_LABELS.includes(label);
51
+ /**
52
+ * Node labels that have structural names (methods/fields) extractable via AST
53
+ */
54
+ export const STRUCTURAL_LABELS = new Set([
55
+ 'Class',
56
+ 'Struct',
57
+ 'Interface',
58
+ 'Enum',
59
+ ]);
60
+ /**
61
+ * Node labels that have isExported column in their schema
62
+ */
63
+ export const LABELS_WITH_EXPORTED = new Set([
64
+ 'Function',
65
+ 'Class',
66
+ 'Interface',
67
+ 'Method',
68
+ 'CodeElement',
69
+ ]);
15
70
  /**
16
71
  * Default embedding configuration
17
72
  * Uses snowflake-arctic-embed-xs for browser efficiency
@@ -23,4 +78,51 @@ export const DEFAULT_EMBEDDING_CONFIG = {
23
78
  dimensions: 384,
24
79
  device: 'auto',
25
80
  maxSnippetLength: 500,
81
+ chunkSize: 1200,
82
+ overlap: 120,
83
+ maxDescriptionLength: 150,
84
+ };
85
+ /**
86
+ * Deduplicate vector search chunk results by nodeId,
87
+ * keeping the chunk with smallest distance for each node.
88
+ */
89
+ export const dedupBestChunks = (rows, limit) => {
90
+ const best = new Map();
91
+ for (const row of rows) {
92
+ const existing = best.get(row.nodeId);
93
+ if (!existing || row.distance < existing.distance) {
94
+ best.set(row.nodeId, {
95
+ chunkIndex: row.chunkIndex,
96
+ startLine: row.startLine,
97
+ endLine: row.endLine,
98
+ distance: row.distance,
99
+ });
100
+ }
101
+ if (limit !== undefined && best.size >= limit)
102
+ break;
103
+ }
104
+ return best;
105
+ };
106
+ const DEFAULT_FETCH_MULTIPLIER = 4;
107
+ const DEFAULT_FETCH_BUFFER = 8;
108
+ const DEFAULT_MAX_FETCH = 200;
109
+ /**
110
+ * Fetch vector-search chunks until we have enough unique nodeIds
111
+ * or can tell the result set is exhausted.
112
+ */
113
+ export const collectBestChunks = async (limit, fetchRows, maxFetch = DEFAULT_MAX_FETCH) => {
114
+ if (limit <= 0)
115
+ return new Map();
116
+ let fetchLimit = Math.max(limit * DEFAULT_FETCH_MULTIPLIER, limit + DEFAULT_FETCH_BUFFER);
117
+ let previousFetchLimit = 0;
118
+ while (fetchLimit > previousFetchLimit) {
119
+ const rows = await fetchRows(fetchLimit);
120
+ const bestChunks = dedupBestChunks(rows, limit);
121
+ if (bestChunks.size >= limit || rows.length < fetchLimit) {
122
+ return bestChunks;
123
+ }
124
+ previousFetchLimit = fetchLimit;
125
+ fetchLimit = fetchLimit >= maxFetch ? fetchLimit * 2 : Math.min(maxFetch, fetchLimit * 2);
126
+ }
127
+ return new Map();
26
128
  };