gitnexus 1.6.2-rc.2 → 1.6.2-rc.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198) hide show
  1. package/dist/_shared/lbug/schema-constants.d.ts +1 -1
  2. package/dist/_shared/lbug/schema-constants.d.ts.map +1 -1
  3. package/dist/_shared/lbug/schema-constants.js +1 -0
  4. package/dist/_shared/lbug/schema-constants.js.map +1 -1
  5. package/dist/cli/analyze.js +3 -0
  6. package/dist/core/embeddings/ast-utils.d.ts +22 -0
  7. package/dist/core/embeddings/ast-utils.js +105 -0
  8. package/dist/core/embeddings/character-chunk.d.ts +12 -0
  9. package/dist/core/embeddings/character-chunk.js +43 -0
  10. package/dist/core/embeddings/chunker.d.ts +14 -0
  11. package/dist/core/embeddings/chunker.js +234 -0
  12. package/dist/core/embeddings/embedder.js +5 -0
  13. package/dist/core/embeddings/embedding-pipeline.d.ts +29 -24
  14. package/dist/core/embeddings/embedding-pipeline.js +244 -125
  15. package/dist/core/embeddings/line-index.d.ts +7 -0
  16. package/dist/core/embeddings/line-index.js +42 -0
  17. package/dist/core/embeddings/server-mapping.d.ts +15 -0
  18. package/dist/core/embeddings/server-mapping.js +33 -0
  19. package/dist/core/embeddings/structural-extractor.d.ts +15 -0
  20. package/dist/core/embeddings/structural-extractor.js +58 -0
  21. package/dist/core/embeddings/text-generator.d.ts +20 -13
  22. package/dist/core/embeddings/text-generator.js +151 -119
  23. package/dist/core/embeddings/types.d.ts +81 -3
  24. package/dist/core/embeddings/types.js +105 -3
  25. package/dist/core/group/extractors/http-patterns/node.js +130 -0
  26. package/dist/core/group/extractors/manifest-extractor.js +20 -5
  27. package/dist/core/group/sync.js +49 -1
  28. package/dist/core/ingestion/call-extractors/configs/c-cpp.d.ts +3 -0
  29. package/dist/core/ingestion/call-extractors/configs/c-cpp.js +8 -0
  30. package/dist/core/ingestion/call-extractors/configs/csharp.d.ts +2 -0
  31. package/dist/core/ingestion/call-extractors/configs/csharp.js +6 -0
  32. package/dist/core/ingestion/call-extractors/configs/dart.d.ts +2 -0
  33. package/dist/core/ingestion/call-extractors/configs/dart.js +5 -0
  34. package/dist/core/ingestion/call-extractors/configs/go.d.ts +2 -0
  35. package/dist/core/ingestion/call-extractors/configs/go.js +5 -0
  36. package/dist/core/ingestion/call-extractors/configs/jvm.d.ts +3 -0
  37. package/dist/core/ingestion/call-extractors/configs/jvm.js +51 -0
  38. package/dist/core/ingestion/call-extractors/configs/php.d.ts +2 -0
  39. package/dist/core/ingestion/call-extractors/configs/php.js +5 -0
  40. package/dist/core/ingestion/call-extractors/configs/python.d.ts +2 -0
  41. package/dist/core/ingestion/call-extractors/configs/python.js +5 -0
  42. package/dist/core/ingestion/call-extractors/configs/ruby.d.ts +2 -0
  43. package/dist/core/ingestion/call-extractors/configs/ruby.js +5 -0
  44. package/dist/core/ingestion/call-extractors/configs/rust.d.ts +2 -0
  45. package/dist/core/ingestion/call-extractors/configs/rust.js +5 -0
  46. package/dist/core/ingestion/call-extractors/configs/swift.d.ts +2 -0
  47. package/dist/core/ingestion/call-extractors/configs/swift.js +5 -0
  48. package/dist/core/ingestion/call-extractors/configs/typescript-javascript.d.ts +3 -0
  49. package/dist/core/ingestion/call-extractors/configs/typescript-javascript.js +8 -0
  50. package/dist/core/ingestion/call-extractors/generic.d.ts +5 -0
  51. package/dist/core/ingestion/call-extractors/generic.js +59 -0
  52. package/dist/core/ingestion/call-processor.d.ts +1 -3
  53. package/dist/core/ingestion/call-processor.js +49 -47
  54. package/dist/core/ingestion/call-types.d.ts +60 -0
  55. package/dist/core/ingestion/call-types.js +2 -0
  56. package/dist/core/ingestion/class-extractors/configs/c-cpp.d.ts +3 -0
  57. package/dist/core/ingestion/class-extractors/configs/c-cpp.js +11 -0
  58. package/dist/core/ingestion/class-extractors/configs/csharp.d.ts +2 -0
  59. package/dist/core/ingestion/class-extractors/configs/csharp.js +21 -0
  60. package/dist/core/ingestion/class-extractors/configs/dart.d.ts +2 -0
  61. package/dist/core/ingestion/class-extractors/configs/dart.js +7 -0
  62. package/dist/core/ingestion/class-extractors/configs/go.d.ts +2 -0
  63. package/dist/core/ingestion/class-extractors/configs/go.js +20 -0
  64. package/dist/core/ingestion/class-extractors/configs/jvm.d.ts +3 -0
  65. package/dist/core/ingestion/class-extractors/configs/jvm.js +35 -0
  66. package/dist/core/ingestion/class-extractors/configs/php.d.ts +2 -0
  67. package/dist/core/ingestion/class-extractors/configs/php.js +7 -0
  68. package/dist/core/ingestion/class-extractors/configs/python.d.ts +2 -0
  69. package/dist/core/ingestion/class-extractors/configs/python.js +7 -0
  70. package/dist/core/ingestion/class-extractors/configs/ruby.d.ts +2 -0
  71. package/dist/core/ingestion/class-extractors/configs/ruby.js +7 -0
  72. package/dist/core/ingestion/class-extractors/configs/rust.d.ts +2 -0
  73. package/dist/core/ingestion/class-extractors/configs/rust.js +7 -0
  74. package/dist/core/ingestion/class-extractors/configs/swift.d.ts +2 -0
  75. package/dist/core/ingestion/class-extractors/configs/swift.js +18 -0
  76. package/dist/core/ingestion/class-extractors/configs/typescript-javascript.d.ts +4 -0
  77. package/dist/core/ingestion/class-extractors/configs/typescript-javascript.js +28 -0
  78. package/dist/core/ingestion/field-types.d.ts +1 -1
  79. package/dist/core/ingestion/import-resolvers/configs/c-cpp.d.ts +7 -0
  80. package/dist/core/ingestion/import-resolvers/configs/c-cpp.js +14 -0
  81. package/dist/core/ingestion/import-resolvers/configs/csharp.d.ts +8 -0
  82. package/dist/core/ingestion/import-resolvers/configs/csharp.js +27 -0
  83. package/dist/core/ingestion/import-resolvers/configs/dart.d.ts +17 -0
  84. package/dist/core/ingestion/import-resolvers/{dart.js → configs/dart.js} +26 -16
  85. package/dist/core/ingestion/import-resolvers/configs/go.d.ts +8 -0
  86. package/dist/core/ingestion/import-resolvers/configs/go.js +26 -0
  87. package/dist/core/ingestion/import-resolvers/configs/jvm.d.ts +13 -0
  88. package/dist/core/ingestion/import-resolvers/configs/jvm.js +68 -0
  89. package/dist/core/ingestion/import-resolvers/configs/php.d.ts +8 -0
  90. package/dist/core/ingestion/import-resolvers/configs/php.js +15 -0
  91. package/dist/core/ingestion/import-resolvers/configs/python.d.ts +12 -0
  92. package/dist/core/ingestion/import-resolvers/configs/python.js +41 -0
  93. package/dist/core/ingestion/import-resolvers/configs/ruby.d.ts +8 -0
  94. package/dist/core/ingestion/import-resolvers/configs/ruby.js +16 -0
  95. package/dist/core/ingestion/import-resolvers/configs/rust.d.ts +8 -0
  96. package/dist/core/ingestion/import-resolvers/configs/rust.js +54 -0
  97. package/dist/core/ingestion/import-resolvers/configs/swift.d.ts +8 -0
  98. package/dist/core/ingestion/import-resolvers/{swift.js → configs/swift.js} +10 -5
  99. package/dist/core/ingestion/import-resolvers/configs/typescript-javascript.d.ts +9 -0
  100. package/dist/core/ingestion/import-resolvers/configs/typescript-javascript.js +23 -0
  101. package/dist/core/ingestion/import-resolvers/csharp.d.ts +4 -5
  102. package/dist/core/ingestion/import-resolvers/csharp.js +4 -20
  103. package/dist/core/ingestion/import-resolvers/go.d.ts +4 -5
  104. package/dist/core/ingestion/import-resolvers/go.js +4 -19
  105. package/dist/core/ingestion/import-resolvers/jvm.d.ts +5 -10
  106. package/dist/core/ingestion/import-resolvers/jvm.js +5 -58
  107. package/dist/core/ingestion/import-resolvers/php.d.ts +4 -5
  108. package/dist/core/ingestion/import-resolvers/php.js +4 -7
  109. package/dist/core/ingestion/import-resolvers/python.d.ts +3 -6
  110. package/dist/core/ingestion/import-resolvers/python.js +3 -18
  111. package/dist/core/ingestion/import-resolvers/resolver-factory.d.ts +24 -0
  112. package/dist/core/ingestion/import-resolvers/resolver-factory.js +33 -0
  113. package/dist/core/ingestion/import-resolvers/ruby.d.ts +4 -5
  114. package/dist/core/ingestion/import-resolvers/ruby.js +4 -7
  115. package/dist/core/ingestion/import-resolvers/rust.d.ts +4 -5
  116. package/dist/core/ingestion/import-resolvers/rust.js +4 -47
  117. package/dist/core/ingestion/import-resolvers/standard.d.ts +3 -9
  118. package/dist/core/ingestion/import-resolvers/standard.js +7 -8
  119. package/dist/core/ingestion/import-resolvers/types.d.ts +24 -0
  120. package/dist/core/ingestion/language-provider.d.ts +12 -0
  121. package/dist/core/ingestion/languages/c-cpp.js +15 -12
  122. package/dist/core/ingestion/languages/csharp.js +11 -21
  123. package/dist/core/ingestion/languages/dart.js +11 -7
  124. package/dist/core/ingestion/languages/go.js +11 -20
  125. package/dist/core/ingestion/languages/java.js +11 -18
  126. package/dist/core/ingestion/languages/kotlin.js +11 -13
  127. package/dist/core/ingestion/languages/php.js +11 -7
  128. package/dist/core/ingestion/languages/python.js +11 -7
  129. package/dist/core/ingestion/languages/ruby.js +11 -7
  130. package/dist/core/ingestion/languages/rust.js +11 -7
  131. package/dist/core/ingestion/languages/swift.js +11 -18
  132. package/dist/core/ingestion/languages/typescript.js +15 -23
  133. package/dist/core/ingestion/languages/vue.js +11 -17
  134. package/dist/core/ingestion/model/index.d.ts +2 -2
  135. package/dist/core/ingestion/model/index.js +1 -1
  136. package/dist/core/ingestion/model/resolve.d.ts +3 -0
  137. package/dist/core/ingestion/model/resolve.js +6 -2
  138. package/dist/core/ingestion/parsing-processor.d.ts +1 -2
  139. package/dist/core/ingestion/tree-sitter-queries.d.ts +11 -11
  140. package/dist/core/ingestion/tree-sitter-queries.js +81 -0
  141. package/dist/core/ingestion/type-env.d.ts +1 -1
  142. package/dist/core/ingestion/utils/ast-helpers.d.ts +1 -1
  143. package/dist/core/ingestion/utils/ast-helpers.js +3 -0
  144. package/dist/core/ingestion/variable-extractors/configs/c-cpp.d.ts +3 -0
  145. package/dist/core/ingestion/variable-extractors/configs/c-cpp.js +81 -0
  146. package/dist/core/ingestion/variable-extractors/configs/csharp.d.ts +9 -0
  147. package/dist/core/ingestion/variable-extractors/configs/csharp.js +63 -0
  148. package/dist/core/ingestion/variable-extractors/configs/dart.d.ts +2 -0
  149. package/dist/core/ingestion/variable-extractors/configs/dart.js +94 -0
  150. package/dist/core/ingestion/variable-extractors/configs/go.d.ts +2 -0
  151. package/dist/core/ingestion/variable-extractors/configs/go.js +83 -0
  152. package/dist/core/ingestion/variable-extractors/configs/jvm.d.ts +18 -0
  153. package/dist/core/ingestion/variable-extractors/configs/jvm.js +115 -0
  154. package/dist/core/ingestion/variable-extractors/configs/php.d.ts +14 -0
  155. package/dist/core/ingestion/variable-extractors/configs/php.js +58 -0
  156. package/dist/core/ingestion/variable-extractors/configs/python.d.ts +2 -0
  157. package/dist/core/ingestion/variable-extractors/configs/python.js +101 -0
  158. package/dist/core/ingestion/variable-extractors/configs/ruby.d.ts +11 -0
  159. package/dist/core/ingestion/variable-extractors/configs/ruby.js +52 -0
  160. package/dist/core/ingestion/variable-extractors/configs/rust.d.ts +2 -0
  161. package/dist/core/ingestion/variable-extractors/configs/rust.js +76 -0
  162. package/dist/core/ingestion/variable-extractors/configs/swift.d.ts +2 -0
  163. package/dist/core/ingestion/variable-extractors/configs/swift.js +88 -0
  164. package/dist/core/ingestion/variable-extractors/configs/typescript-javascript.d.ts +3 -0
  165. package/dist/core/ingestion/variable-extractors/configs/typescript-javascript.js +83 -0
  166. package/dist/core/ingestion/variable-extractors/generic.d.ts +5 -0
  167. package/dist/core/ingestion/variable-extractors/generic.js +80 -0
  168. package/dist/core/ingestion/variable-types.d.ts +82 -0
  169. package/dist/core/ingestion/variable-types.js +2 -0
  170. package/dist/core/ingestion/workers/parse-worker.js +196 -166
  171. package/dist/core/ingestion/workers/worker-pool.js +3 -0
  172. package/dist/core/lbug/csv-generator.js +1 -0
  173. package/dist/core/lbug/lbug-adapter.d.ts +13 -4
  174. package/dist/core/lbug/lbug-adapter.js +166 -81
  175. package/dist/core/lbug/schema.d.ts +9 -1
  176. package/dist/core/lbug/schema.js +19 -2
  177. package/dist/core/run-analyze.js +17 -4
  178. package/dist/core/tree-sitter/parser-loader.d.ts +3 -0
  179. package/dist/core/tree-sitter/parser-loader.js +17 -8
  180. package/dist/mcp/core/embedder.js +5 -0
  181. package/dist/mcp/local/local-backend.js +29 -19
  182. package/dist/server/api.js +10 -21
  183. package/package.json +5 -3
  184. package/scripts/build-tree-sitter-proto.cjs +82 -0
  185. package/vendor/node_modules/node-addon-api/node_addon_api.Makefile +6 -0
  186. package/vendor/node_modules/node-addon-api/node_addon_api.target.mk +104 -0
  187. package/vendor/node_modules/node-addon-api/node_addon_api_except.target.mk +108 -0
  188. package/vendor/node_modules/node-addon-api/node_addon_api_except_all.target.mk +104 -0
  189. package/vendor/node_modules/node-addon-api/node_addon_api_maybe.target.mk +104 -0
  190. package/vendor/tree-sitter-proto/package.json +1 -7
  191. package/dist/core/ingestion/call-sites/extract-language-call-site.d.ts +0 -10
  192. package/dist/core/ingestion/call-sites/extract-language-call-site.js +0 -22
  193. package/dist/core/ingestion/call-sites/java.d.ts +0 -9
  194. package/dist/core/ingestion/call-sites/java.js +0 -30
  195. package/dist/core/ingestion/import-resolvers/dart.d.ts +0 -7
  196. package/dist/core/ingestion/import-resolvers/swift.d.ts +0 -7
  197. package/dist/core/ingestion/import-resolvers/vue.d.ts +0 -8
  198. package/dist/core/ingestion/import-resolvers/vue.js +0 -9
@@ -7,7 +7,7 @@
7
7
  * Full DDL schemas remain in each package's own schema.ts because
8
8
  * the CLI uses native LadybugDB and the web uses WASM.
9
9
  */
10
- export declare const NODE_TABLES: readonly ["File", "Folder", "Function", "Class", "Interface", "Method", "CodeElement", "Community", "Process", "Section", "Struct", "Enum", "Macro", "Typedef", "Union", "Namespace", "Trait", "Impl", "TypeAlias", "Const", "Static", "Property", "Record", "Delegate", "Annotation", "Constructor", "Template", "Module", "Route", "Tool"];
10
+ export declare const NODE_TABLES: readonly ["File", "Folder", "Function", "Class", "Interface", "Method", "CodeElement", "Community", "Process", "Section", "Struct", "Enum", "Macro", "Typedef", "Union", "Namespace", "Trait", "Impl", "TypeAlias", "Const", "Static", "Variable", "Property", "Record", "Delegate", "Annotation", "Constructor", "Template", "Module", "Route", "Tool"];
11
11
  export type NodeTableName = (typeof NODE_TABLES)[number];
12
12
  export declare const REL_TABLE_NAME = "CodeRelation";
13
13
  export declare const REL_TYPES: readonly ["CONTAINS", "DEFINES", "IMPORTS", "CALLS", "EXTENDS", "IMPLEMENTS", "HAS_METHOD", "HAS_PROPERTY", "ACCESSES", "METHOD_OVERRIDES", "OVERRIDES", "METHOD_IMPLEMENTS", "MEMBER_OF", "STEP_IN_PROCESS", "HANDLES_ROUTE", "FETCHES", "HANDLES_TOOL", "ENTRY_POINT_OF", "WRAPS", "QUERIES"];
@@ -1 +1 @@
1
- {"version":3,"file":"schema-constants.d.ts","sourceRoot":"","sources":["../../src/lbug/schema-constants.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,eAAO,MAAM,WAAW,8UA+Bd,CAAC;AAEX,MAAM,MAAM,aAAa,GAAG,CAAC,OAAO,WAAW,CAAC,CAAC,MAAM,CAAC,CAAC;AAEzD,eAAO,MAAM,cAAc,iBAAiB,CAAC;AAE7C,eAAO,MAAM,SAAS,iSAqBZ,CAAC;AAEX,MAAM,MAAM,OAAO,GAAG,CAAC,OAAO,SAAS,CAAC,CAAC,MAAM,CAAC,CAAC;AAEjD,eAAO,MAAM,oBAAoB,kBAAkB,CAAC"}
1
+ {"version":3,"file":"schema-constants.d.ts","sourceRoot":"","sources":["../../src/lbug/schema-constants.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,eAAO,MAAM,WAAW,0VAgCd,CAAC;AAEX,MAAM,MAAM,aAAa,GAAG,CAAC,OAAO,WAAW,CAAC,CAAC,MAAM,CAAC,CAAC;AAEzD,eAAO,MAAM,cAAc,iBAAiB,CAAC;AAE7C,eAAO,MAAM,SAAS,iSAqBZ,CAAC;AAEX,MAAM,MAAM,OAAO,GAAG,CAAC,OAAO,SAAS,CAAC,CAAC,MAAM,CAAC,CAAC;AAEjD,eAAO,MAAM,oBAAoB,kBAAkB,CAAC"}
@@ -29,6 +29,7 @@ export const NODE_TABLES = [
29
29
  'TypeAlias',
30
30
  'Const',
31
31
  'Static',
32
+ 'Variable',
32
33
  'Property',
33
34
  'Record',
34
35
  'Delegate',
@@ -1 +1 @@
1
- {"version":3,"file":"schema-constants.js","sourceRoot":"","sources":["../../src/lbug/schema-constants.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,MAAM,CAAC,MAAM,WAAW,GAAG;IACzB,MAAM;IACN,QAAQ;IACR,UAAU;IACV,OAAO;IACP,WAAW;IACX,QAAQ;IACR,aAAa;IACb,WAAW;IACX,SAAS;IACT,SAAS;IACT,QAAQ;IACR,MAAM;IACN,OAAO;IACP,SAAS;IACT,OAAO;IACP,WAAW;IACX,OAAO;IACP,MAAM;IACN,WAAW;IACX,OAAO;IACP,QAAQ;IACR,UAAU;IACV,QAAQ;IACR,UAAU;IACV,YAAY;IACZ,aAAa;IACb,UAAU;IACV,QAAQ;IACR,OAAO;IACP,MAAM;CACE,CAAC;AAIX,MAAM,CAAC,MAAM,cAAc,GAAG,cAAc,CAAC;AAE7C,MAAM,CAAC,MAAM,SAAS,GAAG;IACvB,UAAU;IACV,SAAS;IACT,SAAS;IACT,OAAO;IACP,SAAS;IACT,YAAY;IACZ,YAAY;IACZ,cAAc;IACd,UAAU;IACV,kBAAkB;IAClB,WAAW,EAAE,mEAAmE;IAChF,mBAAmB;IACnB,WAAW;IACX,iBAAiB;IACjB,eAAe;IACf,SAAS;IACT,cAAc;IACd,gBAAgB;IAChB,OAAO;IACP,SAAS;CACD,CAAC;AAIX,MAAM,CAAC,MAAM,oBAAoB,GAAG,eAAe,CAAC"}
1
+ {"version":3,"file":"schema-constants.js","sourceRoot":"","sources":["../../src/lbug/schema-constants.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,MAAM,CAAC,MAAM,WAAW,GAAG;IACzB,MAAM;IACN,QAAQ;IACR,UAAU;IACV,OAAO;IACP,WAAW;IACX,QAAQ;IACR,aAAa;IACb,WAAW;IACX,SAAS;IACT,SAAS;IACT,QAAQ;IACR,MAAM;IACN,OAAO;IACP,SAAS;IACT,OAAO;IACP,WAAW;IACX,OAAO;IACP,MAAM;IACN,WAAW;IACX,OAAO;IACP,QAAQ;IACR,UAAU;IACV,UAAU;IACV,QAAQ;IACR,UAAU;IACV,YAAY;IACZ,aAAa;IACb,UAAU;IACV,QAAQ;IACR,OAAO;IACP,MAAM;CACE,CAAC;AAIX,MAAM,CAAC,MAAM,cAAc,GAAG,cAAc,CAAC;AAE7C,MAAM,CAAC,MAAM,SAAS,GAAG;IACvB,UAAU;IACV,SAAS;IACT,SAAS;IACT,OAAO;IACP,SAAS;IACT,YAAY;IACZ,YAAY;IACZ,cAAc;IACd,UAAU;IACV,kBAAkB;IAClB,WAAW,EAAE,mEAAmE;IAChF,mBAAmB;IACnB,WAAW;IACX,iBAAiB;IACjB,eAAe;IACf,SAAS;IACT,cAAc;IACd,gBAAgB;IAChB,OAAO;IACP,SAAS;CACD,CAAC;AAIX,MAAM,CAAC,MAAM,oBAAoB,GAAG,eAAe,CAAC"}
@@ -114,9 +114,11 @@ export const analyzeCommand = async (inputPath, options) => {
114
114
  const origLog = console.log.bind(console);
115
115
  const origWarn = console.warn.bind(console);
116
116
  const origError = console.error.bind(console);
117
+ let barCurrentValue = 0;
117
118
  const barLog = (...args) => {
118
119
  process.stdout.write('\x1b[2K\r');
119
120
  origLog(args.map((a) => (typeof a === 'string' ? a : String(a))).join(' '));
121
+ bar.update(barCurrentValue);
120
122
  };
121
123
  console.log = barLog;
122
124
  console.warn = barLog;
@@ -125,6 +127,7 @@ export const analyzeCommand = async (inputPath, options) => {
125
127
  let lastPhaseLabel = 'Initializing...';
126
128
  let phaseStart = Date.now();
127
129
  const updateBar = (value, phaseLabel) => {
130
+ barCurrentValue = value;
128
131
  if (phaseLabel !== lastPhaseLabel) {
129
132
  lastPhaseLabel = phaseLabel;
130
133
  phaseStart = Date.now();
@@ -0,0 +1,22 @@
1
+ /**
2
+ * Shared AST utilities for the embedding pipeline.
3
+ * Centralizes parser caching and tree-sitter node lookups
4
+ * used by both chunker.ts and structural-extractor.ts.
5
+ */
6
+ /**
7
+ * Ensure parser is initialized and language is loaded, then parse content.
8
+ * Returns null if language is unavailable or parsing fails.
9
+ */
10
+ export declare const ensureAndParse: (content: string, filePath: string) => Promise<any | null>;
11
+ /**
12
+ * Find the first function/method-like declaration in a snippet AST.
13
+ * Used by the chunker when parsing node.content where absolute line
14
+ * numbers don't apply.
15
+ */
16
+ export declare const findFunctionNode: (root: any) => any | null;
17
+ /**
18
+ * Find the first class/struct/interface/enum-like declaration in an AST.
19
+ * Used when parsing node.content (a snippet, not a full file) where
20
+ * absolute line numbers don't apply.
21
+ */
22
+ export declare const findDeclarationNode: (root: any) => any | null;
@@ -0,0 +1,105 @@
1
+ /**
2
+ * Shared AST utilities for the embedding pipeline.
3
+ * Centralizes parser caching and tree-sitter node lookups
4
+ * used by both chunker.ts and structural-extractor.ts.
5
+ */
6
+ import { getLanguageFromFilename } from '../../_shared/index.js';
7
+ import { createParserForLanguage, isLanguageAvailable, resolveLanguageKey, } from '../tree-sitter/parser-loader.js';
8
+ const parserCache = new Map();
9
+ /**
10
+ * Ensure parser is initialized and language is loaded, then parse content.
11
+ * Returns null if language is unavailable or parsing fails.
12
+ */
13
+ export const ensureAndParse = async (content, filePath) => {
14
+ const language = getLanguageFromFilename(filePath);
15
+ if (!language)
16
+ return null;
17
+ if (!isLanguageAvailable(language))
18
+ return null;
19
+ const parserKey = resolveLanguageKey(language, filePath);
20
+ let parserInstance = parserCache.get(parserKey);
21
+ if (!parserInstance) {
22
+ parserInstance = await createParserForLanguage(language, filePath);
23
+ parserCache.set(parserKey, parserInstance);
24
+ }
25
+ return parserInstance.parse(content);
26
+ };
27
+ const FUNCTION_LIKE_TYPES = new Set([
28
+ 'function_declaration',
29
+ 'function_definition',
30
+ 'method_declaration',
31
+ 'method_definition',
32
+ 'function_item',
33
+ 'function_signature_item',
34
+ 'arrow_function',
35
+ 'function_expression',
36
+ 'generator_function_declaration',
37
+ 'generator_function',
38
+ 'async_function_declaration',
39
+ 'async_arrow_function',
40
+ 'constructor_declaration',
41
+ 'constructor_definition',
42
+ 'compact_constructor_declaration',
43
+ 'short_function_declaration',
44
+ 'proc_declaration',
45
+ 'func_literal',
46
+ 'local_function_statement',
47
+ 'anonymous_function',
48
+ 'lambda_literal',
49
+ 'init_declaration',
50
+ 'deinit_declaration',
51
+ ]);
52
+ /**
53
+ * Find the first function/method-like declaration in a snippet AST.
54
+ * Used by the chunker when parsing node.content where absolute line
55
+ * numbers don't apply.
56
+ */
57
+ export const findFunctionNode = (root) => {
58
+ if (FUNCTION_LIKE_TYPES.has(root.type))
59
+ return root;
60
+ for (let i = 0; i < root.namedChildCount; i++) {
61
+ const child = root.namedChild(i);
62
+ if (!child)
63
+ continue;
64
+ if (FUNCTION_LIKE_TYPES.has(child.type))
65
+ return child;
66
+ const found = findFunctionNode(child);
67
+ if (found)
68
+ return found;
69
+ }
70
+ return null;
71
+ };
72
+ /**
73
+ * Find the first class/struct/interface/enum-like declaration in an AST.
74
+ * Used when parsing node.content (a snippet, not a full file) where
75
+ * absolute line numbers don't apply.
76
+ */
77
+ export const findDeclarationNode = (root) => {
78
+ const CLASS_LIKE_TYPES = new Set([
79
+ 'class_declaration',
80
+ 'class_definition',
81
+ 'struct_declaration',
82
+ 'struct_item',
83
+ 'interface_declaration',
84
+ 'interface_definition',
85
+ 'enum_declaration',
86
+ 'enum_item',
87
+ 'type_declaration', // Go: type X struct
88
+ 'declaration', // Go: type X struct
89
+ 'object_declaration', // Kotlin: object
90
+ 'impl_item', // Rust: impl
91
+ ]);
92
+ if (CLASS_LIKE_TYPES.has(root.type))
93
+ return root;
94
+ for (let i = 0; i < root.namedChildCount; i++) {
95
+ const child = root.namedChild(i);
96
+ if (!child)
97
+ continue;
98
+ if (CLASS_LIKE_TYPES.has(child.type))
99
+ return child;
100
+ const found = findDeclarationNode(child);
101
+ if (found)
102
+ return found;
103
+ }
104
+ return null;
105
+ };
@@ -0,0 +1,12 @@
1
+ /**
2
+ * Character-based sliding window chunking (pure, no tree-sitter dependency)
3
+ */
4
+ export interface Chunk {
5
+ text: string;
6
+ chunkIndex: number;
7
+ startOffset: number;
8
+ endOffset: number;
9
+ startLine: number;
10
+ endLine: number;
11
+ }
12
+ export declare const characterChunk: (content: string, startLine: number, endLine: number, chunkSize?: number, overlap?: number) => Chunk[];
@@ -0,0 +1,43 @@
1
+ /**
2
+ * Character-based sliding window chunking (pure, no tree-sitter dependency)
3
+ */
4
+ import { buildLineIndex, resolveChunkLines } from './line-index.js';
5
+ export const characterChunk = (content, startLine, endLine, chunkSize = 1200, overlap = 120) => {
6
+ if (content.length <= chunkSize) {
7
+ return [
8
+ {
9
+ text: content,
10
+ chunkIndex: 0,
11
+ startOffset: 0,
12
+ endOffset: content.length,
13
+ startLine,
14
+ endLine,
15
+ },
16
+ ];
17
+ }
18
+ const chunks = [];
19
+ let offset = 0;
20
+ const lineOffsets = buildLineIndex(content);
21
+ while (offset < content.length) {
22
+ const end = Math.min(offset + chunkSize, content.length);
23
+ const chunkText = content.slice(offset, end);
24
+ const lineRange = resolveChunkLines(lineOffsets, offset, end, startLine);
25
+ chunks.push({
26
+ text: chunkText,
27
+ chunkIndex: chunks.length,
28
+ startOffset: offset,
29
+ endOffset: end,
30
+ startLine: lineRange.startLine,
31
+ endLine: lineRange.endLine,
32
+ });
33
+ offset = end - overlap;
34
+ if (offset >= content.length)
35
+ break;
36
+ if (end >= content.length)
37
+ break;
38
+ if (offset <= (chunks.length > 1 ? end - chunkSize : 0)) {
39
+ offset = end;
40
+ }
41
+ }
42
+ return chunks;
43
+ };
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Chunker Module
3
+ *
4
+ * Splits code nodes into chunks for embedding.
5
+ * - Function/Method: AST-aware chunking by statement boundaries
6
+ * - Other types: character-based sliding window fallback
7
+ * - Short content (≤ chunkSize): no chunking
8
+ */
9
+ export { type Chunk, characterChunk } from './character-chunk.js';
10
+ import type { Chunk } from './character-chunk.js';
11
+ /**
12
+ * Main chunkNode function: dispatches by label
13
+ */
14
+ export declare const chunkNode: (label: string, content: string, filePath: string, startLine: number, endLine: number, chunkSize?: number, overlap?: number) => Promise<Chunk[]>;
@@ -0,0 +1,234 @@
1
+ /**
2
+ * Chunker Module
3
+ *
4
+ * Splits code nodes into chunks for embedding.
5
+ * - Function/Method: AST-aware chunking by statement boundaries
6
+ * - Other types: character-based sliding window fallback
7
+ * - Short content (≤ chunkSize): no chunking
8
+ */
9
+ export { characterChunk } from './character-chunk.js';
10
+ import { characterChunk } from './character-chunk.js';
11
+ import { ensureAndParse, findDeclarationNode, findFunctionNode } from './ast-utils.js';
12
+ import { buildLineIndex, resolveChunkLines } from './line-index.js';
13
+ /**
14
+ * Main chunkNode function: dispatches by label
15
+ */
16
+ export const chunkNode = async (label, content, filePath, startLine, endLine, chunkSize = 1200, overlap = 120) => {
17
+ // Content fits in one chunk — no splitting needed
18
+ if (content.length <= chunkSize) {
19
+ return [
20
+ {
21
+ text: content,
22
+ chunkIndex: 0,
23
+ startOffset: 0,
24
+ endOffset: content.length,
25
+ startLine,
26
+ endLine,
27
+ },
28
+ ];
29
+ }
30
+ // Only function-like labels get AST chunking
31
+ if (label === 'Function' || label === 'Method' || label === 'Constructor') {
32
+ try {
33
+ const astChunks = await astChunk(content, filePath, startLine, endLine, chunkSize, overlap);
34
+ if (astChunks.length > 0)
35
+ return astChunks;
36
+ }
37
+ catch {
38
+ // AST parsing failed — fall through to character fallback
39
+ }
40
+ }
41
+ if (label === 'Class' || label === 'Interface') {
42
+ try {
43
+ const declarationChunks = await declarationChunk(label, content, filePath, startLine, endLine, chunkSize, overlap);
44
+ if (declarationChunks.length > 0)
45
+ return declarationChunks;
46
+ }
47
+ catch {
48
+ // AST parsing failed — fall through to character fallback
49
+ }
50
+ }
51
+ // Character-based fallback for everything else
52
+ return characterChunk(content, startLine, endLine, chunkSize, overlap);
53
+ };
54
+ /**
55
+ * AST-based chunking for Function/Method
56
+ * Parse snippet content, locate the function declaration node,
57
+ * split body by statement boundaries.
58
+ */
59
+ const astChunk = async (content, filePath, startLine, endLine, chunkSize, overlap) => {
60
+ const tree = await ensureAndParse(content, filePath);
61
+ if (!tree)
62
+ return [];
63
+ const root = tree.rootNode;
64
+ const lineOffsets = buildLineIndex(content);
65
+ // Find the function/method declaration in the snippet AST.
66
+ // tree-sitter parses node.content (a snippet), so rows are relative (0-based).
67
+ const targetNode = findFunctionNode(root);
68
+ if (!targetNode)
69
+ return [];
70
+ // Get the body (statements) via childForFieldName('body')
71
+ const bodyNode = targetNode.childForFieldName('body');
72
+ if (!bodyNode)
73
+ return [];
74
+ // Extract individual statements
75
+ const statements = [];
76
+ for (let i = 0; i < bodyNode.namedChildCount; i++) {
77
+ const child = bodyNode.namedChild(i);
78
+ if (!child)
79
+ continue;
80
+ statements.push({
81
+ startIndex: child.startIndex,
82
+ endIndex: child.endIndex,
83
+ });
84
+ }
85
+ if (statements.length === 0)
86
+ return [];
87
+ return chunkByUnits(content, lineOffsets, startLine, chunkSize, overlap, statements, targetNode.startIndex, targetNode.endIndex, true, true);
88
+ };
89
+ const DECLARATION_BODY_NODE_TYPES = new Set([
90
+ 'class_body',
91
+ 'object_type',
92
+ 'declaration_list',
93
+ 'interface_body',
94
+ ]);
95
+ const FIELD_LIKE_MEMBER_TYPES = new Set([
96
+ 'field_definition',
97
+ 'public_field_definition',
98
+ 'property_definition',
99
+ 'property_signature',
100
+ 'variable_declarator',
101
+ 'lexical_declaration',
102
+ 'pair',
103
+ 'enum_assignment',
104
+ ]);
105
+ const declarationChunk = async (label, content, filePath, startLine, endLine, chunkSize, overlap) => {
106
+ const tree = await ensureAndParse(content, filePath);
107
+ if (!tree)
108
+ return [];
109
+ const targetNode = findDeclarationNode(tree.rootNode);
110
+ if (!targetNode)
111
+ return [];
112
+ const bodyNode = getDeclarationBodyNode(targetNode);
113
+ if (!bodyNode)
114
+ return [];
115
+ const members = collectDeclarationUnits(bodyNode, label);
116
+ if (members.length === 0)
117
+ return [];
118
+ return chunkByUnits(content, buildLineIndex(content), startLine, chunkSize, overlap, members, targetNode.startIndex, targetNode.endIndex, false, false);
119
+ };
120
+ const buildChunk = (content, lineOffsets, chunkIndex, startOffset, endOffset, baseStartLine) => {
121
+ const lineRange = resolveChunkLines(lineOffsets, startOffset, endOffset, baseStartLine);
122
+ return {
123
+ text: content.slice(startOffset, endOffset),
124
+ chunkIndex,
125
+ startOffset,
126
+ endOffset,
127
+ startLine: lineRange.startLine,
128
+ endLine: lineRange.endLine,
129
+ };
130
+ };
131
+ const chunkByUnits = (content, lineOffsets, baseStartLine, chunkSize, overlap, units, containerStartOffset, containerEndOffset, includeContainerPrefixOnFirstChunk, includeContainerSuffixOnLastChunk) => {
132
+ const chunks = [];
133
+ let chunkStartUnitIdx = 0;
134
+ while (chunkStartUnitIdx < units.length) {
135
+ const chunkStartOffset = chunkStartUnitIdx === 0 && includeContainerPrefixOnFirstChunk
136
+ ? containerStartOffset
137
+ : units[chunkStartUnitIdx].startIndex;
138
+ let chunkEndUnitIdx = chunkStartUnitIdx;
139
+ let candidateEndOffset = chunkEndUnitIdx === units.length - 1 && includeContainerSuffixOnLastChunk
140
+ ? containerEndOffset
141
+ : units[chunkEndUnitIdx].endIndex;
142
+ while (chunkEndUnitIdx + 1 < units.length) {
143
+ const nextEndOffset = chunkEndUnitIdx + 1 === units.length - 1 && includeContainerSuffixOnLastChunk
144
+ ? containerEndOffset
145
+ : units[chunkEndUnitIdx + 1].endIndex;
146
+ if (nextEndOffset - chunkStartOffset > chunkSize)
147
+ break;
148
+ chunkEndUnitIdx += 1;
149
+ candidateEndOffset = nextEndOffset;
150
+ }
151
+ if (candidateEndOffset - chunkStartOffset > chunkSize) {
152
+ const oversizedUnit = units[chunkStartUnitIdx];
153
+ const oversizedLineRange = resolveChunkLines(lineOffsets, oversizedUnit.startIndex, oversizedUnit.endIndex, baseStartLine);
154
+ const oversizedChunks = characterChunk(content.slice(oversizedUnit.startIndex, oversizedUnit.endIndex), oversizedLineRange.startLine, oversizedLineRange.endLine, chunkSize, overlap).map((chunk, offsetIdx) => ({
155
+ ...chunk,
156
+ chunkIndex: chunks.length + offsetIdx,
157
+ startOffset: chunk.startOffset + oversizedUnit.startIndex,
158
+ endOffset: chunk.endOffset + oversizedUnit.startIndex,
159
+ }));
160
+ chunks.push(...oversizedChunks);
161
+ chunkStartUnitIdx += 1;
162
+ continue;
163
+ }
164
+ chunks.push(buildChunk(content, lineOffsets, chunks.length, chunkStartOffset, candidateEndOffset, baseStartLine));
165
+ if (chunkEndUnitIdx === units.length - 1) {
166
+ break;
167
+ }
168
+ const nextChunkStartUnitIdx = findOverlapStartIndex(units, chunkStartUnitIdx, chunkEndUnitIdx, overlap);
169
+ if (nextChunkStartUnitIdx <= chunkStartUnitIdx) {
170
+ chunkStartUnitIdx = chunkEndUnitIdx + 1;
171
+ }
172
+ else {
173
+ chunkStartUnitIdx = nextChunkStartUnitIdx;
174
+ }
175
+ }
176
+ return chunks;
177
+ };
178
+ const findOverlapStartIndex = (statements, chunkStartStmtIdx, chunkEndStmtIdx, overlapSize) => {
179
+ if (overlapSize <= 0)
180
+ return chunkEndStmtIdx + 1;
181
+ let overlapStartIdx = chunkEndStmtIdx;
182
+ while (overlapStartIdx > chunkStartStmtIdx) {
183
+ const overlapLength = statements[chunkEndStmtIdx].endIndex - statements[overlapStartIdx - 1].startIndex;
184
+ if (overlapLength > overlapSize)
185
+ break;
186
+ overlapStartIdx -= 1;
187
+ }
188
+ return overlapStartIdx;
189
+ };
190
+ const getDeclarationBodyNode = (node) => {
191
+ const bodyNode = node.childForFieldName?.('body');
192
+ if (bodyNode)
193
+ return bodyNode;
194
+ for (let i = 0; i < node.namedChildCount; i++) {
195
+ const child = node.namedChild(i);
196
+ if (!child)
197
+ continue;
198
+ if (DECLARATION_BODY_NODE_TYPES.has(child.type))
199
+ return child;
200
+ }
201
+ return null;
202
+ };
203
+ const collectDeclarationUnits = (bodyNode, label) => {
204
+ const members = [];
205
+ for (let i = 0; i < bodyNode.namedChildCount; i++) {
206
+ const child = bodyNode.namedChild(i);
207
+ if (!child)
208
+ continue;
209
+ members.push({
210
+ startIndex: child.startIndex,
211
+ endIndex: child.endIndex,
212
+ groupable: label === 'Class' && FIELD_LIKE_MEMBER_TYPES.has(child.type),
213
+ });
214
+ }
215
+ if (members.length === 0)
216
+ return [];
217
+ const grouped = [];
218
+ let current = members[0];
219
+ for (let i = 1; i < members.length; i++) {
220
+ const next = members[i];
221
+ if (current.groupable && next.groupable) {
222
+ current = {
223
+ startIndex: current.startIndex,
224
+ endIndex: next.endIndex,
225
+ groupable: true,
226
+ };
227
+ continue;
228
+ }
229
+ grouped.push({ startIndex: current.startIndex, endIndex: current.endIndex });
230
+ current = next;
231
+ }
232
+ grouped.push({ startIndex: current.startIndex, endIndex: current.endIndex });
233
+ return grouped;
234
+ };
@@ -131,6 +131,11 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
131
131
  try {
132
132
  // Configure transformers.js environment
133
133
  env.allowLocalModels = false;
134
+ // Default cache to user-writable location. transformers.js defaults to
135
+ // ./node_modules/.cache inside its own install dir, which is unwritable
136
+ // when gitnexus is installed globally (e.g. /usr/lib/node_modules/).
137
+ // Respect HF_HOME if set, otherwise fall back to ~/.cache/huggingface.
138
+ env.cacheDir = process.env.HF_HOME ?? `${process.env.HOME}/.cache/huggingface`;
134
139
  const isDev = process.env.NODE_ENV === 'development';
135
140
  if (isDev) {
136
141
  console.log(`🧠 Loading embedding model: ${finalConfig.modelId}`);
@@ -3,16 +3,34 @@
3
3
  *
4
4
  * Orchestrates the background embedding process:
5
5
  * 1. Query embeddable nodes from LadybugDB
6
- * 2. Generate text representations
7
- * 3. Batch embed using transformers.js
8
- * 4. Update LadybugDB with embeddings
6
+ * 2. Generate text representations with enriched metadata
7
+ * 3. Chunk long nodes, batch embed
8
+ * 4. Update LadybugDB with chunk-aware embeddings
9
9
  * 5. Create vector index for semantic search
10
10
  */
11
- import { type EmbeddingProgress, type EmbeddingConfig, type SemanticSearchResult } from './types.js';
11
+ import { type EmbeddingProgress, type EmbeddingConfig, type EmbeddableNode, type SemanticSearchResult, type EmbeddingContext } from './types.js';
12
+ /**
13
+ * Compute a stable content fingerprint for an embeddable node.
14
+ * Used to detect when the underlying text has changed so stale vectors
15
+ * can be replaced (DELETE-then-INSERT, the Kuzu-sanctioned pattern for
16
+ * vector-indexed rows).
+ *
+ * @param node - The embeddable node to fingerprint.
+ * @param config - Optional partial embedding configuration.
+ * NOTE(review): which config fields (if any) influence the hash is not
+ * visible from this declaration; confirm against the implementation.
+ * @returns The fingerprint as a string.
17
+ */
18
+ export declare const contentHashForNode: (node: EmbeddableNode, config?: Partial<EmbeddingConfig>) => string;
12
19
  /**
13
20
  * Progress callback type
14
21
  */
15
22
  export type EmbeddingProgressCallback = (progress: EmbeddingProgress) => void;
23
+ /**
24
+ * Batch INSERT chunk-aware embeddings into CodeEmbedding table
+ *
+ * @param executeWithReusedStatement - Callback that receives a Cypher string
+ * together with a list of parameter records and resolves when done.
+ * NOTE(review): presumably one parameter record is passed per entry of
+ * `updates`; confirm against the implementation.
+ * @param updates - Embedding rows to insert. Each entry carries the owning
+ * `nodeId`, the `chunkIndex` of the chunk, its `startLine` / `endLine`,
+ * the `embedding` vector and an optional `contentHash`.
+ * NOTE(review): line numbering base (0- or 1-based) is not visible here.
+ * @returns A promise that resolves with no value.
25
+ */
26
+ export declare const batchInsertEmbeddings: (executeWithReusedStatement: (cypher: string, paramsList: Array<Record<string, any>>) => Promise<void>, updates: Array<{
27
+ nodeId: string;
28
+ chunkIndex: number;
29
+ startLine: number;
30
+ endLine: number;
31
+ embedding: number[];
32
+ contentHash?: string;
33
+ }>) => Promise<void>;
16
34
  /**
17
35
  * Run the embedding pipeline
18
36
  *
@@ -21,31 +39,18 @@ export type EmbeddingProgressCallback = (progress: EmbeddingProgress) => void;
21
39
  * @param onProgress - Callback for progress updates
22
40
  * @param config - Optional configuration override
23
41
  * @param skipNodeIds - Optional set of node IDs that already have embeddings (incremental mode)
42
+ * @param context - Optional repo/server context for metadata enrichment
43
+ * @param existingEmbeddings - Optional map of nodeId → contentHash for incremental mode.
44
+ * Nodes whose hash matches are skipped; nodes with a changed hash are DELETE'd
45
+ * and re-embedded; nodes not in the map are embedded fresh.
46
+
24
47
  */
25
- export declare const runEmbeddingPipeline: (executeQuery: (cypher: string) => Promise<any[]>, executeWithReusedStatement: (cypher: string, paramsList: Array<Record<string, any>>) => Promise<void>, onProgress: EmbeddingProgressCallback, config?: Partial<EmbeddingConfig>, skipNodeIds?: Set<string>) => Promise<void>;
48
+ export declare const runEmbeddingPipeline: (executeQuery: (cypher: string) => Promise<any[]>, executeWithReusedStatement: (cypher: string, paramsList: Array<Record<string, any>>) => Promise<void>, onProgress: EmbeddingProgressCallback, config?: Partial<EmbeddingConfig>, skipNodeIds?: Set<string>, context?: EmbeddingContext, existingEmbeddings?: Map<string, string>) => Promise<void>;
26
49
  /**
27
- * Perform semantic search using the vector index
28
- *
29
- * Uses CodeEmbedding table and queries each node table to get metadata
30
- *
31
- * @param executeQuery - Function to execute Cypher queries
32
- * @param query - Search query text
33
- * @param k - Number of results to return (default: 10)
34
- * @param maxDistance - Maximum distance threshold (default: 0.5)
35
- * @returns Array of search results ordered by relevance
50
+ * Perform semantic search using the vector index with chunk deduplication
36
51
  */
37
52
  export declare const semanticSearch: (executeQuery: (cypher: string) => Promise<any[]>, query: string, k?: number, maxDistance?: number) => Promise<SemanticSearchResult[]>;
38
53
  /**
39
54
  * Semantic search with graph expansion (flattened results)
40
- *
41
- * Note: With multi-table schema, graph traversal is simplified.
42
- * Returns semantic matches with their metadata.
43
- * For full graph traversal, use execute_vector_cypher tool directly.
44
- *
45
- * @param executeQuery - Function to execute Cypher queries
46
- * @param query - Search query text
47
- * @param k - Number of initial semantic matches (default: 5)
48
- * @param _hops - Unused (kept for API compatibility).
49
- * @returns Semantic matches with metadata
50
55
  */
51
56
  export declare const semanticSearchWithContext: (executeQuery: (cypher: string) => Promise<any[]>, query: string, k?: number, _hops?: number) => Promise<any[]>;