gitnexus 1.6.2-rc.2 → 1.6.2-rc.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_shared/lbug/schema-constants.d.ts +1 -1
- package/dist/_shared/lbug/schema-constants.d.ts.map +1 -1
- package/dist/_shared/lbug/schema-constants.js +1 -0
- package/dist/_shared/lbug/schema-constants.js.map +1 -1
- package/dist/cli/analyze.js +3 -0
- package/dist/core/embeddings/ast-utils.d.ts +22 -0
- package/dist/core/embeddings/ast-utils.js +105 -0
- package/dist/core/embeddings/character-chunk.d.ts +12 -0
- package/dist/core/embeddings/character-chunk.js +43 -0
- package/dist/core/embeddings/chunker.d.ts +14 -0
- package/dist/core/embeddings/chunker.js +234 -0
- package/dist/core/embeddings/embedder.js +5 -0
- package/dist/core/embeddings/embedding-pipeline.d.ts +29 -24
- package/dist/core/embeddings/embedding-pipeline.js +244 -125
- package/dist/core/embeddings/line-index.d.ts +7 -0
- package/dist/core/embeddings/line-index.js +42 -0
- package/dist/core/embeddings/server-mapping.d.ts +15 -0
- package/dist/core/embeddings/server-mapping.js +33 -0
- package/dist/core/embeddings/structural-extractor.d.ts +15 -0
- package/dist/core/embeddings/structural-extractor.js +58 -0
- package/dist/core/embeddings/text-generator.d.ts +20 -13
- package/dist/core/embeddings/text-generator.js +151 -119
- package/dist/core/embeddings/types.d.ts +81 -3
- package/dist/core/embeddings/types.js +105 -3
- package/dist/core/group/extractors/http-patterns/node.js +130 -0
- package/dist/core/group/extractors/manifest-extractor.js +20 -5
- package/dist/core/group/sync.js +49 -1
- package/dist/core/ingestion/call-extractors/configs/c-cpp.d.ts +3 -0
- package/dist/core/ingestion/call-extractors/configs/c-cpp.js +8 -0
- package/dist/core/ingestion/call-extractors/configs/csharp.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/csharp.js +6 -0
- package/dist/core/ingestion/call-extractors/configs/dart.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/dart.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/go.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/go.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/jvm.d.ts +3 -0
- package/dist/core/ingestion/call-extractors/configs/jvm.js +51 -0
- package/dist/core/ingestion/call-extractors/configs/php.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/php.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/python.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/python.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/ruby.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/ruby.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/rust.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/rust.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/swift.d.ts +2 -0
- package/dist/core/ingestion/call-extractors/configs/swift.js +5 -0
- package/dist/core/ingestion/call-extractors/configs/typescript-javascript.d.ts +3 -0
- package/dist/core/ingestion/call-extractors/configs/typescript-javascript.js +8 -0
- package/dist/core/ingestion/call-extractors/generic.d.ts +5 -0
- package/dist/core/ingestion/call-extractors/generic.js +59 -0
- package/dist/core/ingestion/call-processor.d.ts +1 -3
- package/dist/core/ingestion/call-processor.js +49 -47
- package/dist/core/ingestion/call-types.d.ts +60 -0
- package/dist/core/ingestion/call-types.js +2 -0
- package/dist/core/ingestion/class-extractors/configs/c-cpp.d.ts +3 -0
- package/dist/core/ingestion/class-extractors/configs/c-cpp.js +11 -0
- package/dist/core/ingestion/class-extractors/configs/csharp.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/csharp.js +21 -0
- package/dist/core/ingestion/class-extractors/configs/dart.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/dart.js +7 -0
- package/dist/core/ingestion/class-extractors/configs/go.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/go.js +20 -0
- package/dist/core/ingestion/class-extractors/configs/jvm.d.ts +3 -0
- package/dist/core/ingestion/class-extractors/configs/jvm.js +35 -0
- package/dist/core/ingestion/class-extractors/configs/php.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/php.js +7 -0
- package/dist/core/ingestion/class-extractors/configs/python.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/python.js +7 -0
- package/dist/core/ingestion/class-extractors/configs/ruby.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/ruby.js +7 -0
- package/dist/core/ingestion/class-extractors/configs/rust.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/rust.js +7 -0
- package/dist/core/ingestion/class-extractors/configs/swift.d.ts +2 -0
- package/dist/core/ingestion/class-extractors/configs/swift.js +18 -0
- package/dist/core/ingestion/class-extractors/configs/typescript-javascript.d.ts +4 -0
- package/dist/core/ingestion/class-extractors/configs/typescript-javascript.js +28 -0
- package/dist/core/ingestion/field-types.d.ts +1 -1
- package/dist/core/ingestion/import-resolvers/configs/c-cpp.d.ts +7 -0
- package/dist/core/ingestion/import-resolvers/configs/c-cpp.js +14 -0
- package/dist/core/ingestion/import-resolvers/configs/csharp.d.ts +8 -0
- package/dist/core/ingestion/import-resolvers/configs/csharp.js +27 -0
- package/dist/core/ingestion/import-resolvers/configs/dart.d.ts +17 -0
- package/dist/core/ingestion/import-resolvers/{dart.js → configs/dart.js} +26 -16
- package/dist/core/ingestion/import-resolvers/configs/go.d.ts +8 -0
- package/dist/core/ingestion/import-resolvers/configs/go.js +26 -0
- package/dist/core/ingestion/import-resolvers/configs/jvm.d.ts +13 -0
- package/dist/core/ingestion/import-resolvers/configs/jvm.js +68 -0
- package/dist/core/ingestion/import-resolvers/configs/php.d.ts +8 -0
- package/dist/core/ingestion/import-resolvers/configs/php.js +15 -0
- package/dist/core/ingestion/import-resolvers/configs/python.d.ts +12 -0
- package/dist/core/ingestion/import-resolvers/configs/python.js +41 -0
- package/dist/core/ingestion/import-resolvers/configs/ruby.d.ts +8 -0
- package/dist/core/ingestion/import-resolvers/configs/ruby.js +16 -0
- package/dist/core/ingestion/import-resolvers/configs/rust.d.ts +8 -0
- package/dist/core/ingestion/import-resolvers/configs/rust.js +54 -0
- package/dist/core/ingestion/import-resolvers/configs/swift.d.ts +8 -0
- package/dist/core/ingestion/import-resolvers/{swift.js → configs/swift.js} +10 -5
- package/dist/core/ingestion/import-resolvers/configs/typescript-javascript.d.ts +9 -0
- package/dist/core/ingestion/import-resolvers/configs/typescript-javascript.js +23 -0
- package/dist/core/ingestion/import-resolvers/csharp.d.ts +4 -5
- package/dist/core/ingestion/import-resolvers/csharp.js +4 -20
- package/dist/core/ingestion/import-resolvers/go.d.ts +4 -5
- package/dist/core/ingestion/import-resolvers/go.js +4 -19
- package/dist/core/ingestion/import-resolvers/jvm.d.ts +5 -10
- package/dist/core/ingestion/import-resolvers/jvm.js +5 -58
- package/dist/core/ingestion/import-resolvers/php.d.ts +4 -5
- package/dist/core/ingestion/import-resolvers/php.js +4 -7
- package/dist/core/ingestion/import-resolvers/python.d.ts +3 -6
- package/dist/core/ingestion/import-resolvers/python.js +3 -18
- package/dist/core/ingestion/import-resolvers/resolver-factory.d.ts +24 -0
- package/dist/core/ingestion/import-resolvers/resolver-factory.js +33 -0
- package/dist/core/ingestion/import-resolvers/ruby.d.ts +4 -5
- package/dist/core/ingestion/import-resolvers/ruby.js +4 -7
- package/dist/core/ingestion/import-resolvers/rust.d.ts +4 -5
- package/dist/core/ingestion/import-resolvers/rust.js +4 -47
- package/dist/core/ingestion/import-resolvers/standard.d.ts +3 -9
- package/dist/core/ingestion/import-resolvers/standard.js +7 -8
- package/dist/core/ingestion/import-resolvers/types.d.ts +24 -0
- package/dist/core/ingestion/language-provider.d.ts +12 -0
- package/dist/core/ingestion/languages/c-cpp.js +15 -12
- package/dist/core/ingestion/languages/csharp.js +11 -21
- package/dist/core/ingestion/languages/dart.js +11 -7
- package/dist/core/ingestion/languages/go.js +11 -20
- package/dist/core/ingestion/languages/java.js +11 -18
- package/dist/core/ingestion/languages/kotlin.js +11 -13
- package/dist/core/ingestion/languages/php.js +11 -7
- package/dist/core/ingestion/languages/python.js +11 -7
- package/dist/core/ingestion/languages/ruby.js +11 -7
- package/dist/core/ingestion/languages/rust.js +11 -7
- package/dist/core/ingestion/languages/swift.js +11 -18
- package/dist/core/ingestion/languages/typescript.js +15 -23
- package/dist/core/ingestion/languages/vue.js +11 -17
- package/dist/core/ingestion/model/index.d.ts +2 -2
- package/dist/core/ingestion/model/index.js +1 -1
- package/dist/core/ingestion/model/resolve.d.ts +3 -0
- package/dist/core/ingestion/model/resolve.js +6 -2
- package/dist/core/ingestion/parsing-processor.d.ts +1 -2
- package/dist/core/ingestion/tree-sitter-queries.d.ts +11 -11
- package/dist/core/ingestion/tree-sitter-queries.js +81 -0
- package/dist/core/ingestion/type-env.d.ts +1 -1
- package/dist/core/ingestion/utils/ast-helpers.d.ts +1 -1
- package/dist/core/ingestion/utils/ast-helpers.js +3 -0
- package/dist/core/ingestion/variable-extractors/configs/c-cpp.d.ts +3 -0
- package/dist/core/ingestion/variable-extractors/configs/c-cpp.js +81 -0
- package/dist/core/ingestion/variable-extractors/configs/csharp.d.ts +9 -0
- package/dist/core/ingestion/variable-extractors/configs/csharp.js +63 -0
- package/dist/core/ingestion/variable-extractors/configs/dart.d.ts +2 -0
- package/dist/core/ingestion/variable-extractors/configs/dart.js +94 -0
- package/dist/core/ingestion/variable-extractors/configs/go.d.ts +2 -0
- package/dist/core/ingestion/variable-extractors/configs/go.js +83 -0
- package/dist/core/ingestion/variable-extractors/configs/jvm.d.ts +18 -0
- package/dist/core/ingestion/variable-extractors/configs/jvm.js +115 -0
- package/dist/core/ingestion/variable-extractors/configs/php.d.ts +14 -0
- package/dist/core/ingestion/variable-extractors/configs/php.js +58 -0
- package/dist/core/ingestion/variable-extractors/configs/python.d.ts +2 -0
- package/dist/core/ingestion/variable-extractors/configs/python.js +101 -0
- package/dist/core/ingestion/variable-extractors/configs/ruby.d.ts +11 -0
- package/dist/core/ingestion/variable-extractors/configs/ruby.js +52 -0
- package/dist/core/ingestion/variable-extractors/configs/rust.d.ts +2 -0
- package/dist/core/ingestion/variable-extractors/configs/rust.js +76 -0
- package/dist/core/ingestion/variable-extractors/configs/swift.d.ts +2 -0
- package/dist/core/ingestion/variable-extractors/configs/swift.js +88 -0
- package/dist/core/ingestion/variable-extractors/configs/typescript-javascript.d.ts +3 -0
- package/dist/core/ingestion/variable-extractors/configs/typescript-javascript.js +83 -0
- package/dist/core/ingestion/variable-extractors/generic.d.ts +5 -0
- package/dist/core/ingestion/variable-extractors/generic.js +80 -0
- package/dist/core/ingestion/variable-types.d.ts +82 -0
- package/dist/core/ingestion/variable-types.js +2 -0
- package/dist/core/ingestion/workers/parse-worker.js +196 -166
- package/dist/core/ingestion/workers/worker-pool.js +3 -0
- package/dist/core/lbug/csv-generator.js +1 -0
- package/dist/core/lbug/lbug-adapter.d.ts +13 -4
- package/dist/core/lbug/lbug-adapter.js +166 -81
- package/dist/core/lbug/schema.d.ts +9 -1
- package/dist/core/lbug/schema.js +19 -2
- package/dist/core/run-analyze.js +17 -4
- package/dist/core/tree-sitter/parser-loader.d.ts +3 -0
- package/dist/core/tree-sitter/parser-loader.js +17 -8
- package/dist/mcp/core/embedder.js +5 -0
- package/dist/mcp/local/local-backend.js +29 -19
- package/dist/server/api.js +10 -21
- package/package.json +5 -3
- package/scripts/build-tree-sitter-proto.cjs +82 -0
- package/vendor/node_modules/node-addon-api/node_addon_api.Makefile +6 -0
- package/vendor/node_modules/node-addon-api/node_addon_api.target.mk +104 -0
- package/vendor/node_modules/node-addon-api/node_addon_api_except.target.mk +108 -0
- package/vendor/node_modules/node-addon-api/node_addon_api_except_all.target.mk +104 -0
- package/vendor/node_modules/node-addon-api/node_addon_api_maybe.target.mk +104 -0
- package/vendor/tree-sitter-proto/package.json +1 -7
- package/dist/core/ingestion/call-sites/extract-language-call-site.d.ts +0 -10
- package/dist/core/ingestion/call-sites/extract-language-call-site.js +0 -22
- package/dist/core/ingestion/call-sites/java.d.ts +0 -9
- package/dist/core/ingestion/call-sites/java.js +0 -30
- package/dist/core/ingestion/import-resolvers/dart.d.ts +0 -7
- package/dist/core/ingestion/import-resolvers/swift.d.ts +0 -7
- package/dist/core/ingestion/import-resolvers/vue.d.ts +0 -8
- package/dist/core/ingestion/import-resolvers/vue.js +0 -9
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
* Full DDL schemas remain in each package's own schema.ts because
|
|
8
8
|
* the CLI uses native LadybugDB and the web uses WASM.
|
|
9
9
|
*/
|
|
10
|
-
export declare const NODE_TABLES: readonly ["File", "Folder", "Function", "Class", "Interface", "Method", "CodeElement", "Community", "Process", "Section", "Struct", "Enum", "Macro", "Typedef", "Union", "Namespace", "Trait", "Impl", "TypeAlias", "Const", "Static", "Property", "Record", "Delegate", "Annotation", "Constructor", "Template", "Module", "Route", "Tool"];
|
|
10
|
+
export declare const NODE_TABLES: readonly ["File", "Folder", "Function", "Class", "Interface", "Method", "CodeElement", "Community", "Process", "Section", "Struct", "Enum", "Macro", "Typedef", "Union", "Namespace", "Trait", "Impl", "TypeAlias", "Const", "Static", "Variable", "Property", "Record", "Delegate", "Annotation", "Constructor", "Template", "Module", "Route", "Tool"];
|
|
11
11
|
export type NodeTableName = (typeof NODE_TABLES)[number];
|
|
12
12
|
export declare const REL_TABLE_NAME = "CodeRelation";
|
|
13
13
|
export declare const REL_TYPES: readonly ["CONTAINS", "DEFINES", "IMPORTS", "CALLS", "EXTENDS", "IMPLEMENTS", "HAS_METHOD", "HAS_PROPERTY", "ACCESSES", "METHOD_OVERRIDES", "OVERRIDES", "METHOD_IMPLEMENTS", "MEMBER_OF", "STEP_IN_PROCESS", "HANDLES_ROUTE", "FETCHES", "HANDLES_TOOL", "ENTRY_POINT_OF", "WRAPS", "QUERIES"];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"schema-constants.d.ts","sourceRoot":"","sources":["../../src/lbug/schema-constants.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,eAAO,MAAM,WAAW,
|
|
1
|
+
{"version":3,"file":"schema-constants.d.ts","sourceRoot":"","sources":["../../src/lbug/schema-constants.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,eAAO,MAAM,WAAW,0VAgCd,CAAC;AAEX,MAAM,MAAM,aAAa,GAAG,CAAC,OAAO,WAAW,CAAC,CAAC,MAAM,CAAC,CAAC;AAEzD,eAAO,MAAM,cAAc,iBAAiB,CAAC;AAE7C,eAAO,MAAM,SAAS,iSAqBZ,CAAC;AAEX,MAAM,MAAM,OAAO,GAAG,CAAC,OAAO,SAAS,CAAC,CAAC,MAAM,CAAC,CAAC;AAEjD,eAAO,MAAM,oBAAoB,kBAAkB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"schema-constants.js","sourceRoot":"","sources":["../../src/lbug/schema-constants.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,MAAM,CAAC,MAAM,WAAW,GAAG;IACzB,MAAM;IACN,QAAQ;IACR,UAAU;IACV,OAAO;IACP,WAAW;IACX,QAAQ;IACR,aAAa;IACb,WAAW;IACX,SAAS;IACT,SAAS;IACT,QAAQ;IACR,MAAM;IACN,OAAO;IACP,SAAS;IACT,OAAO;IACP,WAAW;IACX,OAAO;IACP,MAAM;IACN,WAAW;IACX,OAAO;IACP,QAAQ;IACR,UAAU;IACV,QAAQ;IACR,UAAU;IACV,YAAY;IACZ,aAAa;IACb,UAAU;IACV,QAAQ;IACR,OAAO;IACP,MAAM;CACE,CAAC;AAIX,MAAM,CAAC,MAAM,cAAc,GAAG,cAAc,CAAC;AAE7C,MAAM,CAAC,MAAM,SAAS,GAAG;IACvB,UAAU;IACV,SAAS;IACT,SAAS;IACT,OAAO;IACP,SAAS;IACT,YAAY;IACZ,YAAY;IACZ,cAAc;IACd,UAAU;IACV,kBAAkB;IAClB,WAAW,EAAE,mEAAmE;IAChF,mBAAmB;IACnB,WAAW;IACX,iBAAiB;IACjB,eAAe;IACf,SAAS;IACT,cAAc;IACd,gBAAgB;IAChB,OAAO;IACP,SAAS;CACD,CAAC;AAIX,MAAM,CAAC,MAAM,oBAAoB,GAAG,eAAe,CAAC"}
|
|
1
|
+
{"version":3,"file":"schema-constants.js","sourceRoot":"","sources":["../../src/lbug/schema-constants.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,MAAM,CAAC,MAAM,WAAW,GAAG;IACzB,MAAM;IACN,QAAQ;IACR,UAAU;IACV,OAAO;IACP,WAAW;IACX,QAAQ;IACR,aAAa;IACb,WAAW;IACX,SAAS;IACT,SAAS;IACT,QAAQ;IACR,MAAM;IACN,OAAO;IACP,SAAS;IACT,OAAO;IACP,WAAW;IACX,OAAO;IACP,MAAM;IACN,WAAW;IACX,OAAO;IACP,QAAQ;IACR,UAAU;IACV,UAAU;IACV,QAAQ;IACR,UAAU;IACV,YAAY;IACZ,aAAa;IACb,UAAU;IACV,QAAQ;IACR,OAAO;IACP,MAAM;CACE,CAAC;AAIX,MAAM,CAAC,MAAM,cAAc,GAAG,cAAc,CAAC;AAE7C,MAAM,CAAC,MAAM,SAAS,GAAG;IACvB,UAAU;IACV,SAAS;IACT,SAAS;IACT,OAAO;IACP,SAAS;IACT,YAAY;IACZ,YAAY;IACZ,cAAc;IACd,UAAU;IACV,kBAAkB;IAClB,WAAW,EAAE,mEAAmE;IAChF,mBAAmB;IACnB,WAAW;IACX,iBAAiB;IACjB,eAAe;IACf,SAAS;IACT,cAAc;IACd,gBAAgB;IAChB,OAAO;IACP,SAAS;CACD,CAAC;AAIX,MAAM,CAAC,MAAM,oBAAoB,GAAG,eAAe,CAAC"}
|
package/dist/cli/analyze.js
CHANGED
|
@@ -114,9 +114,11 @@ export const analyzeCommand = async (inputPath, options) => {
|
|
|
114
114
|
const origLog = console.log.bind(console);
|
|
115
115
|
const origWarn = console.warn.bind(console);
|
|
116
116
|
const origError = console.error.bind(console);
|
|
117
|
+
let barCurrentValue = 0;
|
|
117
118
|
const barLog = (...args) => {
|
|
118
119
|
process.stdout.write('\x1b[2K\r');
|
|
119
120
|
origLog(args.map((a) => (typeof a === 'string' ? a : String(a))).join(' '));
|
|
121
|
+
bar.update(barCurrentValue);
|
|
120
122
|
};
|
|
121
123
|
console.log = barLog;
|
|
122
124
|
console.warn = barLog;
|
|
@@ -125,6 +127,7 @@ export const analyzeCommand = async (inputPath, options) => {
|
|
|
125
127
|
let lastPhaseLabel = 'Initializing...';
|
|
126
128
|
let phaseStart = Date.now();
|
|
127
129
|
const updateBar = (value, phaseLabel) => {
|
|
130
|
+
barCurrentValue = value;
|
|
128
131
|
if (phaseLabel !== lastPhaseLabel) {
|
|
129
132
|
lastPhaseLabel = phaseLabel;
|
|
130
133
|
phaseStart = Date.now();
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared AST utilities for the embedding pipeline.
|
|
3
|
+
* Centralizes parser caching and tree-sitter node lookups
|
|
4
|
+
* used by both chunker.ts and structural-extractor.ts.
|
|
5
|
+
*/
|
|
6
|
+
/**
|
|
7
|
+
* Ensure parser is initialized and language is loaded, then parse content.
|
|
8
|
+
* Returns null if language is unavailable or parsing fails.
|
|
9
|
+
*/
|
|
10
|
+
export declare const ensureAndParse: (content: string, filePath: string) => Promise<any | null>;
|
|
11
|
+
/**
|
|
12
|
+
* Find the first function/method-like declaration in a snippet AST.
|
|
13
|
+
* Used by the chunker when parsing node.content where absolute line
|
|
14
|
+
* numbers don't apply.
|
|
15
|
+
*/
|
|
16
|
+
export declare const findFunctionNode: (root: any) => any | null;
|
|
17
|
+
/**
|
|
18
|
+
* Find the first class/struct/interface/enum-like declaration in an AST.
|
|
19
|
+
* Used when parsing node.content (a snippet, not a full file) where
|
|
20
|
+
* absolute line numbers don't apply.
|
|
21
|
+
*/
|
|
22
|
+
export declare const findDeclarationNode: (root: any) => any | null;
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared AST utilities for the embedding pipeline.
|
|
3
|
+
* Centralizes parser caching and tree-sitter node lookups
|
|
4
|
+
* used by both chunker.ts and structural-extractor.ts.
|
|
5
|
+
*/
|
|
6
|
+
import { getLanguageFromFilename } from '../../_shared/index.js';
|
|
7
|
+
import { createParserForLanguage, isLanguageAvailable, resolveLanguageKey, } from '../tree-sitter/parser-loader.js';
|
|
8
|
+
const parserCache = new Map();
|
|
9
|
+
/**
 * Parse `content` with the tree-sitter parser that matches `filePath`.
 *
 * The language is derived from the file name. Parser instances are created
 * lazily via `createParserForLanguage` and memoized in the module-level
 * `parserCache`, keyed by `resolveLanguageKey(language, filePath)`.
 *
 * @param content  Source text handed to the parser.
 * @param filePath Path used to detect the language and to resolve the cache key.
 * @returns Whatever `parser.parse(content)` returns, or `null` when no language
 *          is detected for the file or the detected language is unavailable.
 *          Errors raised while creating the parser or parsing are not caught here.
 */
export const ensureAndParse = async (content, filePath) => {
    const language = getLanguageFromFilename(filePath);
    if (!language || !isLanguageAvailable(language)) {
        return null;
    }
    const cacheKey = resolveLanguageKey(language, filePath);
    const cachedParser = parserCache.get(cacheKey);
    if (cachedParser) {
        return cachedParser.parse(content);
    }
    // First request for this key: build the parser and remember it for later calls.
    const freshParser = await createParserForLanguage(language, filePath);
    parserCache.set(cacheKey, freshParser);
    return freshParser.parse(content);
};
|
|
27
|
+
/** Tree-sitter node types treated as function/method-like declarations. */
const FUNCTION_LIKE_TYPES = new Set([
    'function_declaration',
    'function_definition',
    'method_declaration',
    'method_definition',
    'function_item',
    'function_signature_item',
    'arrow_function',
    'function_expression',
    'generator_function_declaration',
    'generator_function',
    'async_function_declaration',
    'async_arrow_function',
    'constructor_declaration',
    'constructor_definition',
    'compact_constructor_declaration',
    'short_function_declaration',
    'proc_declaration',
    'func_literal',
    'local_function_statement',
    'anonymous_function',
    'lambda_literal',
    'init_declaration',
    'deinit_declaration',
]);
/**
 * Find the first function/method-like node in a snippet AST.
 *
 * Nodes are visited in pre-order (a node before its named children, children
 * left to right) and the first node whose `type` is in `FUNCTION_LIKE_TYPES`
 * is returned. The walk uses an explicit stack instead of recursion so that
 * very deeply nested trees cannot exhaust the JavaScript call stack.
 *
 * @param root Node exposing `type`, `namedChildCount` and `namedChild(i)`.
 * @returns The first matching node, or `null` when there is none (or `root`
 *          itself is null/undefined).
 */
export const findFunctionNode = (root) => {
    if (!root) {
        return null;
    }
    const pending = [root];
    while (pending.length > 0) {
        const node = pending.pop();
        if (FUNCTION_LIKE_TYPES.has(node.type)) {
            return node;
        }
        // Push children right-to-left so the leftmost child is popped (visited) first.
        for (let i = node.namedChildCount - 1; i >= 0; i -= 1) {
            const child = node.namedChild(i);
            if (child) {
                pending.push(child);
            }
        }
    }
    return null;
};
|
|
72
|
+
/**
 * Tree-sitter node types treated as class/struct/interface/enum-like declarations.
 * Built once at module load instead of on every lookup call.
 */
const CLASS_LIKE_TYPES = new Set([
    'class_declaration',
    'class_definition',
    'struct_declaration',
    'struct_item',
    'interface_declaration',
    'interface_definition',
    'enum_declaration',
    'enum_item',
    'type_declaration', // Go: type X struct
    'declaration', // labelled "Go: type X struct" upstream as well — NOTE(review): confirm which grammar emits this
    'object_declaration', // Kotlin: object
    'impl_item', // Rust: impl
]);
/**
 * Find the first class/struct/interface/enum-like declaration in an AST.
 *
 * Intended for snippet ASTs (parsed from a node's content rather than a whole
 * file), where absolute line numbers cannot be used to locate the declaration.
 * Nodes are visited in pre-order (a node before its named children, children
 * left to right). The walk uses an explicit stack so deeply nested trees
 * cannot exhaust the JavaScript call stack.
 *
 * @param root Node exposing `type`, `namedChildCount` and `namedChild(i)`.
 * @returns The first node whose `type` is in `CLASS_LIKE_TYPES`, or `null`
 *          when there is none (or `root` itself is null/undefined).
 */
export const findDeclarationNode = (root) => {
    if (!root) {
        return null;
    }
    const pending = [root];
    while (pending.length > 0) {
        const node = pending.pop();
        if (CLASS_LIKE_TYPES.has(node.type)) {
            return node;
        }
        // Push children right-to-left so the leftmost child is popped (visited) first.
        for (let i = node.namedChildCount - 1; i >= 0; i -= 1) {
            const child = node.namedChild(i);
            if (child) {
                pending.push(child);
            }
        }
    }
    return null;
};
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Character-based sliding window chunking (pure, no tree-sitter dependency)
|
|
3
|
+
*/
|
|
4
|
+
export interface Chunk {
|
|
5
|
+
text: string;
|
|
6
|
+
chunkIndex: number;
|
|
7
|
+
startOffset: number;
|
|
8
|
+
endOffset: number;
|
|
9
|
+
startLine: number;
|
|
10
|
+
endLine: number;
|
|
11
|
+
}
|
|
12
|
+
export declare const characterChunk: (content: string, startLine: number, endLine: number, chunkSize?: number, overlap?: number) => Chunk[];
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Character-based sliding window chunking (pure, no tree-sitter dependency)
|
|
3
|
+
*/
|
|
4
|
+
import { buildLineIndex, resolveChunkLines } from './line-index.js';
|
|
5
|
+
/**
 * Split `content` into fixed-size, overlapping character windows.
 *
 * Offsets are string indices into `content` (as used by `String.prototype.slice`).
 * Content that already fits in `chunkSize` is returned as a single chunk that
 * carries the caller's `startLine`/`endLine` unchanged. Otherwise each window's
 * line range is resolved through `resolveChunkLines`, with `startLine` as the
 * base line of offset 0.
 *
 * @param content   Text to split.
 * @param startLine Line number of the first line of `content`.
 * @param endLine   Line number of the last line of `content` (only used for the
 *                  single-chunk case).
 * @param chunkSize Maximum characters per chunk; must be > 0 when `content` is
 *                  longer than it.
 * @param overlap   Characters shared between consecutive chunks. Values that
 *                  would not move the window forward (overlap >= chunkSize) fall
 *                  back to non-overlapping windows; negative or NaN values are
 *                  treated as no overlap.
 * @returns Chunks in order, with `chunkIndex` starting at 0.
 * @throws {RangeError} When `content` needs splitting but `chunkSize` is <= 0,
 *                      which previously made the loop run forever.
 */
export const characterChunk = (content, startLine, endLine, chunkSize = 1200, overlap = 120) => {
    if (content.length <= chunkSize) {
        return [
            {
                text: content,
                chunkIndex: 0,
                startOffset: 0,
                endOffset: content.length,
                startLine,
                endLine,
            },
        ];
    }
    if (chunkSize <= 0) {
        // A non-positive window never advances `offset`; fail fast instead of looping.
        throw new RangeError(`characterChunk: chunkSize must be greater than 0 (received ${chunkSize})`);
    }
    // A negative overlap would skip text between windows and NaN would abort
    // after the first window; both are normalised to "no overlap".
    const effectiveOverlap = overlap > 0 ? overlap : 0;
    const chunks = [];
    const lineOffsets = buildLineIndex(content);
    let offset = 0;
    while (offset < content.length) {
        const end = Math.min(offset + chunkSize, content.length);
        const lineRange = resolveChunkLines(lineOffsets, offset, end, startLine);
        chunks.push({
            text: content.slice(offset, end),
            chunkIndex: chunks.length,
            startOffset: offset,
            endOffset: end,
            startLine: lineRange.startLine,
            endLine: lineRange.endLine,
        });
        if (end >= content.length) {
            break;
        }
        // Step back by the overlap, but always make forward progress: if the
        // rewound position is not past the current window start, continue at `end`.
        const rewound = end - effectiveOverlap;
        offset = rewound > offset ? rewound : end;
    }
    return chunks;
};
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Chunker Module
|
|
3
|
+
*
|
|
4
|
+
* Splits code nodes into chunks for embedding.
|
|
5
|
+
 * - Function/Method/Constructor: AST-aware chunking by statement boundaries
|
|
6
|
+
 * - Class/Interface: AST-aware chunking by member boundaries; all other types (and any AST failure): character-based sliding window fallback
|
|
7
|
+
* - Short content (≤ chunkSize): no chunking
|
|
8
|
+
*/
|
|
9
|
+
export { type Chunk, characterChunk } from './character-chunk.js';
|
|
10
|
+
import type { Chunk } from './character-chunk.js';
|
|
11
|
+
/**
|
|
12
|
+
* Main chunkNode function: dispatches by label
|
|
13
|
+
*/
|
|
14
|
+
export declare const chunkNode: (label: string, content: string, filePath: string, startLine: number, endLine: number, chunkSize?: number, overlap?: number) => Promise<Chunk[]>;
|
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Chunker Module
|
|
3
|
+
*
|
|
4
|
+
* Splits code nodes into chunks for embedding.
|
|
5
|
+
 * - Function/Method/Constructor: AST-aware chunking by statement boundaries
|
|
6
|
+
 * - Class/Interface: AST-aware chunking by member boundaries; all other types (and any AST failure): character-based sliding window fallback
|
|
7
|
+
* - Short content (≤ chunkSize): no chunking
|
|
8
|
+
*/
|
|
9
|
+
export { characterChunk } from './character-chunk.js';
|
|
10
|
+
import { characterChunk } from './character-chunk.js';
|
|
11
|
+
import { ensureAndParse, findDeclarationNode, findFunctionNode } from './ast-utils.js';
|
|
12
|
+
import { buildLineIndex, resolveChunkLines } from './line-index.js';
|
|
13
|
+
/**
 * Split one code node into embedding chunks, picking the strategy from `label`.
 *
 * - Content no longer than `chunkSize` is returned as a single chunk.
 * - 'Function' / 'Method' / 'Constructor' are first tried with `astChunk`.
 * - 'Class' / 'Interface' are first tried with `declarationChunk`.
 * - Every other label, an empty structured result, or an exception from the
 *   structured strategy ends in `characterChunk`.
 *
 * @param label     Node label used to select the strategy.
 * @param content   Text of the node.
 * @param filePath  Path forwarded to the AST-based strategies.
 * @param startLine Line number of the first line of `content`.
 * @param endLine   Line number of the last line of `content`.
 * @param chunkSize Maximum characters per chunk (default 1200).
 * @param overlap   Overlap forwarded to the chunking strategies (default 120).
 * @returns Promise resolving to the list of chunks.
 */
export const chunkNode = async (label, content, filePath, startLine, endLine, chunkSize = 1200, overlap = 120) => {
    if (content.length <= chunkSize) {
        // Small enough already: hand back the whole node as chunk 0.
        const whole = {
            text: content,
            chunkIndex: 0,
            startOffset: 0,
            endOffset: content.length,
            startLine,
            endLine,
        };
        return [whole];
    }
    // Choose the structure-aware strategy (if any) for this label.
    let structuredStrategy = null;
    switch (label) {
        case 'Function':
        case 'Method':
        case 'Constructor':
            structuredStrategy = () => astChunk(content, filePath, startLine, endLine, chunkSize, overlap);
            break;
        case 'Class':
        case 'Interface':
            structuredStrategy = () => declarationChunk(label, content, filePath, startLine, endLine, chunkSize, overlap);
            break;
        default:
            break;
    }
    if (structuredStrategy !== null) {
        try {
            const structuredChunks = await structuredStrategy();
            if (structuredChunks.length > 0) {
                return structuredChunks;
            }
        }
        catch {
            // AST parsing failed — fall through to character fallback
        }
    }
    // Character-based fallback for everything else
    return characterChunk(content, startLine, endLine, chunkSize, overlap);
};
|
|
54
|
+
/**
 * Statement-boundary chunking for function-like nodes.
 *
 * Parses the snippet, locates the first function-like node via
 * `findFunctionNode`, and takes the named children of its `body` field as the
 * units handed to `chunkByUnits`. Both container flags are passed as `true`,
 * i.e. the text before the first statement is included on the first chunk and
 * the text after the last statement on the last chunk.
 *
 * Because the parser sees only the snippet, node positions are relative to
 * `content`; `startLine` is forwarded as the base line. `endLine` is accepted
 * for signature symmetry but is not read here.
 *
 * @returns The chunks, or an empty array when the snippet cannot be parsed,
 *          has no function-like node, has no `body` field, or the body has no
 *          named children — the caller treats an empty array as "fall back".
 */
const astChunk = async (content, filePath, startLine, endLine, chunkSize, overlap) => {
    const tree = await ensureAndParse(content, filePath);
    if (!tree) {
        return [];
    }
    const rootNode = tree.rootNode;
    const lineOffsets = buildLineIndex(content);
    const functionNode = findFunctionNode(rootNode);
    const body = functionNode ? functionNode.childForFieldName('body') : null;
    if (!body) {
        return [];
    }
    // One unit per named child of the body, recorded as an offset span.
    const statementSpans = Array.from({ length: body.namedChildCount }, (_, idx) => body.namedChild(idx))
        .filter(Boolean)
        .map((statement) => ({
            startIndex: statement.startIndex,
            endIndex: statement.endIndex,
        }));
    if (statementSpans.length === 0) {
        return [];
    }
    return chunkByUnits(content, lineOffsets, startLine, chunkSize, overlap, statementSpans, functionNode.startIndex, functionNode.endIndex, true, true);
};
|
|
89
|
+
/**
 * Node types recognised as the body container of a class/interface-like
 * declaration. Presumably consulted by `getDeclarationBodyNode`, which is
 * defined outside this view — verify there.
 */
const DECLARATION_BODY_NODE_TYPES = new Set(['class_body', 'object_type', 'declaration_list', 'interface_body']);
/**
 * Node types regarded as field-like members (as opposed to methods).
 * Presumably used when collecting declaration units — the consumer is not
 * visible here, so confirm against `collectDeclarationUnits`.
 */
const FIELD_LIKE_MEMBER_TYPES = new Set([
    'field_definition',
    'public_field_definition',
    'property_definition',
    'property_signature',
    'variable_declarator',
    'lexical_declaration',
    'pair',
    'enum_assignment',
]);
|
|
105
|
+
/**
 * Member-boundary chunking for class/interface-like nodes.
 *
 * Parses the snippet, locates the first class-like node via
 * `findDeclarationNode`, resolves its body with `getDeclarationBodyNode`, and
 * passes the units returned by `collectDeclarationUnits(body, label)` to
 * `chunkByUnits`. Both container flags are passed as `false`, so chunks start
 * and end at unit boundaries rather than at the declaration's own start/end.
 * `endLine` is accepted for signature symmetry but is not read here.
 *
 * @returns The chunks, or an empty array when any lookup step yields nothing —
 *          the caller treats an empty array as "fall back".
 */
const declarationChunk = async (label, content, filePath, startLine, endLine, chunkSize, overlap) => {
    const tree = await ensureAndParse(content, filePath);
    if (!tree) {
        return [];
    }
    const declaration = findDeclarationNode(tree.rootNode);
    if (!declaration) {
        return [];
    }
    const body = getDeclarationBodyNode(declaration);
    if (!body) {
        return [];
    }
    const memberUnits = collectDeclarationUnits(body, label);
    if (memberUnits.length === 0) {
        return [];
    }
    return chunkByUnits(content, buildLineIndex(content), startLine, chunkSize, overlap, memberUnits, declaration.startIndex, declaration.endIndex, false, false);
};
|
|
120
|
+
/**
 * Assemble one chunk record for the span [`startOffset`, `endOffset`) of `content`.
 *
 * @param content       Full snippet text the offsets refer to.
 * @param lineOffsets   Line index for `content`, as produced by `buildLineIndex`.
 * @param chunkIndex    Position of this chunk in the output list.
 * @param startOffset   Start offset of the chunk within `content`.
 * @param endOffset     End offset of the chunk within `content` (exclusive for the slice).
 * @param baseStartLine Base line number forwarded to `resolveChunkLines`.
 * @returns Chunk with its text, index, offsets and resolved start/end lines.
 */
const buildChunk = (content, lineOffsets, chunkIndex, startOffset, endOffset, baseStartLine) => {
    const { startLine, endLine } = resolveChunkLines(lineOffsets, startOffset, endOffset, baseStartLine);
    const text = content.slice(startOffset, endOffset);
    return { text, chunkIndex, startOffset, endOffset, startLine, endLine };
};
|
|
131
|
+
/**
 * Pack consecutive units (statements / members) into chunks of at most
 * `chunkSize` characters, optionally overlapping adjacent chunks.
 *
 * Behaviour:
 *  - Units are added greedily while the span from the chunk start to the end
 *    of the next unit stays within `chunkSize`.
 *  - A unit that does not fit on its own is split with characterChunk over
 *    the unit's own span (container prefix/suffix are not included there);
 *    the returned offsets are shifted by the unit's start index.
 *  - After a regular chunk, the next chunk starts at the index chosen by
 *    findOverlapStartIndex. If the unit following the chunk would not fit
 *    from that start, the overlap is shrunk one unit at a time until it does
 *    (or dropped entirely). This avoids emitting a chunk that is fully
 *    contained in the previous one.
 *
 * @param content - Full source text the unit offsets refer to.
 * @param lineOffsets - Line index forwarded to resolveChunkLines/buildChunk.
 * @param baseStartLine - Base line number forwarded to the line resolver.
 * @param chunkSize - Maximum chunk length in characters.
 * @param overlap - Overlap budget in characters.
 * @param units - Ordered `{ startIndex, endIndex }` spans.
 * @param containerStartOffset - Start offset of the enclosing container.
 * @param containerEndOffset - End offset of the enclosing container.
 * @param includeContainerPrefixOnFirstChunk - Start the first chunk at the
 *   container start instead of the first unit.
 * @param includeContainerSuffixOnLastChunk - End the last unit's chunk at the
 *   container end instead of the unit end.
 * @returns The list of chunk records, indexed in emission order.
 */
const chunkByUnits = (content, lineOffsets, baseStartLine, chunkSize, overlap, units, containerStartOffset, containerEndOffset, includeContainerPrefixOnFirstChunk, includeContainerSuffixOnLastChunk) => {
    const chunks = [];
    const lastUnitIdx = units.length - 1;
    // End offset of a chunk that finishes on unit `idx`.
    const endOffsetAt = (idx) => idx === lastUnitIdx && includeContainerSuffixOnLastChunk
        ? containerEndOffset
        : units[idx].endIndex;
    let chunkStartUnitIdx = 0;
    while (chunkStartUnitIdx < units.length) {
        const chunkStartOffset = chunkStartUnitIdx === 0 && includeContainerPrefixOnFirstChunk
            ? containerStartOffset
            : units[chunkStartUnitIdx].startIndex;
        let chunkEndUnitIdx = chunkStartUnitIdx;
        let candidateEndOffset = endOffsetAt(chunkEndUnitIdx);
        // Greedily absorb following units while the span still fits.
        while (chunkEndUnitIdx + 1 < units.length) {
            const nextEndOffset = endOffsetAt(chunkEndUnitIdx + 1);
            if (nextEndOffset - chunkStartOffset > chunkSize)
                break;
            chunkEndUnitIdx += 1;
            candidateEndOffset = nextEndOffset;
        }
        if (candidateEndOffset - chunkStartOffset > chunkSize) {
            // Even the first unit alone is too large: fall back to plain
            // character chunking of that unit and move on to the next one.
            const oversizedUnit = units[chunkStartUnitIdx];
            const oversizedLineRange = resolveChunkLines(lineOffsets, oversizedUnit.startIndex, oversizedUnit.endIndex, baseStartLine);
            const oversizedChunks = characterChunk(content.slice(oversizedUnit.startIndex, oversizedUnit.endIndex), oversizedLineRange.startLine, oversizedLineRange.endLine, chunkSize, overlap).map((chunk, offsetIdx) => ({
                ...chunk,
                chunkIndex: chunks.length + offsetIdx,
                // characterChunk offsets are relative to the slice; rebase them.
                startOffset: chunk.startOffset + oversizedUnit.startIndex,
                endOffset: chunk.endOffset + oversizedUnit.startIndex,
            }));
            chunks.push(...oversizedChunks);
            chunkStartUnitIdx += 1;
            continue;
        }
        chunks.push(buildChunk(content, lineOffsets, chunks.length, chunkStartOffset, candidateEndOffset, baseStartLine));
        if (chunkEndUnitIdx === lastUnitIdx) {
            break;
        }
        let nextChunkStartUnitIdx = findOverlapStartIndex(units, chunkStartUnitIdx, chunkEndUnitIdx, overlap);
        if (nextChunkStartUnitIdx <= chunkStartUnitIdx) {
            // No forward progress possible with overlap: start right after this chunk.
            nextChunkStartUnitIdx = chunkEndUnitIdx + 1;
        }
        // Shrink the overlap until the unit after this chunk fits; otherwise the
        // next chunk could not grow past chunkEndUnitIdx and would only repeat
        // text already emitted in the chunk above.
        const followingEndOffset = endOffsetAt(chunkEndUnitIdx + 1);
        while (nextChunkStartUnitIdx <= chunkEndUnitIdx &&
            followingEndOffset - units[nextChunkStartUnitIdx].startIndex > chunkSize) {
            nextChunkStartUnitIdx += 1;
        }
        chunkStartUnitIdx = nextChunkStartUnitIdx;
    }
    return chunks;
};
|
|
178
|
+
/**
 * Pick the unit index at which the next chunk should start so that it
 * overlaps the tail of the chunk `[chunkStartStmtIdx, chunkEndStmtIdx]`.
 *
 * Walks backwards from the chunk's last unit, extending the overlap by one
 * unit at a time while the span from that earlier unit's start to the chunk
 * end stays within `overlapSize`. The walk never goes below
 * `chunkStartStmtIdx`.
 *
 * @param statements - Ordered `{ startIndex, endIndex }` spans.
 * @param chunkStartStmtIdx - Index of the first unit of the finished chunk.
 * @param chunkEndStmtIdx - Index of the last unit of the finished chunk.
 * @param overlapSize - Overlap budget in characters.
 * @returns `chunkEndStmtIdx + 1` when `overlapSize <= 0`; otherwise an index
 *   in `[chunkStartStmtIdx, chunkEndStmtIdx]`.
 */
const findOverlapStartIndex = (statements, chunkStartStmtIdx, chunkEndStmtIdx, overlapSize) => {
    if (overlapSize <= 0) {
        return chunkEndStmtIdx + 1;
    }
    let candidateIdx = chunkEndStmtIdx;
    for (; candidateIdx > chunkStartStmtIdx; candidateIdx -= 1) {
        // Span the overlap would cover if it also included the previous unit.
        const widenedSpan = statements[chunkEndStmtIdx].endIndex - statements[candidateIdx - 1].startIndex;
        if (widenedSpan > overlapSize) {
            break;
        }
    }
    return candidateIdx;
};
|
|
190
|
+
/**
 * Locate the body node of a declaration.
 *
 * Prefers the child registered under the `body` field (when the node exposes
 * childForFieldName). Otherwise returns the first named child whose type is
 * listed in DECLARATION_BODY_NODE_TYPES.
 *
 * @param node - Declaration syntax node.
 * @returns The body node, or `null` when none is found.
 */
const getDeclarationBodyNode = (node) => {
    const fieldBody = node.childForFieldName?.('body');
    if (fieldBody) {
        return fieldBody;
    }
    let childIdx = 0;
    while (childIdx < node.namedChildCount) {
        const candidate = node.namedChild(childIdx);
        childIdx += 1;
        if (candidate && DECLARATION_BODY_NODE_TYPES.has(candidate.type)) {
            return candidate;
        }
    }
    return null;
};
|
|
203
|
+
/**
 * Turn the named children of a declaration body into chunkable units.
 *
 * Every non-null named child becomes one `{ startIndex, endIndex }` unit.
 * When `label` is 'Class', runs of consecutive children whose type is in
 * FIELD_LIKE_MEMBER_TYPES are merged into a single unit spanning from the
 * first child's start to the last child's end.
 *
 * @param bodyNode - Body node whose named children are the members.
 * @param label - Declaration label; only 'Class' enables field grouping.
 * @returns Ordered units; empty when the body has no named children.
 */
const collectDeclarationUnits = (bodyNode, label) => {
    const units = [];
    // Unit currently being accumulated; flushed when the run ends.
    let pending = null;
    const flushPending = () => {
        if (pending) {
            units.push({ startIndex: pending.startIndex, endIndex: pending.endIndex });
        }
    };
    for (let idx = 0; idx < bodyNode.namedChildCount; idx += 1) {
        const member = bodyNode.namedChild(idx);
        if (!member) {
            continue;
        }
        const span = {
            startIndex: member.startIndex,
            endIndex: member.endIndex,
            groupable: label === 'Class' && FIELD_LIKE_MEMBER_TYPES.has(member.type),
        };
        if (pending && pending.groupable && span.groupable) {
            // Extend the current run of field-like members.
            pending = { startIndex: pending.startIndex, endIndex: span.endIndex, groupable: true };
            continue;
        }
        flushPending();
        pending = span;
    }
    flushPending();
    return units;
};
|
|
@@ -131,6 +131,11 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
|
|
|
131
131
|
try {
|
|
132
132
|
// Configure transformers.js environment
|
|
133
133
|
env.allowLocalModels = false;
|
|
134
|
+
// Default cache to user-writable location. transformers.js defaults to
|
|
135
|
+
// ./node_modules/.cache inside its own install dir, which is unwritable
|
|
136
|
+
// when gitnexus is installed globally (e.g. /usr/lib/node_modules/).
|
|
137
|
+
// Respect HF_HOME if set, otherwise fall back to ~/.cache/huggingface.
|
|
138
|
+
env.cacheDir = process.env.HF_HOME ?? `${process.env.HOME}/.cache/huggingface`;
|
|
134
139
|
const isDev = process.env.NODE_ENV === 'development';
|
|
135
140
|
if (isDev) {
|
|
136
141
|
console.log(`🧠 Loading embedding model: ${finalConfig.modelId}`);
|
|
@@ -3,16 +3,34 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Orchestrates the background embedding process:
|
|
5
5
|
* 1. Query embeddable nodes from LadybugDB
|
|
6
|
-
* 2. Generate text representations
|
|
7
|
-
* 3.
|
|
8
|
-
* 4. Update LadybugDB with embeddings
|
|
6
|
+
* 2. Generate text representations with enriched metadata
|
|
7
|
+
* 3. Chunk long nodes, batch embed
|
|
8
|
+
* 4. Update LadybugDB with chunk-aware embeddings
|
|
9
9
|
* 5. Create vector index for semantic search
|
|
10
10
|
*/
|
|
11
|
-
import { type EmbeddingProgress, type EmbeddingConfig, type SemanticSearchResult } from './types.js';
|
|
11
|
+
import { type EmbeddingProgress, type EmbeddingConfig, type EmbeddableNode, type SemanticSearchResult, type EmbeddingContext } from './types.js';
|
|
12
|
+
/**
|
|
13
|
+
* Compute a stable content fingerprint for an embeddable node.
|
|
14
|
+
* Used to detect when the underlying text has changed so stale vectors
|
|
15
|
+
* can be replaced (DELETE-then-INSERT, the Kuzu-sanctioned pattern for
|
|
16
|
+
* vector-indexed rows).
|
|
17
|
+
*/
|
|
18
|
+
export declare const contentHashForNode: (node: EmbeddableNode, config?: Partial<EmbeddingConfig>) => string;
|
|
12
19
|
/**
|
|
13
20
|
* Progress callback type
|
|
14
21
|
*/
|
|
15
22
|
export type EmbeddingProgressCallback = (progress: EmbeddingProgress) => void;
|
|
23
|
+
/**
|
|
24
|
+
* Batch INSERT chunk-aware embeddings into CodeEmbedding table
|
|
25
|
+
*/
|
|
26
|
+
export declare const batchInsertEmbeddings: (executeWithReusedStatement: (cypher: string, paramsList: Array<Record<string, any>>) => Promise<void>, updates: Array<{
|
|
27
|
+
nodeId: string;
|
|
28
|
+
chunkIndex: number;
|
|
29
|
+
startLine: number;
|
|
30
|
+
endLine: number;
|
|
31
|
+
embedding: number[];
|
|
32
|
+
contentHash?: string;
|
|
33
|
+
}>) => Promise<void>;
|
|
16
34
|
/**
|
|
17
35
|
* Run the embedding pipeline
|
|
18
36
|
*
|
|
@@ -21,31 +39,18 @@ export type EmbeddingProgressCallback = (progress: EmbeddingProgress) => void;
|
|
|
21
39
|
* @param onProgress - Callback for progress updates
|
|
22
40
|
* @param config - Optional configuration override
|
|
23
41
|
* @param skipNodeIds - Optional set of node IDs that already have embeddings (incremental mode)
|
|
42
|
+
* @param context - Optional repo/server context for metadata enrichment
|
|
43
|
+
* @param existingEmbeddings - Optional map of nodeId → contentHash for incremental mode.
|
|
44
|
+
* Nodes whose hash matches are skipped; nodes with a changed hash are DELETE'd
|
|
45
|
+
* and re-embedded; nodes not in the map are embedded fresh.
|
|
46
|
+
|
|
24
47
|
*/
|
|
25
|
-
export declare const runEmbeddingPipeline: (executeQuery: (cypher: string) => Promise<any[]>, executeWithReusedStatement: (cypher: string, paramsList: Array<Record<string, any>>) => Promise<void>, onProgress: EmbeddingProgressCallback, config?: Partial<EmbeddingConfig>, skipNodeIds?: Set<string>) => Promise<void>;
|
|
48
|
+
export declare const runEmbeddingPipeline: (executeQuery: (cypher: string) => Promise<any[]>, executeWithReusedStatement: (cypher: string, paramsList: Array<Record<string, any>>) => Promise<void>, onProgress: EmbeddingProgressCallback, config?: Partial<EmbeddingConfig>, skipNodeIds?: Set<string>, context?: EmbeddingContext, existingEmbeddings?: Map<string, string>) => Promise<void>;
|
|
26
49
|
/**
|
|
27
|
-
* Perform semantic search using the vector index
|
|
28
|
-
*
|
|
29
|
-
* Uses CodeEmbedding table and queries each node table to get metadata
|
|
30
|
-
*
|
|
31
|
-
* @param executeQuery - Function to execute Cypher queries
|
|
32
|
-
* @param query - Search query text
|
|
33
|
-
* @param k - Number of results to return (default: 10)
|
|
34
|
-
* @param maxDistance - Maximum distance threshold (default: 0.5)
|
|
35
|
-
* @returns Array of search results ordered by relevance
|
|
50
|
+
* Perform semantic search using the vector index with chunk deduplication
|
|
36
51
|
*/
|
|
37
52
|
export declare const semanticSearch: (executeQuery: (cypher: string) => Promise<any[]>, query: string, k?: number, maxDistance?: number) => Promise<SemanticSearchResult[]>;
|
|
38
53
|
/**
|
|
39
54
|
* Semantic search with graph expansion (flattened results)
|
|
40
|
-
*
|
|
41
|
-
* Note: With multi-table schema, graph traversal is simplified.
|
|
42
|
-
* Returns semantic matches with their metadata.
|
|
43
|
-
* For full graph traversal, use execute_vector_cypher tool directly.
|
|
44
|
-
*
|
|
45
|
-
* @param executeQuery - Function to execute Cypher queries
|
|
46
|
-
* @param query - Search query text
|
|
47
|
-
* @param k - Number of initial semantic matches (default: 5)
|
|
48
|
-
* @param _hops - Unused (kept for API compatibility).
|
|
49
|
-
* @returns Semantic matches with metadata
|
|
50
55
|
*/
|
|
51
56
|
export declare const semanticSearchWithContext: (executeQuery: (cypher: string) => Promise<any[]>, query: string, k?: number, _hops?: number) => Promise<any[]>;
|