@zilliz/claude-context-core 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +275 -0
- package/dist/context.d.ts +234 -0
- package/dist/context.d.ts.map +1 -0
- package/dist/context.js +879 -0
- package/dist/context.js.map +1 -0
- package/dist/embedding/base-embedding.d.ts +45 -0
- package/dist/embedding/base-embedding.d.ts.map +1 -0
- package/dist/embedding/base-embedding.js +36 -0
- package/dist/embedding/base-embedding.js.map +1 -0
- package/dist/embedding/gemini-embedding.d.ts +51 -0
- package/dist/embedding/gemini-embedding.d.ts.map +1 -0
- package/dist/embedding/gemini-embedding.js +143 -0
- package/dist/embedding/gemini-embedding.js.map +1 -0
- package/dist/embedding/index.d.ts +6 -0
- package/dist/embedding/index.d.ts.map +1 -0
- package/dist/embedding/index.js +24 -0
- package/dist/embedding/index.js.map +1 -0
- package/dist/embedding/ollama-embedding.d.ts +64 -0
- package/dist/embedding/ollama-embedding.d.ts.map +1 -0
- package/dist/embedding/ollama-embedding.js +205 -0
- package/dist/embedding/ollama-embedding.js.map +1 -0
- package/dist/embedding/openai-embedding.d.ts +36 -0
- package/dist/embedding/openai-embedding.d.ts.map +1 -0
- package/dist/embedding/openai-embedding.js +103 -0
- package/dist/embedding/openai-embedding.js.map +1 -0
- package/dist/embedding/voyageai-embedding.d.ts +43 -0
- package/dist/embedding/voyageai-embedding.d.ts.map +1 -0
- package/dist/embedding/voyageai-embedding.js +223 -0
- package/dist/embedding/voyageai-embedding.js.map +1 -0
- package/dist/index.d.ts +8 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +24 -0
- package/dist/index.js.map +1 -0
- package/dist/splitter/ast-splitter.d.ts +22 -0
- package/dist/splitter/ast-splitter.d.ts.map +1 -0
- package/dist/splitter/ast-splitter.js +227 -0
- package/dist/splitter/ast-splitter.js.map +1 -0
- package/dist/splitter/index.d.ts +41 -0
- package/dist/splitter/index.d.ts.map +1 -0
- package/dist/splitter/index.js +27 -0
- package/dist/splitter/index.js.map +1 -0
- package/dist/splitter/langchain-splitter.d.ts +13 -0
- package/dist/splitter/langchain-splitter.d.ts.map +1 -0
- package/dist/splitter/langchain-splitter.js +118 -0
- package/dist/splitter/langchain-splitter.js.map +1 -0
- package/dist/sync/merkle.d.ts +26 -0
- package/dist/sync/merkle.d.ts.map +1 -0
- package/dist/sync/merkle.js +112 -0
- package/dist/sync/merkle.js.map +1 -0
- package/dist/sync/synchronizer.d.ts +30 -0
- package/dist/sync/synchronizer.d.ts.map +1 -0
- package/dist/sync/synchronizer.js +339 -0
- package/dist/sync/synchronizer.js.map +1 -0
- package/dist/types.d.ts +14 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/env-manager.d.ts +19 -0
- package/dist/utils/env-manager.d.ts.map +1 -0
- package/dist/utils/env-manager.js +125 -0
- package/dist/utils/env-manager.js.map +1 -0
- package/dist/utils/index.d.ts +2 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +7 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/vectordb/index.d.ts +5 -0
- package/dist/vectordb/index.d.ts.map +1 -0
- package/dist/vectordb/index.js +14 -0
- package/dist/vectordb/index.js.map +1 -0
- package/dist/vectordb/milvus-restful-vectordb.d.ts +51 -0
- package/dist/vectordb/milvus-restful-vectordb.d.ts.map +1 -0
- package/dist/vectordb/milvus-restful-vectordb.js +406 -0
- package/dist/vectordb/milvus-restful-vectordb.js.map +1 -0
- package/dist/vectordb/milvus-vectordb.d.ts +34 -0
- package/dist/vectordb/milvus-vectordb.d.ts.map +1 -0
- package/dist/vectordb/milvus-vectordb.js +248 -0
- package/dist/vectordb/milvus-vectordb.js.map +1 -0
- package/dist/vectordb/types.d.ts +75 -0
- package/dist/vectordb/types.d.ts.map +1 -0
- package/dist/vectordb/types.js +9 -0
- package/dist/vectordb/types.js.map +1 -0
- package/dist/vectordb/zilliz-utils.d.ts +135 -0
- package/dist/vectordb/zilliz-utils.d.ts.map +1 -0
- package/dist/vectordb/zilliz-utils.js +192 -0
- package/dist/vectordb/zilliz-utils.js.map +1 -0
- package/package.json +56 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Module re-export helper (presumably emitted by the TypeScript compiler; the source map names ../src/index.ts).
// Exposes property `k` of module `m` on object `o` under the name `k2` (which defaults to `k`).
// A helper already present on `this` is reused; otherwise the implementation is picked by whether Object.create exists.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
// Keep the source descriptor only when it is an accessor on an __esModule module, or a data
// property that is neither writable nor configurable; in every other case install a getter
// that reads m[k] on each access so the binding stays live.
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
// Fallback when Object.create is unavailable: copy the current value once (not a live binding).
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
// Re-exports every key enumerated by for...in over module `m` onto `exports` through __createBinding,
// skipping "default" and any key that `exports` already owns (so earlier exports win on a name clash).
var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
14
|
+
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
15
|
+
};
|
|
16
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
|
+
__exportStar(require("./splitter"), exports);
|
|
18
|
+
__exportStar(require("./embedding"), exports);
|
|
19
|
+
__exportStar(require("./vectordb"), exports);
|
|
20
|
+
__exportStar(require("./types"), exports);
|
|
21
|
+
__exportStar(require("./context"), exports);
|
|
22
|
+
__exportStar(require("./sync/synchronizer"), exports);
|
|
23
|
+
__exportStar(require("./utils"), exports);
|
|
24
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;AAAA,6CAA2B;AAC3B,8CAA4B;AAC5B,6CAA2B;AAC3B,0CAAwB;AACxB,4CAA0B;AAC1B,sDAAoC;AACpC,0CAAwB"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { Splitter, CodeChunk } from './index';
|
|
2
|
+
/**
 * Splitter that chunks source code along syntax-tree nodes and delegates to a
 * LangChain-based splitter for languages it does not handle or when parsing fails.
 */
export declare class AstCodeSplitter implements Splitter {
|
|
3
|
+
/** Maximum chunk length; compared against the string length of chunk content. */
private chunkSize;
|
|
4
|
+
/** Number of trailing characters of the previous chunk prepended to each following chunk. */
private chunkOverlap;
|
|
5
|
+
/** Parser instance shared by all split() calls; its language is set on every call. */
private parser;
|
|
6
|
+
/** Splitter used when the language is unsupported or AST splitting throws. */
private langchainFallback;
|
|
7
|
+
/**
 * @param chunkSize Maximum chunk length; 2500 when not supplied
 * @param chunkOverlap Overlap length; 300 when not supplied
 */
constructor(chunkSize?: number, chunkOverlap?: number);
|
|
8
|
+
/**
 * Split code into chunks.
 * @param code Source text to split
 * @param language Language name or alias (matched case-insensitively)
 * @param filePath Optional path, copied into chunk metadata and used in log messages
 * @returns Chunks from the AST splitter, or from the fallback splitter
 */
split(code: string, language: string, filePath?: string): Promise<CodeChunk[]>;
|
|
9
|
+
/** Updates the chunk size on this splitter and on its fallback splitter. */
setChunkSize(chunkSize: number): void;
|
|
10
|
+
/** Updates the chunk overlap on this splitter and on its fallback splitter. */
setChunkOverlap(chunkOverlap: number): void;
|
|
11
|
+
private getLanguageConfig;
|
|
12
|
+
private extractChunks;
|
|
13
|
+
private refineChunks;
|
|
14
|
+
private splitLargeChunk;
|
|
15
|
+
private addOverlap;
|
|
16
|
+
private getLineCount;
|
|
17
|
+
/**
|
|
18
|
+
* Check if AST splitting is supported for the given language name or alias (case-insensitive)
|
|
19
|
+
*/
|
|
20
|
+
static isLanguageSupported(language: string): boolean;
|
|
21
|
+
}
|
|
22
|
+
//# sourceMappingURL=ast-splitter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ast-splitter.d.ts","sourceRoot":"","sources":["../../src/splitter/ast-splitter.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAsB9C,qBAAa,eAAgB,YAAW,QAAQ;IAC5C,OAAO,CAAC,SAAS,CAAgB;IACjC,OAAO,CAAC,YAAY,CAAe;IACnC,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,iBAAiB,CAAM;gBAEnB,SAAS,CAAC,EAAE,MAAM,EAAE,YAAY,CAAC,EAAE,MAAM;IAU/C,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC;IAgCpF,YAAY,CAAC,SAAS,EAAE,MAAM,GAAG,IAAI;IAKrC,eAAe,CAAC,YAAY,EAAE,MAAM,GAAG,IAAI;IAK3C,OAAO,CAAC,iBAAiB;IAoBzB,OAAO,CAAC,aAAa;YAuDP,YAAY;IAgB1B,OAAO,CAAC,eAAe;IAgDvB,OAAO,CAAC,UAAU;IA4BlB,OAAO,CAAC,YAAY;IAIpB;;OAEG;IACH,MAAM,CAAC,mBAAmB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO;CAOxD"}
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.AstCodeSplitter = void 0;
|
|
7
|
+
const tree_sitter_1 = __importDefault(require("tree-sitter"));
|
|
8
|
+
// Language parsers
|
|
9
|
+
const JavaScript = require('tree-sitter-javascript');
|
|
10
|
+
const TypeScript = require('tree-sitter-typescript').typescript;
|
|
11
|
+
const Python = require('tree-sitter-python');
|
|
12
|
+
const Java = require('tree-sitter-java');
|
|
13
|
+
const Cpp = require('tree-sitter-cpp');
|
|
14
|
+
const Go = require('tree-sitter-go');
|
|
15
|
+
const Rust = require('tree-sitter-rust');
|
|
16
|
+
// Node types that represent logical code units, keyed by language.
// extractChunks turns every syntax node whose `type` appears in the selected list into a chunk.
// NOTE(review): the type names are not validated here; confirm each one exists in the corresponding grammar.
|
|
17
|
+
const SPLITTABLE_NODE_TYPES = {
|
|
18
|
+
javascript: ['function_declaration', 'arrow_function', 'class_declaration', 'method_definition', 'export_statement'],
|
|
19
|
+
typescript: ['function_declaration', 'arrow_function', 'class_declaration', 'method_definition', 'export_statement', 'interface_declaration', 'type_alias_declaration'],
|
|
20
|
+
python: ['function_definition', 'class_definition', 'decorated_definition', 'async_function_definition'],
|
|
21
|
+
java: ['method_declaration', 'class_declaration', 'interface_declaration', 'constructor_declaration'],
|
|
22
|
+
cpp: ['function_definition', 'class_specifier', 'namespace_definition', 'declaration'],
|
|
23
|
+
go: ['function_declaration', 'method_declaration', 'type_declaration', 'var_declaration', 'const_declaration'],
|
|
24
|
+
rust: ['function_item', 'impl_item', 'struct_item', 'enum_item', 'trait_item', 'mod_item']
|
|
25
|
+
};
|
|
26
|
+
class AstCodeSplitter {
|
|
27
|
+
constructor(chunkSize, chunkOverlap) {
|
|
28
|
+
this.chunkSize = 2500;
|
|
29
|
+
this.chunkOverlap = 300;
|
|
30
|
+
if (chunkSize)
|
|
31
|
+
this.chunkSize = chunkSize;
|
|
32
|
+
if (chunkOverlap)
|
|
33
|
+
this.chunkOverlap = chunkOverlap;
|
|
34
|
+
this.parser = new tree_sitter_1.default();
|
|
35
|
+
// Initialize fallback splitter
|
|
36
|
+
const { LangChainCodeSplitter } = require('./langchain-splitter');
|
|
37
|
+
this.langchainFallback = new LangChainCodeSplitter(chunkSize, chunkOverlap);
|
|
38
|
+
}
|
|
39
|
+
async split(code, language, filePath) {
|
|
40
|
+
// Check if language is supported by AST splitter
|
|
41
|
+
const langConfig = this.getLanguageConfig(language);
|
|
42
|
+
if (!langConfig) {
|
|
43
|
+
console.log(`📝 Language ${language} not supported by AST, using LangChain splitter for: ${filePath || 'unknown'}`);
|
|
44
|
+
return await this.langchainFallback.split(code, language, filePath);
|
|
45
|
+
}
|
|
46
|
+
try {
|
|
47
|
+
console.log(`🌳 Using AST splitter for ${language} file: ${filePath || 'unknown'}`);
|
|
48
|
+
this.parser.setLanguage(langConfig.parser);
|
|
49
|
+
const tree = this.parser.parse(code);
|
|
50
|
+
if (!tree.rootNode) {
|
|
51
|
+
console.warn(`⚠️ Failed to parse AST for ${language}, falling back to LangChain: ${filePath || 'unknown'}`);
|
|
52
|
+
return await this.langchainFallback.split(code, language, filePath);
|
|
53
|
+
}
|
|
54
|
+
// Extract chunks based on AST nodes
|
|
55
|
+
const chunks = this.extractChunks(tree.rootNode, code, langConfig.nodeTypes, language, filePath);
|
|
56
|
+
// If chunks are too large, split them further
|
|
57
|
+
const refinedChunks = await this.refineChunks(chunks, code);
|
|
58
|
+
return refinedChunks;
|
|
59
|
+
}
|
|
60
|
+
catch (error) {
|
|
61
|
+
console.warn(`⚠️ AST splitter failed for ${language}, falling back to LangChain: ${error}`);
|
|
62
|
+
return await this.langchainFallback.split(code, language, filePath);
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
setChunkSize(chunkSize) {
|
|
66
|
+
this.chunkSize = chunkSize;
|
|
67
|
+
this.langchainFallback.setChunkSize(chunkSize);
|
|
68
|
+
}
|
|
69
|
+
setChunkOverlap(chunkOverlap) {
|
|
70
|
+
this.chunkOverlap = chunkOverlap;
|
|
71
|
+
this.langchainFallback.setChunkOverlap(chunkOverlap);
|
|
72
|
+
}
|
|
73
|
+
getLanguageConfig(language) {
|
|
74
|
+
const langMap = {
|
|
75
|
+
'javascript': { parser: JavaScript, nodeTypes: SPLITTABLE_NODE_TYPES.javascript },
|
|
76
|
+
'js': { parser: JavaScript, nodeTypes: SPLITTABLE_NODE_TYPES.javascript },
|
|
77
|
+
'typescript': { parser: TypeScript, nodeTypes: SPLITTABLE_NODE_TYPES.typescript },
|
|
78
|
+
'ts': { parser: TypeScript, nodeTypes: SPLITTABLE_NODE_TYPES.typescript },
|
|
79
|
+
'python': { parser: Python, nodeTypes: SPLITTABLE_NODE_TYPES.python },
|
|
80
|
+
'py': { parser: Python, nodeTypes: SPLITTABLE_NODE_TYPES.python },
|
|
81
|
+
'java': { parser: Java, nodeTypes: SPLITTABLE_NODE_TYPES.java },
|
|
82
|
+
'cpp': { parser: Cpp, nodeTypes: SPLITTABLE_NODE_TYPES.cpp },
|
|
83
|
+
'c++': { parser: Cpp, nodeTypes: SPLITTABLE_NODE_TYPES.cpp },
|
|
84
|
+
'c': { parser: Cpp, nodeTypes: SPLITTABLE_NODE_TYPES.cpp },
|
|
85
|
+
'go': { parser: Go, nodeTypes: SPLITTABLE_NODE_TYPES.go },
|
|
86
|
+
'rust': { parser: Rust, nodeTypes: SPLITTABLE_NODE_TYPES.rust },
|
|
87
|
+
'rs': { parser: Rust, nodeTypes: SPLITTABLE_NODE_TYPES.rust }
|
|
88
|
+
};
|
|
89
|
+
return langMap[language.toLowerCase()] || null;
|
|
90
|
+
}
|
|
91
|
+
extractChunks(node, code, splittableTypes, language, filePath) {
|
|
92
|
+
const chunks = [];
|
|
93
|
+
const codeLines = code.split('\n');
|
|
94
|
+
const traverse = (currentNode) => {
|
|
95
|
+
// Check if this node type should be split into a chunk
|
|
96
|
+
if (splittableTypes.includes(currentNode.type)) {
|
|
97
|
+
const startLine = currentNode.startPosition.row + 1;
|
|
98
|
+
const endLine = currentNode.endPosition.row + 1;
|
|
99
|
+
const nodeText = code.slice(currentNode.startIndex, currentNode.endIndex);
|
|
100
|
+
// Only create chunk if it has meaningful content
|
|
101
|
+
if (nodeText.trim().length > 0) {
|
|
102
|
+
chunks.push({
|
|
103
|
+
content: nodeText,
|
|
104
|
+
metadata: {
|
|
105
|
+
startLine,
|
|
106
|
+
endLine,
|
|
107
|
+
language,
|
|
108
|
+
filePath,
|
|
109
|
+
}
|
|
110
|
+
});
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
// Continue traversing child nodes
|
|
114
|
+
for (const child of currentNode.children) {
|
|
115
|
+
traverse(child);
|
|
116
|
+
}
|
|
117
|
+
};
|
|
118
|
+
traverse(node);
|
|
119
|
+
// If no meaningful chunks found, create a single chunk with the entire code
|
|
120
|
+
if (chunks.length === 0) {
|
|
121
|
+
chunks.push({
|
|
122
|
+
content: code,
|
|
123
|
+
metadata: {
|
|
124
|
+
startLine: 1,
|
|
125
|
+
endLine: codeLines.length,
|
|
126
|
+
language,
|
|
127
|
+
filePath,
|
|
128
|
+
}
|
|
129
|
+
});
|
|
130
|
+
}
|
|
131
|
+
return chunks;
|
|
132
|
+
}
|
|
133
|
+
async refineChunks(chunks, originalCode) {
|
|
134
|
+
const refinedChunks = [];
|
|
135
|
+
for (const chunk of chunks) {
|
|
136
|
+
if (chunk.content.length <= this.chunkSize) {
|
|
137
|
+
refinedChunks.push(chunk);
|
|
138
|
+
}
|
|
139
|
+
else {
|
|
140
|
+
// Split large chunks using character-based splitting
|
|
141
|
+
const subChunks = this.splitLargeChunk(chunk, originalCode);
|
|
142
|
+
refinedChunks.push(...subChunks);
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
return this.addOverlap(refinedChunks);
|
|
146
|
+
}
|
|
147
|
+
splitLargeChunk(chunk, originalCode) {
|
|
148
|
+
const lines = chunk.content.split('\n');
|
|
149
|
+
const subChunks = [];
|
|
150
|
+
let currentChunk = '';
|
|
151
|
+
let currentStartLine = chunk.metadata.startLine;
|
|
152
|
+
let currentLineCount = 0;
|
|
153
|
+
for (let i = 0; i < lines.length; i++) {
|
|
154
|
+
const line = lines[i];
|
|
155
|
+
const lineWithNewline = i === lines.length - 1 ? line : line + '\n';
|
|
156
|
+
if (currentChunk.length + lineWithNewline.length > this.chunkSize && currentChunk.length > 0) {
|
|
157
|
+
// Create a sub-chunk
|
|
158
|
+
subChunks.push({
|
|
159
|
+
content: currentChunk.trim(),
|
|
160
|
+
metadata: {
|
|
161
|
+
startLine: currentStartLine,
|
|
162
|
+
endLine: currentStartLine + currentLineCount - 1,
|
|
163
|
+
language: chunk.metadata.language,
|
|
164
|
+
filePath: chunk.metadata.filePath,
|
|
165
|
+
}
|
|
166
|
+
});
|
|
167
|
+
currentChunk = lineWithNewline;
|
|
168
|
+
currentStartLine = chunk.metadata.startLine + i;
|
|
169
|
+
currentLineCount = 1;
|
|
170
|
+
}
|
|
171
|
+
else {
|
|
172
|
+
currentChunk += lineWithNewline;
|
|
173
|
+
currentLineCount++;
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
// Add the last sub-chunk
|
|
177
|
+
if (currentChunk.trim().length > 0) {
|
|
178
|
+
subChunks.push({
|
|
179
|
+
content: currentChunk.trim(),
|
|
180
|
+
metadata: {
|
|
181
|
+
startLine: currentStartLine,
|
|
182
|
+
endLine: currentStartLine + currentLineCount - 1,
|
|
183
|
+
language: chunk.metadata.language,
|
|
184
|
+
filePath: chunk.metadata.filePath,
|
|
185
|
+
}
|
|
186
|
+
});
|
|
187
|
+
}
|
|
188
|
+
return subChunks;
|
|
189
|
+
}
|
|
190
|
+
addOverlap(chunks) {
|
|
191
|
+
if (chunks.length <= 1 || this.chunkOverlap <= 0) {
|
|
192
|
+
return chunks;
|
|
193
|
+
}
|
|
194
|
+
const overlappedChunks = [];
|
|
195
|
+
for (let i = 0; i < chunks.length; i++) {
|
|
196
|
+
let content = chunks[i].content;
|
|
197
|
+
const metadata = { ...chunks[i].metadata };
|
|
198
|
+
// Add overlap from previous chunk
|
|
199
|
+
if (i > 0 && this.chunkOverlap > 0) {
|
|
200
|
+
const prevChunk = chunks[i - 1];
|
|
201
|
+
const overlapText = prevChunk.content.slice(-this.chunkOverlap);
|
|
202
|
+
content = overlapText + '\n' + content;
|
|
203
|
+
metadata.startLine = Math.max(1, metadata.startLine - this.getLineCount(overlapText));
|
|
204
|
+
}
|
|
205
|
+
overlappedChunks.push({
|
|
206
|
+
content,
|
|
207
|
+
metadata
|
|
208
|
+
});
|
|
209
|
+
}
|
|
210
|
+
return overlappedChunks;
|
|
211
|
+
}
|
|
212
|
+
getLineCount(text) {
|
|
213
|
+
return text.split('\n').length;
|
|
214
|
+
}
|
|
215
|
+
/**
|
|
216
|
+
* Check if AST splitting is supported for the given language
|
|
217
|
+
*/
|
|
218
|
+
static isLanguageSupported(language) {
|
|
219
|
+
const supportedLanguages = [
|
|
220
|
+
'javascript', 'js', 'typescript', 'ts', 'python', 'py',
|
|
221
|
+
'java', 'cpp', 'c++', 'c', 'go', 'rust', 'rs'
|
|
222
|
+
];
|
|
223
|
+
return supportedLanguages.includes(language.toLowerCase());
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
exports.AstCodeSplitter = AstCodeSplitter;
|
|
227
|
+
//# sourceMappingURL=ast-splitter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ast-splitter.js","sourceRoot":"","sources":["../../src/splitter/ast-splitter.ts"],"names":[],"mappings":";;;;;;AAAA,8DAAiC;AAGjC,mBAAmB;AACnB,MAAM,UAAU,GAAG,OAAO,CAAC,wBAAwB,CAAC,CAAC;AACrD,MAAM,UAAU,GAAG,OAAO,CAAC,wBAAwB,CAAC,CAAC,UAAU,CAAC;AAChE,MAAM,MAAM,GAAG,OAAO,CAAC,oBAAoB,CAAC,CAAC;AAC7C,MAAM,IAAI,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAAC;AACzC,MAAM,GAAG,GAAG,OAAO,CAAC,iBAAiB,CAAC,CAAC;AACvC,MAAM,EAAE,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAAC;AACrC,MAAM,IAAI,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAAC;AAEzC,+CAA+C;AAC/C,MAAM,qBAAqB,GAAG;IAC1B,UAAU,EAAE,CAAC,sBAAsB,EAAE,gBAAgB,EAAE,mBAAmB,EAAE,mBAAmB,EAAE,kBAAkB,CAAC;IACpH,UAAU,EAAE,CAAC,sBAAsB,EAAE,gBAAgB,EAAE,mBAAmB,EAAE,mBAAmB,EAAE,kBAAkB,EAAE,uBAAuB,EAAE,wBAAwB,CAAC;IACvK,MAAM,EAAE,CAAC,qBAAqB,EAAE,kBAAkB,EAAE,sBAAsB,EAAE,2BAA2B,CAAC;IACxG,IAAI,EAAE,CAAC,oBAAoB,EAAE,mBAAmB,EAAE,uBAAuB,EAAE,yBAAyB,CAAC;IACrG,GAAG,EAAE,CAAC,qBAAqB,EAAE,iBAAiB,EAAE,sBAAsB,EAAE,aAAa,CAAC;IACtF,EAAE,EAAE,CAAC,sBAAsB,EAAE,oBAAoB,EAAE,kBAAkB,EAAE,iBAAiB,EAAE,mBAAmB,CAAC;IAC9G,IAAI,EAAE,CAAC,eAAe,EAAE,WAAW,EAAE,aAAa,EAAE,WAAW,EAAE,YAAY,EAAE,UAAU,CAAC;CAC7F,CAAC;AAEF,MAAa,eAAe;IAMxB,YAAY,SAAkB,EAAE,YAAqB;QAL7C,cAAS,GAAW,IAAI,CAAC;QACzB,iBAAY,GAAW,GAAG,CAAC;QAK/B,IAAI,SAAS;YAAE,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC1C,IAAI,YAAY;YAAE,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;QACnD,IAAI,CAAC,MAAM,GAAG,IAAI,qBAAM,EAAE,CAAC;QAE3B,+BAA+B;QAC/B,MAAM,EAAE,qBAAqB,EAAE,GAAG,OAAO,CAAC,sBAAsB,CAAC,CAAC;QAClE,IAAI,CAAC,iBAAiB,GAAG,IAAI,qBAAqB,CAAC,SAAS,EAAE,YAAY,CAAC,CAAC;IAChF,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,IAAY,EAAE,QAAgB,EAAE,QAAiB;QACzD,iDAAiD;QACjD,MAAM,UAAU,GAAG,IAAI,CAAC,iBAAiB,CAAC,QAAQ,CAAC,CAAC;QACpD,IAAI,CAAC,UAAU,EAAE,CAAC;YACd,OAAO,CAAC,GAAG,CAAC,eAAe,QAAQ,wDAAwD,QAAQ,IAAI,SAAS,EAAE,CAAC,CAAC;YACpH,OAAO,MAAM,IAAI,CAAC,iBAAiB,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;QACxE,CAAC;QAED,IAAI,CAAC;YACD,OAAO,CAAC,GAAG,CAAC,6BAA6B,QAAQ,UAAU,QAAQ,IAAI,SAAS,EAAE,CAAC,CAAC;YAEpF,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC;YAC3C,MAAM,IAAI,GAAG,IAAI,CAAC
,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YAErC,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACjB,OAAO,CAAC,IAAI,CAAC,+BAA+B,QAAQ,gCAAgC,QAAQ,IAAI,SAAS,EAAE,CAAC,CAAC;gBAC7G,OAAO,MAAM,IAAI,CAAC,iBAAiB,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;YACxE,CAAC;YAED,oCAAoC;YACpC,MAAM,MAAM,GAAG,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,EAAE,UAAU,CAAC,SAAS,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;YAEjG,8CAA8C;YAC9C,MAAM,aAAa,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;YAE5D,OAAO,aAAa,CAAC;QACzB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACb,OAAO,CAAC,IAAI,CAAC,+BAA+B,QAAQ,gCAAgC,KAAK,EAAE,CAAC,CAAC;YAC7F,OAAO,MAAM,IAAI,CAAC,iBAAiB,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;QACxE,CAAC;IACL,CAAC;IAED,YAAY,CAAC,SAAiB;QAC1B,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,iBAAiB,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;IACnD,CAAC;IAED,eAAe,CAAC,YAAoB;QAChC,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;QACjC,IAAI,CAAC,iBAAiB,CAAC,eAAe,CAAC,YAAY,CAAC,CAAC;IACzD,CAAC;IAEO,iBAAiB,CAAC,QAAgB;QACtC,MAAM,OAAO,GAAyD;YAClE,YAAY,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,SAAS,EAAE,qBAAqB,CAAC,UAAU,EAAE;YACjF,IAAI,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,SAAS,EAAE,qBAAqB,CAAC,UAAU,EAAE;YACzE,YAAY,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,SAAS,EAAE,qBAAqB,CAAC,UAAU,EAAE;YACjF,IAAI,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,SAAS,EAAE,qBAAqB,CAAC,UAAU,EAAE;YACzE,QAAQ,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,qBAAqB,CAAC,MAAM,EAAE;YACrE,IAAI,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,qBAAqB,CAAC,MAAM,EAAE;YACjE,MAAM,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE,qBAAqB,CAAC,IAAI,EAAE;YAC/D,KAAK,EAAE,EAAE,MAAM,EAAE,GAAG,EAAE,SAAS,EAAE,qBAAqB,CAAC,GAAG,EAAE;YAC5D,KAAK,EAAE,EAAE,MAAM,EAAE,GAAG,EAAE,SAAS,EAAE,qBAAqB,CAAC,GAAG,EAAE;YAC5D,GAAG,EAAE,EAAE,MAAM,EAAE,GAAG,EAAE,SAAS,EAAE,qBAAqB,CAAC,GAAG,EAAE;YAC1D,IAAI,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,SAAS,EAAE,qBAAqB,CAAC,EAAE,EAAE;YACzD,MAAM,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE,qBAAqB,CAAC,IAAI,EAAE;YAC/D,IAAI,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE,qBAAqB,CAAC,IAAI,EAAE;SAChE,CAAC;QAEF,OAAO,OAAO,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,IAAI,IAAI,CAAC;IACnD,CA
AC;IAEO,aAAa,CACjB,IAAuB,EACvB,IAAY,EACZ,eAAyB,EACzB,QAAgB,EAChB,QAAiB;QAEjB,MAAM,MAAM,GAAgB,EAAE,CAAC;QAC/B,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAEnC,MAAM,QAAQ,GAAG,CAAC,WAA8B,EAAE,EAAE;YAChD,uDAAuD;YACvD,IAAI,eAAe,CAAC,QAAQ,CAAC,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC7C,MAAM,SAAS,GAAG,WAAW,CAAC,aAAa,CAAC,GAAG,GAAG,CAAC,CAAC;gBACpD,MAAM,OAAO,GAAG,WAAW,CAAC,WAAW,CAAC,GAAG,GAAG,CAAC,CAAC;gBAChD,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,UAAU,EAAE,WAAW,CAAC,QAAQ,CAAC,CAAC;gBAE1E,iDAAiD;gBACjD,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAC7B,MAAM,CAAC,IAAI,CAAC;wBACR,OAAO,EAAE,QAAQ;wBACjB,QAAQ,EAAE;4BACN,SAAS;4BACT,OAAO;4BACP,QAAQ;4BACR,QAAQ;yBACX;qBACJ,CAAC,CAAC;gBACP,CAAC;YACL,CAAC;YAED,kCAAkC;YAClC,KAAK,MAAM,KAAK,IAAI,WAAW,CAAC,QAAQ,EAAE,CAAC;gBACvC,QAAQ,CAAC,KAAK,CAAC,CAAC;YACpB,CAAC;QACL,CAAC,CAAC;QAEF,QAAQ,CAAC,IAAI,CAAC,CAAC;QAEf,4EAA4E;QAC5E,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACtB,MAAM,CAAC,IAAI,CAAC;gBACR,OAAO,EAAE,IAAI;gBACb,QAAQ,EAAE;oBACN,SAAS,EAAE,CAAC;oBACZ,OAAO,EAAE,SAAS,CAAC,MAAM;oBACzB,QAAQ;oBACR,QAAQ;iBACX;aACJ,CAAC,CAAC;QACP,CAAC;QAED,OAAO,MAAM,CAAC;IAClB,CAAC;IAEO,KAAK,CAAC,YAAY,CAAC,MAAmB,EAAE,YAAoB;QAChE,MAAM,aAAa,GAAgB,EAAE,CAAC;QAEtC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YACzB,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;gBACzC,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAC9B,CAAC;iBAAM,CAAC;gBACJ,qDAAqD;gBACrD,MAAM,SAAS,GAAG,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,YAAY,CAAC,CAAC;gBAC5D,aAAa,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,CAAC;YACrC,CAAC;QACL,CAAC;QAED,OAAO,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC,CAAC;IAC1C,CAAC;IAEO,eAAe,CAAC,KAAgB,EAAE,YAAoB;QAC1D,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACxC,MAAM,SAAS,GAAgB,EAAE,CAAC;QAClC,IAAI,YAAY,GAAG,EAAE,CAAC;QACtB,IAAI,gBAAgB,GAAG,KAAK,CAAC,QAAQ,CAAC,SAAS,CAAC;QAChD,IAAI,gBAAgB,GAAG,CAAC,CAAC;QAEzB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACpC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACtB,MAAM,eAAe,GAAG,CAAC,KAAK,KAAK,CAAC,MAAM,GAA
G,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,GAAG,IAAI,CAAC;YAEpE,IAAI,YAAY,CAAC,MAAM,GAAG,eAAe,CAAC,MAAM,GAAG,IAAI,CAAC,SAAS,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC3F,qBAAqB;gBACrB,SAAS,CAAC,IAAI,CAAC;oBACX,OAAO,EAAE,YAAY,CAAC,IAAI,EAAE;oBAC5B,QAAQ,EAAE;wBACN,SAAS,EAAE,gBAAgB;wBAC3B,OAAO,EAAE,gBAAgB,GAAG,gBAAgB,GAAG,CAAC;wBAChD,QAAQ,EAAE,KAAK,CAAC,QAAQ,CAAC,QAAQ;wBACjC,QAAQ,EAAE,KAAK,CAAC,QAAQ,CAAC,QAAQ;qBACpC;iBACJ,CAAC,CAAC;gBAEH,YAAY,GAAG,eAAe,CAAC;gBAC/B,gBAAgB,GAAG,KAAK,CAAC,QAAQ,CAAC,SAAS,GAAG,CAAC,CAAC;gBAChD,gBAAgB,GAAG,CAAC,CAAC;YACzB,CAAC;iBAAM,CAAC;gBACJ,YAAY,IAAI,eAAe,CAAC;gBAChC,gBAAgB,EAAE,CAAC;YACvB,CAAC;QACL,CAAC;QAED,yBAAyB;QACzB,IAAI,YAAY,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACjC,SAAS,CAAC,IAAI,CAAC;gBACX,OAAO,EAAE,YAAY,CAAC,IAAI,EAAE;gBAC5B,QAAQ,EAAE;oBACN,SAAS,EAAE,gBAAgB;oBAC3B,OAAO,EAAE,gBAAgB,GAAG,gBAAgB,GAAG,CAAC;oBAChD,QAAQ,EAAE,KAAK,CAAC,QAAQ,CAAC,QAAQ;oBACjC,QAAQ,EAAE,KAAK,CAAC,QAAQ,CAAC,QAAQ;iBACpC;aACJ,CAAC,CAAC;QACP,CAAC;QAED,OAAO,SAAS,CAAC;IACrB,CAAC;IAEO,UAAU,CAAC,MAAmB;QAClC,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC,IAAI,IAAI,CAAC,YAAY,IAAI,CAAC,EAAE,CAAC;YAC/C,OAAO,MAAM,CAAC;QAClB,CAAC;QAED,MAAM,gBAAgB,GAAgB,EAAE,CAAC;QAEzC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACrC,IAAI,OAAO,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;YAChC,MAAM,QAAQ,GAAG,EAAE,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC;YAE3C,kCAAkC;YAClC,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,YAAY,GAAG,CAAC,EAAE,CAAC;gBACjC,MAAM,SAAS,GAAG,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;gBAChC,MAAM,WAAW,GAAG,SAAS,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;gBAChE,OAAO,GAAG,WAAW,GAAG,IAAI,GAAG,OAAO,CAAC;gBACvC,QAAQ,CAAC,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,QAAQ,CAAC,SAAS,GAAG,IAAI,CAAC,YAAY,CAAC,WAAW,CAAC,CAAC,CAAC;YAC1F,CAAC;YAED,gBAAgB,CAAC,IAAI,CAAC;gBAClB,OAAO;gBACP,QAAQ;aACX,CAAC,CAAC;QACP,CAAC;QAED,OAAO,gBAAgB,CAAC;IAC5B,CAAC;IAEO,YAAY,CAAC,IAAY;QAC7B,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC;IACnC,CAAC;IAED;;OAEG;IACH,MAAM,CAA
C,mBAAmB,CAAC,QAAgB;QACvC,MAAM,kBAAkB,GAAG;YACvB,YAAY,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI;YACtD,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,GAAG,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI;SAChD,CAAC;QACF,OAAO,kBAAkB,CAAC,QAAQ,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,CAAC;IAC/D,CAAC;CACJ;AA/OD,0CA+OC"}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/** A piece of source code produced by a Splitter, together with where it came from. */
export interface CodeChunk {
|
|
2
|
+
/** Text of the chunk. */
content: string;
|
|
3
|
+
metadata: {
|
|
4
|
+
/** First line covered by the chunk; the AST splitter reports one-based line numbers. */
startLine: number;
|
|
5
|
+
/** Last line covered by the chunk (inclusive). */
endLine: number;
|
|
6
|
+
/** Language string that was passed to split(). */
language?: string;
|
|
7
|
+
/** File path that was passed to split(), if any. */
filePath?: string;
|
|
8
|
+
};
|
|
9
|
+
}
|
|
10
|
+
/** Identifies which splitter implementation to use. */
export declare enum SplitterType {
|
|
11
|
+
/** Text-based splitter (LangChainCodeSplitter). */
LANGCHAIN = "langchain",
|
|
12
|
+
/** Syntax-tree-based splitter (AstCodeSplitter). */
AST = "ast"
|
|
13
|
+
}
|
|
14
|
+
/** Optional settings for selecting and configuring a splitter. */
export interface SplitterConfig {
|
|
15
|
+
/** Which splitter implementation to use. */
type?: SplitterType;
|
|
16
|
+
/** Maximum chunk size; each implementation applies its own default when omitted. */
chunkSize?: number;
|
|
17
|
+
/** Overlap between consecutive chunks; each implementation applies its own default when omitted. */
chunkOverlap?: number;
|
|
18
|
+
}
|
|
19
|
+
/** Contract implemented by every code splitter in this package. */
export interface Splitter {
|
|
20
|
+
/**
|
|
21
|
+
* Split code into code chunks
|
|
22
|
+
* @param code Code content to split
|
|
23
|
+
* @param language Programming language name or alias of the code
|
|
24
|
+
* @param filePath Optional file path; implementations copy it into each chunk's metadata
|
|
25
|
+
* @returns Promise resolving to the array of code chunks
|
|
26
|
+
*/
|
|
27
|
+
split(code: string, language: string, filePath?: string): Promise<CodeChunk[]>;
|
|
28
|
+
/**
|
|
29
|
+
* Set chunk size used by subsequent split() calls
|
|
30
|
+
* @param chunkSize Maximum chunk size
|
|
31
|
+
*/
|
|
32
|
+
setChunkSize(chunkSize: number): void;
|
|
33
|
+
/**
|
|
34
|
+
* Set chunk overlap size used by subsequent split() calls
|
|
35
|
+
* @param chunkOverlap Chunk overlap size
|
|
36
|
+
*/
|
|
37
|
+
setChunkOverlap(chunkOverlap: number): void;
|
|
38
|
+
}
|
|
39
|
+
export * from './langchain-splitter';
|
|
40
|
+
export * from './ast-splitter';
|
|
41
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/splitter/index.ts"],"names":[],"mappings":"AACA,MAAM,WAAW,SAAS;IACtB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE;QACN,SAAS,EAAE,MAAM,CAAC;QAClB,OAAO,EAAE,MAAM,CAAC;QAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;KACrB,CAAC;CACL;AAGD,oBAAY,YAAY;IACpB,SAAS,cAAc;IACvB,GAAG,QAAQ;CACd;AAGD,MAAM,WAAW,cAAc;IAC3B,IAAI,CAAC,EAAE,YAAY,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,QAAQ;IACrB;;;;;;OAMG;IACH,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC,CAAC;IAE/E;;;OAGG;IACH,YAAY,CAAC,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IAEtC;;;OAGG;IACH,eAAe,CAAC,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;CAC/C;AAGD,cAAc,sBAAsB,CAAC;AACrC,cAAc,gBAAgB,CAAC"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Module re-export helper (presumably emitted by the TypeScript compiler; the source map names ../../src/splitter/index.ts).
// Exposes property `k` of module `m` on object `o` under the name `k2` (which defaults to `k`).
// A helper already present on `this` is reused; otherwise the implementation is picked by whether Object.create exists.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
// Keep the source descriptor only when it is an accessor on an __esModule module, or a data
// property that is neither writable nor configurable; in every other case install a getter
// that reads m[k] on each access so the binding stays live.
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
// Fallback when Object.create is unavailable: copy the current value once (not a live binding).
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
// Re-exports every key enumerated by for...in over module `m` onto `exports` through __createBinding,
// skipping "default" and any key that `exports` already owns (so earlier exports win on a name clash).
var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
14
|
+
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
15
|
+
};
|
|
16
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
|
+
exports.SplitterType = void 0;
|
|
18
|
+
// Splitter type enumeration
|
|
19
|
+
// Runtime object for the SplitterType string enum. The function below fills in
// LANGCHAIN = "langchain" and AST = "ast" on the object it is given, creating that object
// and publishing it as exports.SplitterType when SplitterType is still unset.
var SplitterType;
|
|
20
|
+
(function (SplitterType) {
|
|
21
|
+
SplitterType["LANGCHAIN"] = "langchain";
|
|
22
|
+
SplitterType["AST"] = "ast";
|
|
23
|
+
})(SplitterType || (exports.SplitterType = SplitterType = {}));
|
|
24
|
+
// Implementation class exports
|
|
25
|
+
__exportStar(require("./langchain-splitter"), exports);
|
|
26
|
+
__exportStar(require("./ast-splitter"), exports);
|
|
27
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/splitter/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;AAWA,4BAA4B;AAC5B,IAAY,YAGX;AAHD,WAAY,YAAY;IACpB,uCAAuB,CAAA;IACvB,2BAAW,CAAA;AACf,CAAC,EAHW,YAAY,4BAAZ,YAAY,QAGvB;AAgCD,+BAA+B;AAC/B,uDAAqC;AACrC,iDAA+B"}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { Splitter, CodeChunk } from './index';
|
|
2
|
+
/**
 * Splitter built on LangChain's RecursiveCharacterTextSplitter. Uses a
 * language-specific splitter when the language can be mapped, and a generic
 * fallback split otherwise or when splitting throws.
 */
export declare class LangChainCodeSplitter implements Splitter {
|
|
3
|
+
/** Chunk size passed to the LangChain splitter. */
private chunkSize;
|
|
4
|
+
/** Chunk overlap passed to the LangChain splitter. */
private chunkOverlap;
|
|
5
|
+
/**
 * @param chunkSize Chunk size; 1000 when not supplied
 * @param chunkOverlap Chunk overlap; 200 when not supplied
 */
constructor(chunkSize?: number, chunkOverlap?: number);
|
|
6
|
+
/**
 * Split code into chunks.
 * @param code Source text to split
 * @param language Language name; copied into chunk metadata
 * @param filePath Optional path; copied into chunk metadata
 */
split(code: string, language: string, filePath?: string): Promise<CodeChunk[]>;
|
|
7
|
+
/** Sets the chunk size used by subsequent split() calls. */
setChunkSize(chunkSize: number): void;
|
|
8
|
+
/** Sets the chunk overlap used by subsequent split() calls. */
setChunkOverlap(chunkOverlap: number): void;
|
|
9
|
+
private mapLanguage;
|
|
10
|
+
private fallbackSplit;
|
|
11
|
+
private estimateLines;
|
|
12
|
+
}
|
|
13
|
+
//# sourceMappingURL=langchain-splitter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"langchain-splitter.d.ts","sourceRoot":"","sources":["../../src/splitter/langchain-splitter.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAK9C,qBAAa,qBAAsB,YAAW,QAAQ;IAClD,OAAO,CAAC,SAAS,CAAgB;IACjC,OAAO,CAAC,YAAY,CAAe;gBAEvB,SAAS,CAAC,EAAE,MAAM,EAAE,YAAY,CAAC,EAAE,MAAM;IAK/C,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC;IAwCpF,YAAY,CAAC,SAAS,EAAE,MAAM,GAAG,IAAI;IAIrC,eAAe,CAAC,YAAY,EAAE,MAAM,GAAG,IAAI;IAI3C,OAAO,CAAC,WAAW;YA4BL,aAAa;IAuB3B,OAAO,CAAC,aAAa;CAiBxB"}
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.LangChainCodeSplitter = void 0;
|
|
4
|
+
const text_splitter_1 = require("langchain/text_splitter");
|
|
5
|
+
class LangChainCodeSplitter {
|
|
6
|
+
constructor(chunkSize, chunkOverlap) {
|
|
7
|
+
this.chunkSize = 1000;
|
|
8
|
+
this.chunkOverlap = 200;
|
|
9
|
+
if (chunkSize)
|
|
10
|
+
this.chunkSize = chunkSize;
|
|
11
|
+
if (chunkOverlap)
|
|
12
|
+
this.chunkOverlap = chunkOverlap;
|
|
13
|
+
}
|
|
14
|
+
async split(code, language, filePath) {
|
|
15
|
+
try {
|
|
16
|
+
// Create language-specific splitter
|
|
17
|
+
const mappedLanguage = this.mapLanguage(language);
|
|
18
|
+
if (mappedLanguage) {
|
|
19
|
+
const splitter = text_splitter_1.RecursiveCharacterTextSplitter.fromLanguage(mappedLanguage, {
|
|
20
|
+
chunkSize: this.chunkSize,
|
|
21
|
+
chunkOverlap: this.chunkOverlap,
|
|
22
|
+
});
|
|
23
|
+
// Split code
|
|
24
|
+
const documents = await splitter.createDocuments([code]);
|
|
25
|
+
// Convert to CodeChunk format
|
|
26
|
+
return documents.map((doc, index) => {
|
|
27
|
+
const lines = doc.metadata?.loc?.lines || { from: 1, to: 1 };
|
|
28
|
+
return {
|
|
29
|
+
content: doc.pageContent,
|
|
30
|
+
metadata: {
|
|
31
|
+
startLine: lines.from,
|
|
32
|
+
endLine: lines.to,
|
|
33
|
+
language,
|
|
34
|
+
filePath,
|
|
35
|
+
},
|
|
36
|
+
};
|
|
37
|
+
});
|
|
38
|
+
}
|
|
39
|
+
else {
|
|
40
|
+
// If language is not supported, use generic splitter directly
|
|
41
|
+
return this.fallbackSplit(code, language, filePath);
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
catch (error) {
|
|
45
|
+
console.error('Error splitting code:', error);
|
|
46
|
+
// If specific language splitting fails, use generic splitter
|
|
47
|
+
return this.fallbackSplit(code, language, filePath);
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
setChunkSize(chunkSize) {
|
|
51
|
+
this.chunkSize = chunkSize;
|
|
52
|
+
}
|
|
53
|
+
setChunkOverlap(chunkOverlap) {
|
|
54
|
+
this.chunkOverlap = chunkOverlap;
|
|
55
|
+
}
|
|
56
|
+
mapLanguage(language) {
|
|
57
|
+
// Map common language names to LangChain supported formats
|
|
58
|
+
const languageMap = {
|
|
59
|
+
'javascript': 'js',
|
|
60
|
+
'typescript': 'js',
|
|
61
|
+
'python': 'python',
|
|
62
|
+
'java': 'java',
|
|
63
|
+
'cpp': 'cpp',
|
|
64
|
+
'c++': 'cpp',
|
|
65
|
+
'c': 'cpp',
|
|
66
|
+
'go': 'go',
|
|
67
|
+
'rust': 'rust',
|
|
68
|
+
'php': 'php',
|
|
69
|
+
'ruby': 'ruby',
|
|
70
|
+
'swift': 'swift',
|
|
71
|
+
'scala': 'scala',
|
|
72
|
+
'html': 'html',
|
|
73
|
+
'markdown': 'markdown',
|
|
74
|
+
'md': 'markdown',
|
|
75
|
+
'latex': 'latex',
|
|
76
|
+
'tex': 'latex',
|
|
77
|
+
'solidity': 'sol',
|
|
78
|
+
'sol': 'sol',
|
|
79
|
+
};
|
|
80
|
+
return languageMap[language.toLowerCase()] || null;
|
|
81
|
+
}
|
|
82
|
+
async fallbackSplit(code, language, filePath) {
|
|
83
|
+
// Generic splitter as fallback
|
|
84
|
+
const splitter = new text_splitter_1.RecursiveCharacterTextSplitter({
|
|
85
|
+
chunkSize: this.chunkSize,
|
|
86
|
+
chunkOverlap: this.chunkOverlap,
|
|
87
|
+
});
|
|
88
|
+
const documents = await splitter.createDocuments([code]);
|
|
89
|
+
return documents.map((doc, index) => {
|
|
90
|
+
const lines = this.estimateLines(doc.pageContent, code);
|
|
91
|
+
return {
|
|
92
|
+
content: doc.pageContent,
|
|
93
|
+
metadata: {
|
|
94
|
+
startLine: lines.start,
|
|
95
|
+
endLine: lines.end,
|
|
96
|
+
language,
|
|
97
|
+
filePath,
|
|
98
|
+
},
|
|
99
|
+
};
|
|
100
|
+
});
|
|
101
|
+
}
|
|
102
|
+
estimateLines(chunk, originalCode) {
|
|
103
|
+
// Simple line number estimation
|
|
104
|
+
const codeLines = originalCode.split('\n');
|
|
105
|
+
const chunkLines = chunk.split('\n');
|
|
106
|
+
// Find chunk position in original code
|
|
107
|
+
const chunkStart = originalCode.indexOf(chunk);
|
|
108
|
+
if (chunkStart === -1) {
|
|
109
|
+
return { start: 1, end: chunkLines.length };
|
|
110
|
+
}
|
|
111
|
+
const beforeChunk = originalCode.substring(0, chunkStart);
|
|
112
|
+
const startLine = beforeChunk.split('\n').length;
|
|
113
|
+
const endLine = startLine + chunkLines.length - 1;
|
|
114
|
+
return { start: startLine, end: endLine };
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
exports.LangChainCodeSplitter = LangChainCodeSplitter;
|
|
118
|
+
//# sourceMappingURL=langchain-splitter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"langchain-splitter.js","sourceRoot":"","sources":["../../src/splitter/langchain-splitter.ts"],"names":[],"mappings":";;;AAAA,2DAAyE;AAMzE,MAAa,qBAAqB;IAI9B,YAAY,SAAkB,EAAE,YAAqB;QAH7C,cAAS,GAAW,IAAI,CAAC;QACzB,iBAAY,GAAW,GAAG,CAAC;QAG/B,IAAI,SAAS;YAAE,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC1C,IAAI,YAAY;YAAE,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;IACvD,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,IAAY,EAAE,QAAgB,EAAE,QAAiB;QACzD,IAAI,CAAC;YACD,oCAAoC;YACpC,MAAM,cAAc,GAAG,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;YAClD,IAAI,cAAc,EAAE,CAAC;gBACjB,MAAM,QAAQ,GAAG,8CAA8B,CAAC,YAAY,CACxD,cAAc,EACd;oBACI,SAAS,EAAE,IAAI,CAAC,SAAS;oBACzB,YAAY,EAAE,IAAI,CAAC,YAAY;iBAClC,CACJ,CAAC;gBAEF,aAAa;gBACb,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;gBAEzD,8BAA8B;gBAC9B,OAAO,SAAS,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,KAAK,EAAE,EAAE;oBAChC,MAAM,KAAK,GAAG,GAAG,CAAC,QAAQ,EAAE,GAAG,EAAE,KAAK,IAAI,EAAE,IAAI,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC;oBAC7D,OAAO;wBACH,OAAO,EAAE,GAAG,CAAC,WAAW;wBACxB,QAAQ,EAAE;4BACN,SAAS,EAAE,KAAK,CAAC,IAAI;4BACrB,OAAO,EAAE,KAAK,CAAC,EAAE;4BACjB,QAAQ;4BACR,QAAQ;yBACX;qBACJ,CAAC;gBACN,CAAC,CAAC,CAAC;YACP,CAAC;iBAAM,CAAC;gBACJ,8DAA8D;gBAC9D,OAAO,IAAI,CAAC,aAAa,CAAC,IAAI,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;YACxD,CAAC;QACL,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACb,OAAO,CAAC,KAAK,CAAC,uBAAuB,EAAE,KAAK,CAAC,CAAC;YAC9C,6DAA6D;YAC7D,OAAO,IAAI,CAAC,aAAa,CAAC,IAAI,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;QACxD,CAAC;IACL,CAAC;IAED,YAAY,CAAC,SAAiB;QAC1B,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;IAC/B,CAAC;IAED,eAAe,CAAC,YAAoB;QAChC,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;IACrC,CAAC;IAEO,WAAW,CAAC,QAAgB;QAChC,2DAA2D;QAC3D,MAAM,WAAW,GAAsC;YACnD,YAAY,EAAE,IAAI;YAClB,YAAY,EAAE,IAAI;YAClB,QAAQ,EAAE,QAAQ;YAClB,MAAM,EAAE,MAAM;YACd,KAAK,EAAE,KAAK;YACZ,KAAK,EAAE,KAAK;YACZ,GAAG,EAAE,KAAK;YACV,IAAI,EAAE,IAAI;YACV,MAAM,EAAE,MAAM;YACd,KAAK,EAAE,KAAK;YACZ,MAAM,EAAE,MAAM;YACd,OAAO,EAAE,OAAO;YAChB,OAAO,EAAE,OAAO;YAChB,MAAM,EAAE,MAAM;YACd,UAAU,EAAE,UAAU;YACtB,IAAI,EAAE,UAAU;YAChB,OAAO,EAAE,OAAO;YAChB,KAAK,EAAE,OAAO;YACd,UAAU,EAAE,KAAK;YACj
B,KAAK,EAAE,KAAK;SACf,CAAC;QAEF,OAAO,WAAW,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,IAAI,IAAI,CAAC;IACvD,CAAC;IAEO,KAAK,CAAC,aAAa,CAAC,IAAY,EAAE,QAAgB,EAAE,QAAiB;QACzE,+BAA+B;QAC/B,MAAM,QAAQ,GAAG,IAAI,8CAA8B,CAAC;YAChD,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,YAAY,EAAE,IAAI,CAAC,YAAY;SAClC,CAAC,CAAC;QAEH,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;QAEzD,OAAO,SAAS,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,KAAK,EAAE,EAAE;YAChC,MAAM,KAAK,GAAG,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,WAAW,EAAE,IAAI,CAAC,CAAC;YACxD,OAAO;gBACH,OAAO,EAAE,GAAG,CAAC,WAAW;gBACxB,QAAQ,EAAE;oBACN,SAAS,EAAE,KAAK,CAAC,KAAK;oBACtB,OAAO,EAAE,KAAK,CAAC,GAAG;oBAClB,QAAQ;oBACR,QAAQ;iBACX;aACJ,CAAC;QACN,CAAC,CAAC,CAAC;IACP,CAAC;IAEO,aAAa,CAAC,KAAa,EAAE,YAAoB;QACrD,gCAAgC;QAChC,MAAM,SAAS,GAAG,YAAY,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC3C,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAErC,uCAAuC;QACvC,MAAM,UAAU,GAAG,YAAY,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;QAC/C,IAAI,UAAU,KAAK,CAAC,CAAC,EAAE,CAAC;YACpB,OAAO,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,UAAU,CAAC,MAAM,EAAE,CAAC;QAChD,CAAC;QAED,MAAM,WAAW,GAAG,YAAY,CAAC,SAAS,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;QAC1D,MAAM,SAAS,GAAG,WAAW,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC;QACjD,MAAM,OAAO,GAAG,SAAS,GAAG,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC;QAElD,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC;IAC9C,CAAC;CACJ;AA7HD,sDA6HC"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
 * A single node stored in a `MerkleDAG`.
 * NOTE(review): field semantics below are inferred from names only; the
 * implementation (merkle.js) is not visible here — confirm against it.
 */
export interface MerkleDAGNode {
|
|
2
|
+
/** Node identifier; presumably the key used in `MerkleDAG.nodes` — TODO confirm. */
id: string;
|
|
3
|
+
/** Hash string associated with the node; presumably derived from `data` — TODO confirm. */
hash: string;
|
|
4
|
+
/** String payload carried by the node. */
data: string;
|
|
5
|
+
/** Strings referencing parent nodes; presumably node ids — TODO confirm. */
parents: string[];
|
|
6
|
+
/** Strings referencing child nodes; presumably node ids — TODO confirm. */
children: string[];
|
|
7
|
+
}
|
|
8
|
+
/**
 * Container of `MerkleDAGNode`s with helpers to add, query, (de)serialize and
 * compare graphs.
 * NOTE(review): only the declarations are visible here; behavioural notes
 * below are hedged and should be confirmed against the implementation.
 */
export declare class MerkleDAG {
|
|
9
|
+
/** All nodes, keyed by a string (presumably the node id — TODO confirm). */
nodes: Map<string, MerkleDAGNode>;
|
|
10
|
+
/** Strings identifying root nodes (presumably ids of nodes without parents — TODO confirm). */
rootIds: string[];
|
|
11
|
+
constructor();
|
|
12
|
+
/** Private hashing helper; algorithm not visible from the declaration. */
private hash;
|
|
13
|
+
/**
 * Adds a node carrying `data`, optionally related to `parentId`.
 * @returns A string, presumably the new node's id — TODO confirm.
 */
addNode(data: string, parentId?: string): string;
|
|
14
|
+
/** Looks up a node by `nodeId`; the return type allows `undefined` (presumably when no such node exists). */
getNode(nodeId: string): MerkleDAGNode | undefined;
|
|
15
|
+
/** Returns nodes as an array; presumably every node in `nodes` — TODO confirm. */
getAllNodes(): MerkleDAGNode[];
|
|
16
|
+
/** Returns the root nodes; presumably those listed in `rootIds` — TODO confirm. */
getRootNodes(): MerkleDAGNode[];
|
|
17
|
+
/** Returns the leaf nodes; presumably nodes with no children — TODO confirm. */
getLeafNodes(): MerkleDAGNode[];
|
|
18
|
+
/** Produces a serialized representation of the graph; the shape is untyped (`any`). */
serialize(): any;
|
|
19
|
+
/** Builds a `MerkleDAG` from `data`; presumably the output of `serialize()` — TODO confirm. */
static deserialize(data: any): MerkleDAG;
|
|
20
|
+
/**
 * Compares two graphs and reports the differences as three string lists.
 * NOTE(review): whether the strings are node ids or hashes, and which
 * argument is treated as the baseline, is not visible here — confirm.
 */
static compare(dag1: MerkleDAG, dag2: MerkleDAG): {
|
|
21
|
+
added: string[];
|
|
22
|
+
removed: string[];
|
|
23
|
+
modified: string[];
|
|
24
|
+
};
|
|
25
|
+
}
|
|
26
|
+
//# sourceMappingURL=merkle.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"merkle.d.ts","sourceRoot":"","sources":["../../src/sync/merkle.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,aAAa;IAC1B,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,QAAQ,EAAE,MAAM,EAAE,CAAC;CACtB;AAED,qBAAa,SAAS;IAClB,KAAK,EAAE,GAAG,CAAC,MAAM,EAAE,aAAa,CAAC,CAAC;IAClC,OAAO,EAAE,MAAM,EAAE,CAAC;;IAOlB,OAAO,CAAC,IAAI;IAIL,OAAO,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,MAAM;IA2BhD,OAAO,CAAC,MAAM,EAAE,MAAM,GAAG,aAAa,GAAG,SAAS;IAIlD,WAAW,IAAI,aAAa,EAAE;IAI9B,YAAY,IAAI,aAAa,EAAE;IAI/B,YAAY,IAAI,aAAa,EAAE;IAI/B,SAAS,IAAI,GAAG;WAOT,WAAW,CAAC,IAAI,EAAE,GAAG,GAAG,SAAS;WAOjC,OAAO,CAAC,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,SAAS,GAAG;QAAE,KAAK,EAAE,MAAM,EAAE,CAAC;QAAC,OAAO,EAAE,MAAM,EAAE,CAAC;QAAC,QAAQ,EAAE,MAAM,EAAE,CAAA;KAAE;CAkBtH"}
|