opencode-rag-plugin 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/ReadMe.md +423 -0
- package/dist/chunker/base.d.ts +10 -0
- package/dist/chunker/base.js +34 -0
- package/dist/chunker/base.js.map +1 -0
- package/dist/chunker/c.d.ts +8 -0
- package/dist/chunker/c.js +16 -0
- package/dist/chunker/c.js.map +1 -0
- package/dist/chunker/cpp.d.ts +8 -0
- package/dist/chunker/cpp.js +17 -0
- package/dist/chunker/cpp.js.map +1 -0
- package/dist/chunker/csharp.d.ts +8 -0
- package/dist/chunker/csharp.js +17 -0
- package/dist/chunker/csharp.js.map +1 -0
- package/dist/chunker/css.d.ts +8 -0
- package/dist/chunker/css.js +14 -0
- package/dist/chunker/css.js.map +1 -0
- package/dist/chunker/factory.d.ts +27 -0
- package/dist/chunker/factory.js +138 -0
- package/dist/chunker/factory.js.map +1 -0
- package/dist/chunker/fallback.d.ts +8 -0
- package/dist/chunker/fallback.js +34 -0
- package/dist/chunker/fallback.js.map +1 -0
- package/dist/chunker/go.d.ts +8 -0
- package/dist/chunker/go.js +13 -0
- package/dist/chunker/go.js.map +1 -0
- package/dist/chunker/grammar.d.ts +12 -0
- package/dist/chunker/grammar.js +43 -0
- package/dist/chunker/grammar.js.map +1 -0
- package/dist/chunker/html.d.ts +8 -0
- package/dist/chunker/html.js +12 -0
- package/dist/chunker/html.js.map +1 -0
- package/dist/chunker/java.d.ts +8 -0
- package/dist/chunker/java.js +14 -0
- package/dist/chunker/java.js.map +1 -0
- package/dist/chunker/javascript.d.ts +8 -0
- package/dist/chunker/javascript.js +15 -0
- package/dist/chunker/javascript.js.map +1 -0
- package/dist/chunker/json.d.ts +8 -0
- package/dist/chunker/json.js +11 -0
- package/dist/chunker/json.js.map +1 -0
- package/dist/chunker/kotlin.d.ts +8 -0
- package/dist/chunker/kotlin.js +15 -0
- package/dist/chunker/kotlin.js.map +1 -0
- package/dist/chunker/loader.d.ts +2 -0
- package/dist/chunker/loader.js +27 -0
- package/dist/chunker/loader.js.map +1 -0
- package/dist/chunker/markdown.d.ts +7 -0
- package/dist/chunker/markdown.js +96 -0
- package/dist/chunker/markdown.js.map +1 -0
- package/dist/chunker/pdf.d.ts +8 -0
- package/dist/chunker/pdf.js +93 -0
- package/dist/chunker/pdf.js.map +1 -0
- package/dist/chunker/python.d.ts +8 -0
- package/dist/chunker/python.js +13 -0
- package/dist/chunker/python.js.map +1 -0
- package/dist/chunker/razor.d.ts +7 -0
- package/dist/chunker/razor.js +85 -0
- package/dist/chunker/razor.js.map +1 -0
- package/dist/chunker/ruby.d.ts +8 -0
- package/dist/chunker/ruby.js +14 -0
- package/dist/chunker/ruby.js.map +1 -0
- package/dist/chunker/rust.d.ts +8 -0
- package/dist/chunker/rust.js +17 -0
- package/dist/chunker/rust.js.map +1 -0
- package/dist/chunker/sln.d.ts +9 -0
- package/dist/chunker/sln.js +65 -0
- package/dist/chunker/sln.js.map +1 -0
- package/dist/chunker/swift.d.ts +8 -0
- package/dist/chunker/swift.js +17 -0
- package/dist/chunker/swift.js.map +1 -0
- package/dist/chunker/tex.d.ts +7 -0
- package/dist/chunker/tex.js +93 -0
- package/dist/chunker/tex.js.map +1 -0
- package/dist/chunker/typescript.d.ts +8 -0
- package/dist/chunker/typescript.js +17 -0
- package/dist/chunker/typescript.js.map +1 -0
- package/dist/chunker/uuid.d.ts +1 -0
- package/dist/chunker/uuid.js +8 -0
- package/dist/chunker/uuid.js.map +1 -0
- package/dist/chunker/xml.d.ts +8 -0
- package/dist/chunker/xml.js +11 -0
- package/dist/chunker/xml.js.map +1 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +291 -0
- package/dist/cli.js.map +1 -0
- package/dist/core/config.d.ts +59 -0
- package/dist/core/config.js +127 -0
- package/dist/core/config.js.map +1 -0
- package/dist/core/fileLogger.d.ts +6 -0
- package/dist/core/fileLogger.js +32 -0
- package/dist/core/fileLogger.js.map +1 -0
- package/dist/core/interfaces.d.ts +31 -0
- package/dist/core/interfaces.js +2 -0
- package/dist/core/interfaces.js.map +1 -0
- package/dist/core/manifest.d.ts +21 -0
- package/dist/core/manifest.js +48 -0
- package/dist/core/manifest.js.map +1 -0
- package/dist/embedder/factory.d.ts +4 -0
- package/dist/embedder/factory.js +27 -0
- package/dist/embedder/factory.js.map +1 -0
- package/dist/embedder/http.d.ts +11 -0
- package/dist/embedder/http.js +309 -0
- package/dist/embedder/http.js.map +1 -0
- package/dist/embedder/ollama.d.ts +14 -0
- package/dist/embedder/ollama.js +60 -0
- package/dist/embedder/ollama.js.map +1 -0
- package/dist/embedder/openai.d.ts +12 -0
- package/dist/embedder/openai.js +33 -0
- package/dist/embedder/openai.js.map +1 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.js +10 -0
- package/dist/index.js.map +1 -0
- package/dist/indexer.d.ts +49 -0
- package/dist/indexer.js +336 -0
- package/dist/indexer.js.map +1 -0
- package/dist/plugin-entry.d.ts +4 -0
- package/dist/plugin-entry.js +5 -0
- package/dist/plugin-entry.js.map +1 -0
- package/dist/plugin.d.ts +22 -0
- package/dist/plugin.js +477 -0
- package/dist/plugin.js.map +1 -0
- package/dist/retriever/retriever.d.ts +5 -0
- package/dist/retriever/retriever.js +14 -0
- package/dist/retriever/retriever.js.map +1 -0
- package/dist/types/opencode-plugin.d.ts +51 -0
- package/dist/vectorstore/lancedb.d.ts +18 -0
- package/dist/vectorstore/lancedb.js +196 -0
- package/dist/vectorstore/lancedb.js.map +1 -0
- package/dist/watcher.d.ts +14 -0
- package/dist/watcher.js +88 -0
- package/dist/watcher.js.map +1 -0
- package/package.json +82 -0
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import { uuid } from "./uuid.js";
|
|
2
|
+
const MAX_CHUNK_CHARS = 4000;
|
|
3
|
+
const MIN_GROUP_CHARS = 300;
|
|
4
|
+
const PARAGRAPH_SPLIT = /\n\s*\n/;
|
|
5
|
+
/**
 * Opens a PDF with pdf.js and resolves to the loaded document.
 *
 * Both `canvas` and the pdf.js legacy build are imported lazily, on every call.
 * Before pdf.js is imported, `DOMMatrix` and `DOMMatrixReadOnly` are installed on
 * `globalThis` from the `canvas` package's `DOMMatrix` export
 * (presumably because pdf.js expects these DOM globals when running under Node — confirm).
 *
 * @param buffer - PDF bytes; handed to pdf.js as `new Uint8Array(buffer)`.
 * @returns The `promise` of the pdf.js loading task.
 */
async function createPdfDocument(buffer) {
    const canvasModule = await import("canvas");
    // Only fill the globals in when they are nullish; an existing implementation is never replaced.
    if (globalThis.DOMMatrix == null) {
        globalThis.DOMMatrix = canvasModule.DOMMatrix;
    }
    if (globalThis.DOMMatrixReadOnly == null) {
        globalThis.DOMMatrixReadOnly = canvasModule.DOMMatrix;
    }
    const pdfjs = await import("pdfjs-dist/legacy/build/pdf.mjs");
    const bytes = new Uint8Array(buffer);
    return pdfjs.getDocument({ data: bytes }).promise;
}
|
|
13
|
+
/**
 * Extracts the plain text of every page of a PDF.
 *
 * Pages are read sequentially, starting at page 1. For each page, the text-content
 * items that are non-null objects with a `str` property are kept and their `str`
 * values are joined with a single space. Pages are then joined with a blank line
 * (`"\n\n"`).
 *
 * The pdf.js document is destroyed once extraction finishes or fails, so that
 * repeated calls do not accumulate loaded documents.
 *
 * @param buffer - PDF bytes, forwarded to `createPdfDocument`.
 * @returns The concatenated text of all pages.
 */
export async function extractPdfText(buffer) {
    const pdf = await createPdfDocument(buffer);
    try {
        const pageTexts = [];
        for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
            const page = await pdf.getPage(pageNumber);
            const content = await page.getTextContent();
            const strings = content.items
                .filter((item) => typeof item === "object" && item !== null && "str" in item)
                .map((item) => item.str);
            pageTexts.push(strings.join(" "));
        }
        return pageTexts.join("\n\n");
    }
    finally {
        // Release the document's resources even when a page fails to load or parse.
        await pdf.destroy();
    }
}
|
|
25
|
+
/**
 * Chunker for text that has already been extracted from a PDF.
 *
 * The text is split into paragraphs on blank lines (`PARAGRAPH_SPLIT`) and
 * neighbouring paragraphs are grouped into chunks:
 *  - a paragraph longer than `MAX_CHUNK_CHARS` becomes a chunk of its own (it is not split);
 *  - a group is closed before adding a paragraph that would push it past `MAX_CHUNK_CHARS`;
 *  - once a group holds at least `MIN_GROUP_CHARS` characters it is closed, unless the
 *    following paragraph is shorter than `MIN_GROUP_CHARS` (short paragraphs are kept
 *    with the text before them).
 *
 * `metadata.startLine` / `metadata.endLine` hold 1-based paragraph indices
 * (positions in the list of non-blank paragraphs), not source line numbers.
 */
export class PdfChunker {
    language = "pdf";
    fileExtensions = [".pdf"];
    /**
     * Groups the paragraphs of `content` into chunks.
     *
     * @param filePath - Stored unchanged in each chunk's metadata.
     * @param content - Extracted PDF text.
     * @returns Chunks in document order; an empty array when there is no non-blank paragraph.
     */
    async chunk(filePath, content) {
        if (content.trim().length === 0)
            return [];
        const paragraphs = content.split(PARAGRAPH_SPLIT).filter((p) => p.trim().length > 0);
        if (paragraphs.length === 0)
            return [];
        const chunks = [];
        let currentGroup = [];
        let currentSize = 0;
        let paragraphIndex = 0;
        // Emits the pending group as one chunk. `lastIndex` is the 1-based index of the
        // final paragraph in the group. It is passed explicitly because the group is
        // sometimes flushed after `paragraphIndex` has already advanced to a paragraph
        // that is not part of it.
        function flush(lastIndex) {
            const text = currentGroup.join("\n\n").trim();
            if (text.length === 0)
                return;
            chunks.push({
                id: uuid(),
                content: text,
                metadata: {
                    filePath,
                    startLine: lastIndex - currentGroup.length + 1,
                    endLine: lastIndex,
                    language: "pdf",
                },
            });
            currentGroup = [];
            currentSize = 0;
        }
        for (const para of paragraphs) {
            paragraphIndex++;
            const paraLen = para.length;
            if (paraLen > MAX_CHUNK_CHARS) {
                // The pending group ends at the previous paragraph.
                if (currentGroup.length > 0)
                    flush(paragraphIndex - 1);
                chunks.push({
                    id: uuid(),
                    content: para,
                    metadata: {
                        filePath,
                        startLine: paragraphIndex,
                        endLine: paragraphIndex,
                        language: "pdf",
                    },
                });
                continue;
            }
            if (currentGroup.length > 0 && currentSize + paraLen > MAX_CHUNK_CHARS) {
                // The current paragraph does not fit; close the group at the previous paragraph.
                flush(paragraphIndex - 1);
            }
            currentGroup.push(para);
            currentSize += paraLen;
            if (currentSize >= MIN_GROUP_CHARS && currentGroup.length >= 1) {
                // `paragraphIndex` is 1-based, so `paragraphs[paragraphIndex]` is the next paragraph.
                const nextParaStillSmall = paragraphIndex < paragraphs.length &&
                    paragraphs[paragraphIndex].length < MIN_GROUP_CHARS;
                if (!nextParaStillSmall) {
                    flush(paragraphIndex);
                }
            }
        }
        if (currentGroup.length > 0) {
            flush(paragraphIndex);
        }
        return chunks;
    }
}
/** Shared `PdfChunker` instance. */
export const pdfChunker = new PdfChunker();
|
|
93
|
+
//# sourceMappingURL=pdf.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pdf.js","sourceRoot":"","sources":["../../src/chunker/pdf.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAEjC,MAAM,eAAe,GAAG,IAAI,CAAC;AAC7B,MAAM,eAAe,GAAG,GAAG,CAAC;AAE5B,MAAM,eAAe,GAAG,SAAS,CAAC;AAElC,KAAK,UAAU,iBAAiB,CAAC,MAAc;IAC7C,MAAM,EAAE,SAAS,EAAE,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,CAAC;IAC7C,UAAU,CAAC,SAAS,KAAK,SAAmD,CAAC;IAC7E,UAAU,CAAC,iBAAiB,KAAK,SAA2D,CAAC;IAE7F,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,iCAAiC,CAAC,CAAC;IACxE,MAAM,WAAW,GAAG,WAAW,CAAC,EAAE,IAAI,EAAE,IAAI,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IAClE,OAAO,WAAW,CAAC,OAAO,CAAC;AAC7B,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,MAAc;IACjD,MAAM,GAAG,GAAG,MAAM,iBAAiB,CAAC,MAAM,CAAC,CAAC;IAC5C,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,GAAG,CAAC,QAAQ,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QAClC,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,cAAc,EAAE,CAAC;QAC5C,MAAM,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,MAAM,CACpC,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,IAAI,IAAI,KAAK,IAAI,IAAI,CAChD,CAAC;QACvB,MAAM,OAAO,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAClD,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAChC,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AAC5B,CAAC;AAED,MAAM,OAAO,UAAU;IACZ,QAAQ,GAAG,KAAK,CAAC;IACjB,cAAc,GAAG,CAAC,MAAM,CAAC,CAAC;IAEnC,KAAK,CAAC,KAAK,CAAC,QAAgB,EAAE,OAAe;QAC3C,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,CAAC;QAE3C,MAAM,UAAU,GAAG,OAAO,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACrF,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,CAAC;QAEvC,MAAM,MAAM,GAAY,EAAE,CAAC;QAC3B,IAAI,YAAY,GAAa,EAAE,CAAC;QAChC,IAAI,WAAW,GAAG,CAAC,CAAC;QACpB,IAAI,cAAc,GAAG,CAAC,CAAC;QAEvB,SAAS,KAAK;YACZ,MAAM,IAAI,GAAG,YAAY,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;YAC9C,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;gBAAE,OAAO;YAC9B,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,IAAI,EAAE;gBACV
,OAAO,EAAE,IAAI;gBACb,QAAQ,EAAE;oBACR,QAAQ;oBACR,SAAS,EAAE,cAAc,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC;oBACnD,OAAO,EAAE,cAAc;oBACvB,QAAQ,EAAE,KAAK;iBAChB;aACF,CAAC,CAAC;YACH,YAAY,GAAG,EAAE,CAAC;YAClB,WAAW,GAAG,CAAC,CAAC;QAClB,CAAC;QAED,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;YAC9B,cAAc,EAAE,CAAC;YACjB,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC;YAE5B,IAAI,OAAO,GAAG,eAAe,EAAE,CAAC;gBAC9B,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC;oBAAE,KAAK,EAAE,CAAC;gBACrC,MAAM,CAAC,IAAI,CAAC;oBACV,EAAE,EAAE,IAAI,EAAE;oBACV,OAAO,EAAE,IAAI;oBACb,QAAQ,EAAE;wBACR,QAAQ;wBACR,SAAS,EAAE,cAAc;wBACzB,OAAO,EAAE,cAAc;wBACvB,QAAQ,EAAE,KAAK;qBAChB;iBACF,CAAC,CAAC;gBACH,SAAS;YACX,CAAC;YAED,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,IAAI,WAAW,GAAG,OAAO,GAAG,eAAe,EAAE,CAAC;gBACvE,KAAK,EAAE,CAAC;YACV,CAAC;YAED,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACxB,WAAW,IAAI,OAAO,CAAC;YAEvB,IAAI,WAAW,IAAI,eAAe,IAAI,YAAY,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;gBAC/D,MAAM,kBAAkB,GACtB,cAAc,GAAG,UAAU,CAAC,MAAM;oBAClC,UAAU,CAAC,cAAc,CAAE,CAAC,MAAM,GAAG,eAAe,CAAC;gBACvD,IAAI,CAAC,kBAAkB,EAAE,CAAC;oBACxB,KAAK,EAAE,CAAC;gBACV,CAAC;YACH,CAAC;QACH,CAAC;QAED,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5B,KAAK,EAAE,CAAC;QACV,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;CACF;AAED,MAAM,CAAC,MAAM,UAAU,GAAG,IAAI,UAAU,EAAE,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { TreeSitterChunker } from "./base.js";
|
|
2
|
+
/**
 * Type declaration for the Python chunker, a `TreeSitterChunker` subclass
 * whose `language` and `grammarName` are both the literal `"python"`.
 */
export declare class PythonChunker extends TreeSitterChunker {
    readonly language: "python";
    readonly fileExtensions: Array<string>;
    readonly grammarName: "python";
    readonly nodeTypes: Set<string>;
}
/** Module-level `PythonChunker` instance. */
export declare const pythonChunker: PythonChunker;
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { TreeSitterChunker } from "./base.js";
|
|
2
|
+
// Syntax-node type names used to build `PythonChunker.nodeTypes`.
const PYTHON_NODE_TYPES = [
    "function_definition",
    "class_definition",
    "decorated_definition",
];
/**
 * Python configuration of `TreeSitterChunker` (imported from ./base.js).
 *
 * This class only sets fields: the language label, the handled file extension
 * (`.py`), the grammar name and a set of node type names. How the base class
 * uses them is not visible in this file — presumably `nodeTypes` selects the
 * syntax nodes that become chunks; confirm against ./base.js.
 */
export class PythonChunker extends TreeSitterChunker {
    language = "python";
    fileExtensions = [".py"];
    grammarName = "python";
    nodeTypes = new Set(PYTHON_NODE_TYPES);
}
/** Shared `PythonChunker` instance. */
export const pythonChunker = new PythonChunker();
|
|
13
|
+
//# sourceMappingURL=python.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"python.js","sourceRoot":"","sources":["../../src/chunker/python.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,WAAW,CAAC;AAE9C,MAAM,OAAO,aAAc,SAAQ,iBAAiB;IACzC,QAAQ,GAAG,QAAQ,CAAC;IACpB,cAAc,GAAG,CAAC,KAAK,CAAC,CAAC;IACzB,WAAW,GAAG,QAAQ,CAAC;IACvB,SAAS,GAAG,IAAI,GAAG,CAAC;QAC3B,qBAAqB;QACrB,kBAAkB;QAClB,sBAAsB;KACvB,CAAC,CAAC;CACJ;AAED,MAAM,CAAC,MAAM,aAAa,GAAG,IAAI,aAAa,EAAE,CAAC"}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { Chunker, Chunk } from "../core/interfaces.js";
|
|
2
|
+
/**
 * Type declaration for the Razor chunker, an implementation of the `Chunker`
 * interface whose `language` is the literal `"razor"`.
 */
export declare class RazorChunker implements Chunker {
    readonly language: "razor";
    readonly fileExtensions: Array<string>;
    /**
     * @param filePath - Path recorded in the metadata of every returned chunk.
     * @param content - Full text of the file to chunk.
     * @returns A promise of the chunks produced for the file.
     */
    chunk(filePath: string, content: string): Promise<Array<Chunk>>;
}
/** Module-level `RazorChunker` instance. */
export declare const razorChunker: RazorChunker;
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import { uuid } from "./uuid.js";
|
|
2
|
+
const CODE_BLOCK_REGEX = /@(code|functions)\s*\{/g;
|
|
3
|
+
/**
 * Counts the `"\n"` characters located at indices strictly below `end`.
 *
 * Adding 1 to the result gives the 1-based line number of the character at
 * index `end`.
 *
 * @param content - Text to scan.
 * @param end - Exclusive upper bound of the scanned index range.
 * @returns Number of newline characters in `content[0 .. end)`.
 */
function countNewlines(content, end) {
    let total = 0;
    let at = content.indexOf("\n");
    // Hop from newline to newline instead of inspecting every character.
    while (at !== -1 && at < end) {
        total += 1;
        at = content.indexOf("\n", at + 1);
    }
    return total;
}
|
|
11
|
+
/**
 * Locates every `@<keyword> { ... }` block in `content`.
 *
 * A block starts at the `@` of a match of `@<keyword>` followed by optional
 * whitespace and `{`, and ends just after the brace that balances that opening
 * brace. Braces are counted purely textually: braces inside string literals or
 * comments are counted too. If the braces never balance, the block extends to
 * the end of `content`.
 *
 * Matching resumes after the end of each block, so an `@<keyword> {` that
 * appears inside a block already found is not reported again as a second,
 * overlapping block.
 *
 * @param content - Text to scan.
 * @param keyword - Directive name without the `@`; it is interpolated into a
 *   regular expression as-is, so it must not contain regex metacharacters.
 * @returns `{ start, end }` offsets (end exclusive), in order of appearance.
 */
function findCodeBlocks(content, keyword) {
    const blocks = [];
    const regex = new RegExp(`@${keyword}\\s*\\{`, "g");
    let match;
    while ((match = regex.exec(content)) !== null) {
        // The match already consumed the opening brace, hence depth 1.
        let depth = 1;
        let pos = match.index + match[0].length;
        while (pos < content.length && depth > 0) {
            const ch = content[pos];
            if (ch === "{")
                depth++;
            else if (ch === "}")
                depth--;
            pos++;
        }
        blocks.push({ start: match.index, end: pos });
        // Continue searching after this block rather than inside it.
        regex.lastIndex = pos;
    }
    return blocks;
}
|
|
29
|
+
/**
 * Chunker for Razor files (`.razor`, `.cshtml`).
 *
 * The file is cut into consecutive regions: each `@code { ... }` /
 * `@functions { ... }` block is one region, and the text between blocks
 * (and before the first / after the last block) forms the other regions.
 * Every region with non-whitespace content becomes one chunk.
 */
export class RazorChunker {
    language = "razor";
    fileExtensions = [".razor", ".cshtml"];
    /**
     * Splits `content` into chunks.
     *
     * @param filePath - Stored unchanged in each chunk's metadata.
     * @param content - Full text of the Razor file.
     * @returns Chunks in file order. `content` of each chunk is trimmed, and
     *   `startLine` / `endLine` are the 1-based lines of the first and last
     *   character of that trimmed content. Empty array for blank input.
     */
    async chunk(filePath, content) {
        if (content.trim().length === 0)
            return [];
        const codeBlocks = [
            ...findCodeBlocks(content, "code"),
            ...findCodeBlocks(content, "functions"),
        ].sort((a, b) => a.start - b.start);
        const regions = [];
        let lastEnd = 0;
        for (const block of codeBlocks) {
            // A block that begins inside the previous one is already covered by it;
            // emitting it again would duplicate text and move `lastEnd` backwards.
            if (block.start < lastEnd)
                continue;
            if (block.start > lastEnd) {
                regions.push({ start: lastEnd, end: block.start });
            }
            regions.push({ start: block.start, end: block.end });
            lastEnd = block.end;
        }
        if (lastEnd < content.length) {
            regions.push({ start: lastEnd, end: content.length });
        }
        const chunks = [];
        for (const { start, end } of regions) {
            const raw = content.slice(start, end);
            const chunkContent = raw.trim();
            if (chunkContent.length === 0)
                continue;
            // Measure lines on the trimmed text so the reported range matches `content`
            // exactly instead of including surrounding blank lines.
            const contentStart = start + (raw.length - raw.trimStart().length);
            const contentEnd = contentStart + chunkContent.length;
            chunks.push({
                id: uuid(),
                content: chunkContent,
                metadata: {
                    filePath,
                    startLine: countNewlines(content, contentStart) + 1,
                    // `contentEnd` is exclusive and the character before it is not whitespace,
                    // so this is the line of the last character of the chunk.
                    endLine: countNewlines(content, contentEnd) + 1,
                    language: this.language,
                },
            });
        }
        return chunks;
    }
}
/** Shared `RazorChunker` instance. */
export const razorChunker = new RazorChunker();
|
|
85
|
+
//# sourceMappingURL=razor.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"razor.js","sourceRoot":"","sources":["../../src/chunker/razor.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAEjC,MAAM,gBAAgB,GAAG,yBAAyB,CAAC;AAEnD,SAAS,aAAa,CAAC,OAAe,EAAE,GAAW;IACjD,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;QAC7B,IAAI,OAAO,CAAC,CAAC,CAAC,KAAK,IAAI;YAAE,KAAK,EAAE,CAAC;IACnC,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,cAAc,CACrB,OAAe,EACf,OAAe;IAEf,MAAM,MAAM,GAAqC,EAAE,CAAC;IACpD,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,IAAI,OAAO,SAAS,EAAE,GAAG,CAAC,CAAC;IACpD,IAAI,KAA6B,CAAC;IAElC,OAAO,CAAC,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC9C,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,IAAI,GAAG,GAAG,KAAK,CAAC,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;QAExC,OAAO,GAAG,GAAG,OAAO,CAAC,MAAM,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;YACzC,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK,GAAG;gBAAE,KAAK,EAAE,CAAC;iBAC7B,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK,GAAG;gBAAE,KAAK,EAAE,CAAC;YACvC,GAAG,EAAE,CAAC;QACR,CAAC;QAED,MAAM,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC,CAAC;IAChD,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,OAAO,YAAY;IACd,QAAQ,GAAG,OAAO,CAAC;IACnB,cAAc,GAAG,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;IAEhD,KAAK,CAAC,KAAK,CAAC,QAAgB,EAAE,OAAe;QAC3C,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,CAAC;QAE3C,MAAM,UAAU,GAAG;YACjB,GAAG,cAAc,CAAC,OAAO,EAAE,MAAM,CAAC;YAClC,GAAG,cAAc,CAAC,OAAO,EAAE,WAAW,CAAC;SACxC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;QAEpC,MAAM,OAAO,GAAqC,EAAE,CAAC;QACrD,IAAI,OAAO,GAAG,CAAC,CAAC;QAEhB,KAAK,MAAM,KAAK,IAAI,UAAU,EAAE,CAAC;YAC/B,IAAI,KAAK,CAAC,KAAK,GAAG,OAAO,EAAE,CAAC;gBAC1B,OAAO,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,GAAG,EAAE,KAAK,CAAC,KAAK,EAAE,CAAC,CAAC;YACrD,CAAC;YACD,OAAO,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE,GAAG,EAAE,KAAK,CAAC,GAAG,EAAE,CAAC,CAAC;YACrD,OAAO,GAAG,KAAK,CAAC,GAAG,CAAC;QACtB,CAAC;QAED,IAAI,OAAO,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;YAC7B,OAAO,CAAC,
IAAI,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,GAAG,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;QACxD,CAAC;QAED,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACzB,OAAO;gBACL;oBACE,EAAE,EAAE,IAAI,EAAE;oBACV,OAAO;oBACP,QAAQ,EAAE;wBACR,QAAQ;wBACR,SAAS,EAAE,CAAC;wBACZ,OAAO,EAAE,aAAa,CAAC,OAAO,EAAE,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC;wBACnD,QAAQ,EAAE,IAAI,CAAC,QAAQ;qBACxB;iBACF;aACF,CAAC;QACJ,CAAC;QAED,OAAO,OAAO;aACX,GAAG,CAAC,CAAC,EAAE,KAAK,EAAE,GAAG,EAAE,EAAE,EAAE;YACtB,MAAM,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;YACtD,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC;gBAAE,OAAO,IAAI,CAAC;YAE3C,OAAO;gBACL,EAAE,EAAE,IAAI,EAAE;gBACV,OAAO,EAAE,YAAY;gBACrB,QAAQ,EAAE;oBACR,QAAQ;oBACR,SAAS,EAAE,aAAa,CAAC,OAAO,EAAE,KAAK,CAAC,GAAG,CAAC;oBAC5C,OAAO,EAAE,aAAa,CAAC,OAAO,EAAE,GAAG,CAAC,GAAG,CAAC;oBACxC,QAAQ,EAAE,IAAI,CAAC,QAAQ;iBACxB;aACF,CAAC;QACJ,CAAC,CAAC;aACD,MAAM,CAAC,CAAC,CAAC,EAAc,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC;IAC3C,CAAC;CACF;AAED,MAAM,CAAC,MAAM,YAAY,GAAG,IAAI,YAAY,EAAE,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { TreeSitterChunker } from "./base.js";
|
|
2
|
+
/**
 * Type declaration for the Ruby chunker, a `TreeSitterChunker` subclass
 * whose `language` and `grammarName` are both the literal `"ruby"`.
 */
export declare class RubyChunker extends TreeSitterChunker {
    readonly language: "ruby";
    readonly fileExtensions: Array<string>;
    readonly grammarName: "ruby";
    readonly nodeTypes: Set<string>;
}
/** Module-level `RubyChunker` instance. */
export declare const rubyChunker: RubyChunker;
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { TreeSitterChunker } from "./base.js";
|
|
2
|
+
// Syntax-node type names used to build `RubyChunker.nodeTypes`.
const RUBY_NODE_TYPES = ["method", "class", "module", "singleton_method"];
/**
 * Ruby configuration of `TreeSitterChunker` (imported from ./base.js).
 *
 * This class only sets fields: the language label, the handled file extension
 * (`.rb`), the grammar name and a set of node type names. How the base class
 * uses them is not visible in this file — presumably `nodeTypes` selects the
 * syntax nodes that become chunks; confirm against ./base.js.
 */
export class RubyChunker extends TreeSitterChunker {
    language = "ruby";
    fileExtensions = [".rb"];
    grammarName = "ruby";
    nodeTypes = new Set(RUBY_NODE_TYPES);
}
/** Shared `RubyChunker` instance. */
export const rubyChunker = new RubyChunker();
|
|
14
|
+
//# sourceMappingURL=ruby.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ruby.js","sourceRoot":"","sources":["../../src/chunker/ruby.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,WAAW,CAAC;AAE9C,MAAM,OAAO,WAAY,SAAQ,iBAAiB;IACvC,QAAQ,GAAG,MAAM,CAAC;IAClB,cAAc,GAAG,CAAC,KAAK,CAAC,CAAC;IACzB,WAAW,GAAG,MAAM,CAAC;IACrB,SAAS,GAAG,IAAI,GAAG,CAAC;QAC3B,QAAQ;QACR,OAAO;QACP,QAAQ;QACR,kBAAkB;KACnB,CAAC,CAAC;CACJ;AAED,MAAM,CAAC,MAAM,WAAW,GAAG,IAAI,WAAW,EAAE,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { TreeSitterChunker } from "./base.js";
|
|
2
|
+
/**
 * Type declaration for the Rust chunker, a `TreeSitterChunker` subclass
 * whose `language` and `grammarName` are both the literal `"rust"`.
 */
export declare class RustChunker extends TreeSitterChunker {
    readonly language: "rust";
    readonly fileExtensions: Array<string>;
    readonly grammarName: "rust";
    readonly nodeTypes: Set<string>;
}
/** Module-level `RustChunker` instance. */
export declare const rustChunker: RustChunker;
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { TreeSitterChunker } from "./base.js";
|
|
2
|
+
// Syntax-node type names used to build `RustChunker.nodeTypes`.
const RUST_NODE_TYPES = [
    "function_item",
    "struct_item",
    "enum_item",
    "trait_item",
    "impl_item",
    "mod_item",
    "type_item",
];
/**
 * Rust configuration of `TreeSitterChunker` (imported from ./base.js).
 *
 * This class only sets fields: the language label, the handled file extension
 * (`.rs`), the grammar name and a set of node type names. How the base class
 * uses them is not visible in this file — presumably `nodeTypes` selects the
 * syntax nodes that become chunks; confirm against ./base.js.
 */
export class RustChunker extends TreeSitterChunker {
    language = "rust";
    fileExtensions = [".rs"];
    grammarName = "rust";
    nodeTypes = new Set(RUST_NODE_TYPES);
}
/** Shared `RustChunker` instance. */
export const rustChunker = new RustChunker();
|
|
17
|
+
//# sourceMappingURL=rust.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"rust.js","sourceRoot":"","sources":["../../src/chunker/rust.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,WAAW,CAAC;AAE9C,MAAM,OAAO,WAAY,SAAQ,iBAAiB;IACvC,QAAQ,GAAG,MAAM,CAAC;IAClB,cAAc,GAAG,CAAC,KAAK,CAAC,CAAC;IACzB,WAAW,GAAG,MAAM,CAAC;IACrB,SAAS,GAAG,IAAI,GAAG,CAAC;QAC3B,eAAe;QACf,aAAa;QACb,WAAW;QACX,YAAY;QACZ,WAAW;QACX,UAAU;QACV,WAAW;KACZ,CAAC,CAAC;CACJ;AAED,MAAM,CAAC,MAAM,WAAW,GAAG,IAAI,WAAW,EAAE,CAAC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { Chunker, Chunk } from "../core/interfaces.js";
|
|
2
|
+
/**
 * Type declaration for the Visual Studio solution (`.sln`) chunker, an
 * implementation of the `Chunker` interface whose `language` and `name`
 * are both the literal `"sln"`.
 */
export declare class SlnChunker implements Chunker {
    readonly language: "sln";
    readonly fileExtensions: Array<string>;
    readonly name: "sln";
    /**
     * @param filePath - Path recorded in the metadata of every returned chunk.
     * @param content - Full text of the solution file.
     * @returns A promise of the chunks produced for the file.
     */
    chunk(filePath: string, content: string): Promise<Array<Chunk>>;
    private makeChunk;
}
/** Module-level `SlnChunker` instance. */
export declare const slnChunker: SlnChunker;
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import { uuid } from "./uuid.js";
|
|
2
|
+
// Line classifiers; each is applied to a line after trimming.
const PROJECT_LINE = /^Project\("/;
const GLOBAL_LINE = /^Global\s*$/;
const END_GLOBAL_LINE = /^EndGlobal\s*$/;
const END_PROJECT_LINE = /^EndProject\s*$/;
/**
 * Chunker for Visual Studio solution files (`.sln`).
 *
 * A section opens on a `Project("...` or `Global` line and closes on the next
 * `EndProject` / `EndGlobal` line; every section becomes one chunk. A section
 * that is still open when another one opens is closed on the line before the
 * new one. Lines outside any section (for example anything before the first
 * `Project("...` line) are not emitted when at least one section is found.
 * When no section is found at all, the whole file becomes a single chunk.
 */
export class SlnChunker {
    language = "sln";
    fileExtensions = [".sln"];
    name = "sln";
    /**
     * Splits a solution file into one chunk per section.
     *
     * @param filePath - Stored unchanged in each chunk's metadata.
     * @param content - Full text of the solution file.
     * @returns Chunks in file order; an empty array for blank input.
     */
    async chunk(filePath, content) {
        if (content.trim().length === 0) {
            return [];
        }
        const lines = content.split("\n");
        const lastLine = lines.length - 1;
        const chunks = [];
        // 0-based index of the first line of the open section, or -1 when none is open.
        let openedAt = -1;
        lines.forEach((rawLine, index) => {
            const text = rawLine.trim();
            const startsSection = PROJECT_LINE.test(text) || GLOBAL_LINE.test(text);
            const endsSection = END_PROJECT_LINE.test(text) || END_GLOBAL_LINE.test(text);
            if (startsSection) {
                // An unterminated section ends on the line before the new one.
                if (openedAt !== -1) {
                    chunks.push(this.makeChunk(lines, openedAt, index - 1, filePath));
                }
                openedAt = index;
            }
            else if (endsSection && openedAt !== -1) {
                chunks.push(this.makeChunk(lines, openedAt, index, filePath));
                openedAt = -1;
            }
        });
        if (openedAt !== -1) {
            // Section still open at end of file: it runs to the last line.
            chunks.push(this.makeChunk(lines, openedAt, lastLine, filePath));
        }
        else if (chunks.length === 0) {
            // No section markers at all: fall back to one chunk for the whole file.
            chunks.push(this.makeChunk(lines, 0, lastLine, filePath));
        }
        return chunks;
    }
    /**
     * Builds a chunk from an inclusive range of lines.
     *
     * @param lines - All lines of the file.
     * @param start - 0-based index of the first line of the chunk.
     * @param end - 0-based index of the last line of the chunk (inclusive).
     * @param filePath - Stored unchanged in the chunk's metadata.
     * @returns The chunk; its `startLine` / `endLine` are 1-based.
     */
    makeChunk(lines, start, end, filePath) {
        const metadata = {
            filePath,
            startLine: start + 1,
            endLine: end + 1,
            language: "sln",
        };
        return {
            id: uuid(),
            content: lines.slice(start, end + 1).join("\n"),
            metadata,
        };
    }
}
/** Shared `SlnChunker` instance. */
export const slnChunker = new SlnChunker();
|
|
65
|
+
//# sourceMappingURL=sln.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sln.js","sourceRoot":"","sources":["../../src/chunker/sln.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAEjC,MAAM,YAAY,GAAG,aAAa,CAAC;AACnC,MAAM,WAAW,GAAG,aAAa,CAAC;AAClC,MAAM,eAAe,GAAG,gBAAgB,CAAC;AACzC,MAAM,gBAAgB,GAAG,iBAAiB,CAAC;AAE3C,MAAM,OAAO,UAAU;IACZ,QAAQ,GAAG,KAAK,CAAC;IACjB,cAAc,GAAG,CAAC,MAAM,CAAC,CAAC;IAC1B,IAAI,GAAG,KAAK,CAAC;IAEtB,KAAK,CAAC,KAAK,CAAC,QAAgB,EAAE,OAAe;QAC3C,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,CAAC;QAE3C,MAAM,MAAM,GAAY,EAAE,CAAC;QAC3B,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAElC,IAAI,YAAY,GAAG,CAAC,CAAC;QACrB,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,IAAI,UAAU,GAAG,KAAK,CAAC;QAEvB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,OAAO,GAAG,KAAK,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE,CAAC;YAEjC,IAAI,YAAY,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;gBAC/B,IAAI,UAAU,EAAE,CAAC;oBACf,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,YAAY,EAAE,CAAC,GAAG,CAAC,EAAE,QAAQ,CAAC,CAAC,CAAC;gBACpE,CAAC;gBACD,YAAY,GAAG,CAAC,CAAC;gBACjB,UAAU,GAAG,IAAI,CAAC;YACpB,CAAC;iBAAM,IAAI,gBAAgB,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,eAAe,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;gBAC3E,IAAI,UAAU,EAAE,CAAC;oBACf,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,YAAY,EAAE,CAAC,EAAE,QAAQ,CAAC,CAAC,CAAC;oBAC9D,UAAU,GAAG,KAAK,CAAC;gBACrB,CAAC;YACH,CAAC;iBAAM,IAAI,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;gBACrC,IAAI,UAAU,EAAE,CAAC;oBACf,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,YAAY,EAAE,CAAC,GAAG,CAAC,EAAE,QAAQ,CAAC,CAAC,CAAC;gBACpE,CAAC;gBACD,YAAY,GAAG,CAAC,CAAC;gBACjB,UAAU,GAAG,IAAI,CAAC;YACpB,CAAC;YAED,UAAU,GAAG,CAAC,CAAC;QACjB,CAAC;QAED,IAAI,UAAU,IAAI,YAAY,IAAI,UAAU,EAAE,CAAC;YAC7C,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,YAAY,EAAE,UAAU,EAAE,QAAQ,CAAC,CAAC,CAAC;QACzE,CAAC;aAAM,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC/B,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,CAAC,EAAE,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,QAAQ,CAAC,CAAC,CAAC;QACpE,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAEO
,SAAS,CACf,KAAe,EACf,KAAa,EACb,GAAW,EACX,QAAgB;QAEhB,MAAM,OAAO,GAAG,KAAK,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvD,OAAO;YACL,EAAE,EAAE,IAAI,EAAE;YACV,OAAO;YACP,QAAQ,EAAE;gBACR,QAAQ;gBACR,SAAS,EAAE,KAAK,GAAG,CAAC;gBACpB,OAAO,EAAE,GAAG,GAAG,CAAC;gBAChB,QAAQ,EAAE,KAAK;aAChB;SACF,CAAC;IACJ,CAAC;CACF;AAED,MAAM,CAAC,MAAM,UAAU,GAAG,IAAI,UAAU,EAAE,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { TreeSitterChunker } from "./base.js";
|
|
2
|
+
/**
 * Type declaration for the Swift chunker, a `TreeSitterChunker` subclass
 * whose `language` and `grammarName` are both the literal `"swift"`.
 */
export declare class SwiftChunker extends TreeSitterChunker {
    readonly language: "swift";
    readonly fileExtensions: Array<string>;
    readonly grammarName: "swift";
    readonly nodeTypes: Set<string>;
}
/** Module-level `SwiftChunker` instance. */
export declare const swiftChunker: SwiftChunker;
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { TreeSitterChunker } from "./base.js";
|
|
2
|
+
// Syntax-node type names used to build `SwiftChunker.nodeTypes`.
const SWIFT_NODE_TYPES = [
    "function_declaration",
    "class_declaration",
    "struct_declaration",
    "enum_declaration",
    "protocol_declaration",
    "extension_declaration",
    "variable_declaration",
];
/**
 * Swift configuration of `TreeSitterChunker` (imported from ./base.js).
 *
 * This class only sets fields: the language label, the handled file extension
 * (`.swift`), the grammar name and a set of node type names. How the base class
 * uses them is not visible in this file — presumably `nodeTypes` selects the
 * syntax nodes that become chunks; confirm against ./base.js.
 */
export class SwiftChunker extends TreeSitterChunker {
    language = "swift";
    fileExtensions = [".swift"];
    grammarName = "swift";
    nodeTypes = new Set(SWIFT_NODE_TYPES);
}
/** Shared `SwiftChunker` instance. */
export const swiftChunker = new SwiftChunker();
|
|
17
|
+
//# sourceMappingURL=swift.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"swift.js","sourceRoot":"","sources":["../../src/chunker/swift.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,WAAW,CAAC;AAE9C,MAAM,OAAO,YAAa,SAAQ,iBAAiB;IACxC,QAAQ,GAAG,OAAO,CAAC;IACnB,cAAc,GAAG,CAAC,QAAQ,CAAC,CAAC;IAC5B,WAAW,GAAG,OAAO,CAAC;IACtB,SAAS,GAAG,IAAI,GAAG,CAAC;QAC3B,sBAAsB;QACtB,mBAAmB;QACnB,oBAAoB;QACpB,kBAAkB;QAClB,sBAAsB;QACtB,uBAAuB;QACvB,sBAAsB;KACvB,CAAC,CAAC;CACJ;AAED,MAAM,CAAC,MAAM,YAAY,GAAG,IAAI,YAAY,EAAE,CAAC"}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { Chunker, Chunk } from "../core/interfaces.js";
|
|
2
|
+
/**
 * Type declaration for the LaTeX chunker, an implementation of the `Chunker`
 * interface whose `language` is the literal `"latex"`.
 */
export declare class TexChunker implements Chunker {
    readonly language: "latex";
    readonly fileExtensions: Array<string>;
    /**
     * @param filePath - Path recorded in the metadata of every returned chunk.
     * @param content - Full text of the file to chunk.
     * @returns A promise of the chunks produced for the file.
     */
    chunk(filePath: string, content: string): Promise<Array<Chunk>>;
}
/** Module-level `TexChunker` instance. */
export declare const texChunker: TexChunker;
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import { uuid } from "./uuid.js";
|
|
2
|
+
const SECTION_REGEX = /^\\(chapter|section|subsection|subsubsection)\*?\s*\{/gm;
|
|
3
|
+
export class TexChunker {
|
|
4
|
+
language = "latex";
|
|
5
|
+
fileExtensions = [".tex"];
|
|
6
|
+
async chunk(filePath, content) {
|
|
7
|
+
if (content.trim().length === 0)
|
|
8
|
+
return [];
|
|
9
|
+
const chunks = [];
|
|
10
|
+
const lines = content.split("\n");
|
|
11
|
+
const sections = [];
|
|
12
|
+
let inCommentBlock = false;
|
|
13
|
+
let currentSectionStart = 1;
|
|
14
|
+
let currentCommand = "";
|
|
15
|
+
let currentName = "";
|
|
16
|
+
for (let i = 0; i < lines.length; i++) {
|
|
17
|
+
const line = lines[i] ?? "";
|
|
18
|
+
const commentMatch = line.match(/^\\begin\{comment\}/);
|
|
19
|
+
if (commentMatch) {
|
|
20
|
+
inCommentBlock = true;
|
|
21
|
+
continue;
|
|
22
|
+
}
|
|
23
|
+
if (inCommentBlock) {
|
|
24
|
+
if (line.match(/^\\end\{comment\}/)) {
|
|
25
|
+
inCommentBlock = false;
|
|
26
|
+
}
|
|
27
|
+
continue;
|
|
28
|
+
}
|
|
29
|
+
SECTION_REGEX.lastIndex = 0;
|
|
30
|
+
const match = SECTION_REGEX.exec(line);
|
|
31
|
+
if (match) {
|
|
32
|
+
if (currentName) {
|
|
33
|
+
sections.push({
|
|
34
|
+
command: currentCommand,
|
|
35
|
+
name: currentName,
|
|
36
|
+
startLine: currentSectionStart,
|
|
37
|
+
});
|
|
38
|
+
}
|
|
39
|
+
currentCommand = match[1] ?? "";
|
|
40
|
+
const braceContent = line.slice(match.index + match[0].length);
|
|
41
|
+
const closing = braceContent.indexOf("}");
|
|
42
|
+
currentName = closing >= 0 ? braceContent.slice(0, closing) : "";
|
|
43
|
+
currentSectionStart = i + 1;
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
if (currentName) {
|
|
47
|
+
sections.push({
|
|
48
|
+
command: currentCommand,
|
|
49
|
+
name: currentName,
|
|
50
|
+
startLine: currentSectionStart,
|
|
51
|
+
});
|
|
52
|
+
}
|
|
53
|
+
if (sections.length === 0) {
|
|
54
|
+
return [
|
|
55
|
+
{
|
|
56
|
+
id: uuid(),
|
|
57
|
+
content,
|
|
58
|
+
metadata: {
|
|
59
|
+
filePath,
|
|
60
|
+
startLine: 1,
|
|
61
|
+
endLine: lines.length,
|
|
62
|
+
language: this.language,
|
|
63
|
+
},
|
|
64
|
+
},
|
|
65
|
+
];
|
|
66
|
+
}
|
|
67
|
+
for (let i = 0; i < sections.length; i++) {
|
|
68
|
+
const section = sections[i];
|
|
69
|
+
const startLine = section.startLine;
|
|
70
|
+
const endLine = i + 1 < sections.length
|
|
71
|
+
? sections[i + 1].startLine - 1
|
|
72
|
+
: lines.length;
|
|
73
|
+
if (startLine > endLine)
|
|
74
|
+
continue;
|
|
75
|
+
const chunkContent = lines.slice(startLine - 1, endLine).join("\n").trim();
|
|
76
|
+
if (chunkContent.length === 0)
|
|
77
|
+
continue;
|
|
78
|
+
chunks.push({
|
|
79
|
+
id: uuid(),
|
|
80
|
+
content: chunkContent,
|
|
81
|
+
metadata: {
|
|
82
|
+
filePath,
|
|
83
|
+
startLine,
|
|
84
|
+
endLine,
|
|
85
|
+
language: this.language,
|
|
86
|
+
},
|
|
87
|
+
});
|
|
88
|
+
}
|
|
89
|
+
return chunks;
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
/** Module-level TexChunker instance, constructed once when this module is loaded. */
export const texChunker = new TexChunker();
|
|
93
|
+
//# sourceMappingURL=tex.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tex.js","sourceRoot":"","sources":["../../src/chunker/tex.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAEjC,MAAM,aAAa,GAAG,yDAAyD,CAAC;AAEhF,MAAM,OAAO,UAAU;IACZ,QAAQ,GAAG,OAAO,CAAC;IACnB,cAAc,GAAG,CAAC,MAAM,CAAC,CAAC;IAEnC,KAAK,CAAC,KAAK,CAAC,QAAgB,EAAE,OAAe;QAC3C,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,CAAC;QAE3C,MAAM,MAAM,GAAY,EAAE,CAAC;QAC3B,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAClC,MAAM,QAAQ,GAA2D,EAAE,CAAC;QAE5E,IAAI,cAAc,GAAG,KAAK,CAAC;QAC3B,IAAI,mBAAmB,GAAG,CAAC,CAAC;QAC5B,IAAI,cAAc,GAAG,EAAE,CAAC;QACxB,IAAI,WAAW,GAAG,EAAE,CAAC;QAErB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YAE5B,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,qBAAqB,CAAC,CAAC;YACvD,IAAI,YAAY,EAAE,CAAC;gBACjB,cAAc,GAAG,IAAI,CAAC;gBACtB,SAAS;YACX,CAAC;YACD,IAAI,cAAc,EAAE,CAAC;gBACnB,IAAI,IAAI,CAAC,KAAK,CAAC,mBAAmB,CAAC,EAAE,CAAC;oBACpC,cAAc,GAAG,KAAK,CAAC;gBACzB,CAAC;gBACD,SAAS;YACX,CAAC;YAED,aAAa,CAAC,SAAS,GAAG,CAAC,CAAC;YAC5B,MAAM,KAAK,GAAG,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACvC,IAAI,KAAK,EAAE,CAAC;gBACV,IAAI,WAAW,EAAE,CAAC;oBAChB,QAAQ,CAAC,IAAI,CAAC;wBACZ,OAAO,EAAE,cAAc;wBACvB,IAAI,EAAE,WAAW;wBACjB,SAAS,EAAE,mBAAmB;qBAC/B,CAAC,CAAC;gBACL,CAAC;gBACD,cAAc,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;gBAChC,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;gBAC/D,MAAM,OAAO,GAAG,YAAY,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;gBAC1C,WAAW,GAAG,OAAO,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBACjE,mBAAmB,GAAG,CAAC,GAAG,CAAC,CAAC;YAC9B,CAAC;QACH,CAAC;QAED,IAAI,WAAW,EAAE,CAAC;YAChB,QAAQ,CAAC,IAAI,CAAC;gBACZ,OAAO,EAAE,cAAc;gBACvB,IAAI,EAAE,WAAW;gBACjB,SAAS,EAAE,mBAAmB;aAC/B,CAAC,CAAC;QACL,CAAC;QAED,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1B,OAAO;gBACL;oBACE,EAAE,EAAE,IAAI,EAAE;oBACV,OAAO;oBACP,QAAQ,EAAE;wBACR,QAAQ;wBACR,SAAS,EAAE,CAAC;wBACZ,OAAO,EAAE,KAAK,CAAC,MA
AM;wBACrB,QAAQ,EAAE,IAAI,CAAC,QAAQ;qBACxB;iBACF;aACF,CAAC;QACJ,CAAC;QAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACzC,MAAM,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAE,CAAC;YAC7B,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,CAAC;YACpC,MAAM,OAAO,GACX,CAAC,GAAG,CAAC,GAAG,QAAQ,CAAC,MAAM;gBACrB,CAAC,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAE,CAAC,SAAS,GAAG,CAAC;gBAChC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC;YAEnB,IAAI,SAAS,GAAG,OAAO;gBAAE,SAAS;YAElC,MAAM,YAAY,GAAG,KAAK,CAAC,KAAK,CAAC,SAAS,GAAG,CAAC,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;YAC3E,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC;gBAAE,SAAS;YAExC,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,IAAI,EAAE;gBACV,OAAO,EAAE,YAAY;gBACrB,QAAQ,EAAE;oBACR,QAAQ;oBACR,SAAS;oBACT,OAAO;oBACP,QAAQ,EAAE,IAAI,CAAC,QAAQ;iBACxB;aACF,CAAC,CAAC;QACL,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;CACF;AAED,MAAM,CAAC,MAAM,UAAU,GAAG,IAAI,UAAU,EAAE,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { TreeSitterChunker } from "./base.js";
|
|
2
|
+
/**
 * Type declaration for the TypeScript chunker, a `TreeSitterChunker`
 * subclass that only contributes configuration values.
 */
export declare class TypeScriptChunker extends TreeSitterChunker {
    /** Language label; fixed to the literal `"typescript"`. */
    readonly language = "typescript";
    /** File extensions this chunker is associated with. */
    readonly fileExtensions: string[];
    /** Grammar identifier; fixed to the literal `"typescript"`. */
    readonly grammarName = "typescript";
    /** Set of syntax-node type names used by this chunker. */
    readonly nodeTypes: Set<string>;
}
|
|
8
|
+
/** Exported `TypeScriptChunker` instance. */
export declare const typescriptChunker: TypeScriptChunker;
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { TreeSitterChunker } from "./base.js";
|
|
2
|
+
/**
 * Chunker configuration for TypeScript sources.
 *
 * The class adds no behaviour of its own; it only supplies the values that
 * the inherited `TreeSitterChunker` implementation reads.
 */
export class TypeScriptChunker extends TreeSitterChunker {
    language = "typescript";
    // NOTE(review): ".tsx" shares the "typescript" grammar name below.
    // tree-sitter-typescript publishes a separate "tsx" grammar — confirm
    // that JSX-bearing files parse as intended with this one.
    fileExtensions = [".ts", ".tsx"];
    grammarName = "typescript";
    // Syntax-node type names handed to the base class (presumably the nodes
    // that become chunks — verify against base.js).
    nodeTypes = new Set([
        // callables
        "function_declaration",
        "method_definition",
        "class_declaration",
        "arrow_function",
        // type-level declarations
        "interface_declaration",
        "type_alias_declaration",
        // module surface
        "export_statement",
    ]);
}
|
|
16
|
+
/** Module-level TypeScriptChunker instance, constructed once when this module is loaded. */
export const typescriptChunker = new TypeScriptChunker();
|
|
17
|
+
//# sourceMappingURL=typescript.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"typescript.js","sourceRoot":"","sources":["../../src/chunker/typescript.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,WAAW,CAAC;AAE9C,MAAM,OAAO,iBAAkB,SAAQ,iBAAiB;IAC7C,QAAQ,GAAG,YAAY,CAAC;IACxB,cAAc,GAAG,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC;IACjC,WAAW,GAAG,YAAY,CAAC;IAC3B,SAAS,GAAG,IAAI,GAAG,CAAC;QAC3B,sBAAsB;QACtB,mBAAmB;QACnB,mBAAmB;QACnB,gBAAgB;QAChB,uBAAuB;QACvB,wBAAwB;QACxB,kBAAkB;KACnB,CAAC,CAAC;CACJ;AAED,MAAM,CAAC,MAAM,iBAAiB,GAAG,IAAI,iBAAiB,EAAE,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/**
 * Takes no arguments and returns a string.
 * Presumably a freshly generated unique identifier — confirm in uuid.js.
 */
export declare function uuid(): string;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"uuid.js","sourceRoot":"","sources":["../../src/chunker/uuid.ts"],"names":[],"mappings":"AAAA,MAAM,UAAU,IAAI;IAClB,OAAO,sCAAsC,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC,EAAE,EAAE;QACnE,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC;QACnC,MAAM,CAAC,GAAG,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC;QAC1C,OAAO,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;IACxB,CAAC,CAAC,CAAC;AACL,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { TreeSitterChunker } from "./base.js";
|
|
2
|
+
/**
 * Type declaration for the XML chunker, a `TreeSitterChunker` subclass
 * that only contributes configuration values.
 */
export declare class XmlChunker extends TreeSitterChunker {
    /** Language label; fixed to the literal `"xml"`. */
    readonly language = "xml";
    /** File extensions this chunker is associated with. */
    readonly fileExtensions: string[];
    /** Grammar identifier; fixed to the literal `"xml"`. */
    readonly grammarName = "xml";
    /** Set of syntax-node type names used by this chunker. */
    readonly nodeTypes: Set<string>;
}
|
|
8
|
+
/** Exported `XmlChunker` instance. */
export declare const xmlChunker: XmlChunker;
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { TreeSitterChunker } from "./base.js";
|
|
2
|
+
/**
 * Chunker configuration for XML documents.
 *
 * The class adds no behaviour of its own; it only supplies the values that
 * the inherited `TreeSitterChunker` implementation reads.
 */
export class XmlChunker extends TreeSitterChunker {
    language = "xml";
    // Covers plain XML files and .csproj project files.
    fileExtensions = [".xml", ".csproj"];
    grammarName = "xml";
    // Only one syntax-node type is listed: "element".
    nodeTypes = new Set(["element"]);
}
|
|
10
|
+
/** Module-level XmlChunker instance, constructed once when this module is loaded. */
export const xmlChunker = new XmlChunker();
|
|
11
|
+
//# sourceMappingURL=xml.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"xml.js","sourceRoot":"","sources":["../../src/chunker/xml.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,WAAW,CAAC;AAE9C,MAAM,OAAO,UAAW,SAAQ,iBAAiB;IACtC,QAAQ,GAAG,KAAK,CAAC;IACjB,cAAc,GAAG,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;IACrC,WAAW,GAAG,KAAK,CAAC;IACpB,SAAS,GAAG,IAAI,GAAG,CAAC;QAC3B,SAAS;KACV,CAAC,CAAC;CACJ;AAED,MAAM,CAAC,MAAM,UAAU,GAAG,IAAI,UAAU,EAAE,CAAC"}
|
package/dist/cli.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/**
 * Asynchronous function taking an optional array of argument strings.
 * Presumably the command-line entry point — confirm in cli.js.
 *
 * @param argv Optional list of argument strings.
 * @returns Promise that resolves with no value.
 */
export declare function runCli(argv?: string[]): Promise<void>;
|