opencode-rag-plugin 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/ReadMe.md +423 -0
- package/dist/chunker/base.d.ts +10 -0
- package/dist/chunker/base.js +34 -0
- package/dist/chunker/base.js.map +1 -0
- package/dist/chunker/c.d.ts +8 -0
- package/dist/chunker/c.js +16 -0
- package/dist/chunker/c.js.map +1 -0
- package/dist/chunker/cpp.d.ts +8 -0
- package/dist/chunker/cpp.js +17 -0
- package/dist/chunker/cpp.js.map +1 -0
- package/dist/chunker/csharp.d.ts +8 -0
- package/dist/chunker/csharp.js +17 -0
- package/dist/chunker/csharp.js.map +1 -0
- package/dist/chunker/css.d.ts +8 -0
- package/dist/chunker/css.js +14 -0
- package/dist/chunker/css.js.map +1 -0
- package/dist/chunker/factory.d.ts +27 -0
- package/dist/chunker/factory.js +138 -0
- package/dist/chunker/factory.js.map +1 -0
- package/dist/chunker/fallback.d.ts +8 -0
- package/dist/chunker/fallback.js +34 -0
- package/dist/chunker/fallback.js.map +1 -0
- package/dist/chunker/go.d.ts +8 -0
- package/dist/chunker/go.js +13 -0
- package/dist/chunker/go.js.map +1 -0
- package/dist/chunker/grammar.d.ts +12 -0
- package/dist/chunker/grammar.js +43 -0
- package/dist/chunker/grammar.js.map +1 -0
- package/dist/chunker/html.d.ts +8 -0
- package/dist/chunker/html.js +12 -0
- package/dist/chunker/html.js.map +1 -0
- package/dist/chunker/java.d.ts +8 -0
- package/dist/chunker/java.js +14 -0
- package/dist/chunker/java.js.map +1 -0
- package/dist/chunker/javascript.d.ts +8 -0
- package/dist/chunker/javascript.js +15 -0
- package/dist/chunker/javascript.js.map +1 -0
- package/dist/chunker/json.d.ts +8 -0
- package/dist/chunker/json.js +11 -0
- package/dist/chunker/json.js.map +1 -0
- package/dist/chunker/kotlin.d.ts +8 -0
- package/dist/chunker/kotlin.js +15 -0
- package/dist/chunker/kotlin.js.map +1 -0
- package/dist/chunker/loader.d.ts +2 -0
- package/dist/chunker/loader.js +27 -0
- package/dist/chunker/loader.js.map +1 -0
- package/dist/chunker/markdown.d.ts +7 -0
- package/dist/chunker/markdown.js +96 -0
- package/dist/chunker/markdown.js.map +1 -0
- package/dist/chunker/pdf.d.ts +8 -0
- package/dist/chunker/pdf.js +93 -0
- package/dist/chunker/pdf.js.map +1 -0
- package/dist/chunker/python.d.ts +8 -0
- package/dist/chunker/python.js +13 -0
- package/dist/chunker/python.js.map +1 -0
- package/dist/chunker/razor.d.ts +7 -0
- package/dist/chunker/razor.js +85 -0
- package/dist/chunker/razor.js.map +1 -0
- package/dist/chunker/ruby.d.ts +8 -0
- package/dist/chunker/ruby.js +14 -0
- package/dist/chunker/ruby.js.map +1 -0
- package/dist/chunker/rust.d.ts +8 -0
- package/dist/chunker/rust.js +17 -0
- package/dist/chunker/rust.js.map +1 -0
- package/dist/chunker/sln.d.ts +9 -0
- package/dist/chunker/sln.js +65 -0
- package/dist/chunker/sln.js.map +1 -0
- package/dist/chunker/swift.d.ts +8 -0
- package/dist/chunker/swift.js +17 -0
- package/dist/chunker/swift.js.map +1 -0
- package/dist/chunker/tex.d.ts +7 -0
- package/dist/chunker/tex.js +93 -0
- package/dist/chunker/tex.js.map +1 -0
- package/dist/chunker/typescript.d.ts +8 -0
- package/dist/chunker/typescript.js +17 -0
- package/dist/chunker/typescript.js.map +1 -0
- package/dist/chunker/uuid.d.ts +1 -0
- package/dist/chunker/uuid.js +8 -0
- package/dist/chunker/uuid.js.map +1 -0
- package/dist/chunker/xml.d.ts +8 -0
- package/dist/chunker/xml.js +11 -0
- package/dist/chunker/xml.js.map +1 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +291 -0
- package/dist/cli.js.map +1 -0
- package/dist/core/config.d.ts +59 -0
- package/dist/core/config.js +127 -0
- package/dist/core/config.js.map +1 -0
- package/dist/core/fileLogger.d.ts +6 -0
- package/dist/core/fileLogger.js +32 -0
- package/dist/core/fileLogger.js.map +1 -0
- package/dist/core/interfaces.d.ts +31 -0
- package/dist/core/interfaces.js +2 -0
- package/dist/core/interfaces.js.map +1 -0
- package/dist/core/manifest.d.ts +21 -0
- package/dist/core/manifest.js +48 -0
- package/dist/core/manifest.js.map +1 -0
- package/dist/embedder/factory.d.ts +4 -0
- package/dist/embedder/factory.js +27 -0
- package/dist/embedder/factory.js.map +1 -0
- package/dist/embedder/http.d.ts +11 -0
- package/dist/embedder/http.js +309 -0
- package/dist/embedder/http.js.map +1 -0
- package/dist/embedder/ollama.d.ts +14 -0
- package/dist/embedder/ollama.js +60 -0
- package/dist/embedder/ollama.js.map +1 -0
- package/dist/embedder/openai.d.ts +12 -0
- package/dist/embedder/openai.js +33 -0
- package/dist/embedder/openai.js.map +1 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.js +10 -0
- package/dist/index.js.map +1 -0
- package/dist/indexer.d.ts +49 -0
- package/dist/indexer.js +336 -0
- package/dist/indexer.js.map +1 -0
- package/dist/plugin-entry.d.ts +4 -0
- package/dist/plugin-entry.js +5 -0
- package/dist/plugin-entry.js.map +1 -0
- package/dist/plugin.d.ts +22 -0
- package/dist/plugin.js +477 -0
- package/dist/plugin.js.map +1 -0
- package/dist/retriever/retriever.d.ts +5 -0
- package/dist/retriever/retriever.js +14 -0
- package/dist/retriever/retriever.js.map +1 -0
- package/dist/types/opencode-plugin.d.ts +51 -0
- package/dist/vectorstore/lancedb.d.ts +18 -0
- package/dist/vectorstore/lancedb.js +196 -0
- package/dist/vectorstore/lancedb.js.map +1 -0
- package/dist/watcher.d.ts +14 -0
- package/dist/watcher.js +88 -0
- package/dist/watcher.js.map +1 -0
- package/package.json +82 -0
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
import { typescriptChunker } from "./typescript.js";
|
|
2
|
+
import { pythonChunker } from "./python.js";
|
|
3
|
+
import { javaChunker } from "./java.js";
|
|
4
|
+
import { goChunker } from "./go.js";
|
|
5
|
+
import { markdownChunker } from "./markdown.js";
|
|
6
|
+
import { cChunker } from "./c.js";
|
|
7
|
+
import { cppChunker } from "./cpp.js";
|
|
8
|
+
import { csharpChunker } from "./csharp.js";
|
|
9
|
+
import { javascriptChunker } from "./javascript.js";
|
|
10
|
+
import { razorChunker } from "./razor.js";
|
|
11
|
+
import { jsonChunker } from "./json.js";
|
|
12
|
+
import { htmlChunker } from "./html.js";
|
|
13
|
+
import { cssChunker } from "./css.js";
|
|
14
|
+
import { xmlChunker } from "./xml.js";
|
|
15
|
+
import { slnChunker } from "./sln.js";
|
|
16
|
+
import { rustChunker } from "./rust.js";
|
|
17
|
+
import { rubyChunker } from "./ruby.js";
|
|
18
|
+
import { kotlinChunker } from "./kotlin.js";
|
|
19
|
+
import { swiftChunker } from "./swift.js";
|
|
20
|
+
import { texChunker } from "./tex.js";
|
|
21
|
+
import { fallbackChunker } from "./fallback.js";
|
|
22
|
+
import { pdfChunker } from "./pdf.js";
|
|
23
|
+
import { uuid } from "./uuid.js";
|
|
24
|
+
/**
 * Built-in chunkers in registration order. If two of them declare the same
 * extension, the one listed later overwrites the earlier map entry below.
 */
const chunkers = [
    typescriptChunker,
    pythonChunker,
    javaChunker,
    goChunker,
    markdownChunker,
    cChunker,
    cppChunker,
    csharpChunker,
    javascriptChunker,
    razorChunker,
    jsonChunker,
    htmlChunker,
    cssChunker,
    xmlChunker,
    slnChunker,
    rustChunker,
    rubyChunker,
    kotlinChunker,
    swiftChunker,
    texChunker,
    pdfChunker,
];
/**
 * Routing table: lower-cased file extension (leading dot included, e.g.
 * ".go") -> chunker instance that handles it.
 */
const extensionMap = new Map();
for (const chunker of chunkers) {
    // Chunkers without a `fileExtensions` list cannot be routed by extension.
    if (!("fileExtensions" in chunker)) {
        continue;
    }
    for (const ext of chunker.fileExtensions) {
        // getChunker() and registerChunker() lower-case every extension before
        // touching the map, so a key stored with upper-case letters would be
        // unreachable. Normalise the built-in keys the same way.
        extensionMap.set(ext.toLowerCase(), chunker);
    }
}
|
|
56
|
+
/**
 * Adds a pluggable chunker to the extension routing table.
 *
 * @param chunker    Object to register; its `language` is used in the warning text.
 * @param extensions Extensions to claim. When null/undefined, the chunker's own
 *                   `fileExtensions` is used if it has one, otherwise nothing
 *                   is registered.
 *
 * Extensions are lower-cased first. An extension that already has a chunker
 * keeps it: the new chunker is skipped for that extension and a warning is
 * written to the console.
 */
export function registerChunker(chunker, extensions) {
    let candidates = extensions;
    if (candidates === null || candidates === undefined) {
        candidates = "fileExtensions" in chunker ? chunker.fileExtensions : [];
    }
    for (const rawExt of candidates) {
        const key = rawExt.toLowerCase();
        if (!extensionMap.has(key)) {
            extensionMap.set(key, chunker);
        }
        else {
            // First registration wins; never replace an existing mapping.
            console.warn(`[opencode-rag] Chunker for "${key}" already registered — skipping pluggable chunker "${chunker.language}"`);
        }
    }
}
|
|
69
|
+
/**
 * Picks the chunker for a file based on its extension.
 *
 * @param filePath Path or bare file name.
 * @returns The chunker mapped to the lower-cased extension, or
 *          `fallbackChunker` when the file name has no extension or the
 *          extension is not mapped.
 */
export function getChunker(filePath) {
    // Only the last path segment can carry the extension. Looking at the whole
    // path would treat a dot in a directory name ("pkg.d/Makefile") as the
    // start of the extension.
    const lastSeparator = Math.max(filePath.lastIndexOf("/"), filePath.lastIndexOf("\\"));
    const baseName = filePath.slice(lastSeparator + 1);
    const dotIndex = baseName.lastIndexOf(".");
    if (dotIndex === -1) {
        // Without this guard slice(-1) would yield the last character of the
        // name and use it as a lookup key.
        return fallbackChunker;
    }
    const ext = baseName.slice(dotIndex).toLowerCase();
    return extensionMap.get(ext) ?? fallbackChunker;
}
|
|
73
|
+
/** Maximum number of lines a chunk may contain before it is split. */
const MAX_CHUNK_LINES = 100;
/** Maximum number of characters (UTF-16 code units) a chunk may contain. */
const MAX_CHUNK_CHARS = 8000;
/**
 * Cuts one line into consecutive pieces of at most `maxChars` code units.
 * A line that already fits is returned as a single-element array.
 */
function splitLongLine(line, maxChars) {
    const pieces = [];
    let pos = 0;
    while (line.length - pos > maxChars) {
        let cut = pos + maxChars;
        const lastUnit = line.charCodeAt(cut - 1);
        // Do not cut between the two halves of a surrogate pair.
        if (cut - 1 > pos && lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
            cut -= 1;
        }
        pieces.push(line.slice(pos, cut));
        pos = cut;
    }
    pieces.push(line.slice(pos));
    return pieces;
}
/**
 * Enforces MAX_CHUNK_LINES / MAX_CHUNK_CHARS on a list of chunks.
 *
 * Chunks within both limits are passed through untouched (same object).
 * Larger chunks are re-emitted as several sub-chunks, each with a fresh id and
 * metadata limited to `filePath`, `startLine`, `endLine` and `language`.
 * Sub-chunks that contain only whitespace are dropped.
 *
 * A single line longer than MAX_CHUNK_CHARS is cut into pieces so the
 * character cap really holds; all pieces of such a line report that line's
 * number as both startLine and endLine.
 *
 * @param chunks   Chunks produced by a chunker.
 * @param filePath Path written into the metadata of every sub-chunk.
 * @returns New array of chunks in the original order.
 */
function splitOversized(chunks, filePath) {
    const result = [];
    for (const chunk of chunks) {
        const lines = chunk.content.split("\n");
        if (lines.length <= MAX_CHUNK_LINES && chunk.content.length <= MAX_CHUNK_CHARS) {
            result.push(chunk);
            continue;
        }
        const baseLine = chunk.metadata.startLine;
        let pending = [];
        let pendingChars = 0;
        // Indexes (into `lines`) of the first and last line held in `pending`.
        let firstIdx = 0;
        let lastIdx = 0;
        const flush = () => {
            if (pending.length === 0) {
                return;
            }
            const content = pending.join("\n");
            if (content.trim().length > 0) {
                result.push({
                    id: uuid(),
                    content,
                    metadata: {
                        filePath,
                        startLine: baseLine + firstIdx,
                        endLine: baseLine + lastIdx,
                        language: chunk.metadata.language,
                    },
                });
            }
            pending = [];
            pendingChars = 0;
        };
        lines.forEach((line, idx) => {
            for (const piece of splitLongLine(line, MAX_CHUNK_CHARS)) {
                // +1 accounts for the newline that join("\n") re-inserts.
                const cost = piece.length + 1;
                if (pending.length > 0 &&
                    (pending.length >= MAX_CHUNK_LINES || pendingChars + cost > MAX_CHUNK_CHARS)) {
                    flush();
                }
                if (pending.length === 0) {
                    firstIdx = idx;
                }
                pending.push(piece);
                pendingChars += cost;
                lastIdx = idx;
            }
        });
        flush();
    }
    return result;
}
|
|
129
|
+
/**
 * Chunks a file with the chunker selected for its extension.
 *
 * If that chunker yields nothing, the fallback chunker is tried instead.
 * Whatever is produced is passed through splitOversized(), so the line and
 * character limits apply to fallback output as well.
 *
 * @param filePath Path of the file; selects the chunker and is stored in metadata.
 * @param content  Full text of the file.
 * @returns Promise of the final chunk list (possibly empty).
 */
export async function chunkFile(filePath, content) {
    const chunker = getChunker(filePath);
    let chunks = await chunker.chunk(filePath, content);
    // No point re-running the fallback when it was the selected chunker.
    if (chunks.length === 0 && chunker !== fallbackChunker) {
        chunks = await fallbackChunker.chunk(filePath, content);
    }
    return splitOversized(chunks, filePath);
}
|
|
137
|
+
export { typescriptChunker, pythonChunker, javaChunker, goChunker, markdownChunker, cChunker, cppChunker, csharpChunker, javascriptChunker, razorChunker, jsonChunker, htmlChunker, cssChunker, xmlChunker, slnChunker, rustChunker, rubyChunker, kotlinChunker, swiftChunker, texChunker, pdfChunker, fallbackChunker };
|
|
138
|
+
//# sourceMappingURL=factory.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"factory.js","sourceRoot":"","sources":["../../src/chunker/factory.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;AACpD,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC5C,OAAO,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AACxC,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AACpC,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAChD,OAAO,EAAE,QAAQ,EAAE,MAAM,QAAQ,CAAC;AAClC,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AACtC,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC5C,OAAO,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;AACpD,OAAO,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAC1C,OAAO,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AACxC,OAAO,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AACxC,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AACtC,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AACtC,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AACtC,OAAO,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AACxC,OAAO,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AACxC,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC5C,OAAO,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAC1C,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AACtC,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAChD,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AACtC,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAEjC,MAAM,QAAQ,GAAc;IAC1B,iBAAiB;IACjB,aAAa;IACb,WAAW;IACX,SAAS;IACT,eAAe;IACf,QAAQ;IACR,UAAU;IACV,aAAa;IACb,iBAAiB;IACjB,YAAY;IACZ,WAAW;IACX,WAAW;IACX,UAAU;IACV,UAAU;IACV,UAAU;IACV,WAAW;IACX,WAAW;IACX,aAAa;IACb,YAAY;IACZ,UAAU;IACV,UAAU;CACX,CAAC;AAEF,MAAM,YAAY,GAAG,IAAI,GAAG,EAAmB,CAAC;AAEhD,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;IAC/B,IAAI,gBAAgB,IAAI,OAAO,EAAE,CAAC;QAChC,MAAM,EAAE,GAAG,OAAwD,CAAC;QACpE,KAAK,MAAM,GAAG,IAAI,EAAE,CAAC,cAAc,EAAE,CAAC;YACpC,YAAY,CAAC,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;QACjC,CAAC;IACH,CAAC;AACH,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,OAAgB,EAChB,UAAqB;IAErB,MAAM,IAAI,GAAG,UAAU,IAAI,CAAC,gBAAgB,IAAI,OAAO;QACrD,CAAC,CAAE,OAAyD,CAAC,cAAc;QAC3E,CAAC,CAAC,EAAE,CAAC,CAAC;IAER,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,KAAK,GAAG,GAAG,CAAC,WAAW,EAAE,CAAC;QAChC,IAAI,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;YAC5B,OAAO,CAAC,IAAI,CACV,+BAA+B,KAAK,sDAAsD,OAAO,CAAC,QAAQ,GAAG,CAC9G,CAAC;YACF,SAAS;QACX,CAAC;QACD,YAAY
,CAAC,GAAG,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;IACnC,CAAC;AACH,CAAC;AAED,MAAM,UAAU,UAAU,CAAC,QAAgB;IACzC,MAAM,GAAG,GAAG,QAAQ,CAAC,KAAK,CAAC,QAAQ,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;IACpE,OAAO,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,eAAe,CAAC;AAClD,CAAC;AAED,MAAM,eAAe,GAAG,GAAG,CAAC;AAC5B,MAAM,eAAe,GAAG,IAAI,CAAC;AAE7B,SAAS,cAAc,CAAC,MAAe,EAAE,QAAgB;IACvD,MAAM,MAAM,GAAY,EAAE,CAAC;IAE3B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACxC,IAAI,KAAK,CAAC,MAAM,IAAI,eAAe,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,IAAI,eAAe,EAAE,CAAC;YAC/E,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACnB,SAAS;QACX,CAAC;QAED,MAAM,SAAS,GAAY,EAAE,CAAC;QAC9B,IAAI,YAAY,GAAa,EAAE,CAAC;QAChC,IAAI,gBAAgB,GAAG,CAAC,CAAC;QACzB,IAAI,UAAU,GAAG,CAAC,CAAC;QAEnB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAE,CAAC;YACvB,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;YAEhC,IACE,YAAY,CAAC,MAAM,GAAG,CAAC;gBACvB,CAAC,YAAY,CAAC,MAAM,IAAI,eAAe,IAAI,gBAAgB,GAAG,OAAO,GAAG,eAAe,CAAC,EACxF,CAAC;gBACD,SAAS,CAAC,IAAI,CAAC;oBACb,EAAE,EAAE,IAAI,EAAE;oBACV,OAAO,EAAE,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC;oBAChC,QAAQ,EAAE;wBACR,QAAQ;wBACR,SAAS,EAAE,KAAK,CAAC,QAAQ,CAAC,SAAS,GAAG,UAAU;wBAChD,OAAO,EAAE,KAAK,CAAC,QAAQ,CAAC,SAAS,GAAG,CAAC,GAAG,CAAC;wBACzC,QAAQ,EAAE,KAAK,CAAC,QAAQ,CAAC,QAAQ;qBAClC;iBACF,CAAC,CAAC;gBACH,YAAY,GAAG,EAAE,CAAC;gBAClB,gBAAgB,GAAG,CAAC,CAAC;gBACrB,UAAU,GAAG,CAAC,CAAC;YACjB,CAAC;YAED,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACxB,gBAAgB,IAAI,OAAO,CAAC;QAC9B,CAAC;QAED,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5B,SAAS,CAAC,IAAI,CAAC;gBACb,EAAE,EAAE,IAAI,EAAE;gBACV,OAAO,EAAE,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC;gBAChC,QAAQ,EAAE;oBACR,QAAQ;oBACR,SAAS,EAAE,KAAK,CAAC,QAAQ,CAAC,SAAS,GAAG,UAAU;oBAChD,OAAO,EAAE,KAAK,CAAC,QAAQ,CAAC,SAAS,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC;oBACpD,QAAQ,EAAE,KAAK,CAAC,QAAQ,CAAC,QAAQ;iBAClC;aACF,CAAC,CAAC;QACL,CAAC;QAED,KAAK,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;YAC5B,IAAI,GAAG,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,M
AAM,GAAG,CAAC,EAAE,CAAC;gBAClC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACnB,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,SAAS,CAC7B,QAAgB,EAChB,OAAe;IAEf,MAAM,OAAO,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC;IACrC,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,KAAK,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAEtD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,eAAe,CAAC,KAAK,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAClD,CAAC;IAED,OAAO,cAAc,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;AAC1C,CAAC;AAED,OAAO,EAAE,iBAAiB,EAAE,aAAa,EAAE,WAAW,EAAE,SAAS,EAAE,eAAe,EAAE,QAAQ,EAAE,UAAU,EAAE,aAAa,EAAE,iBAAiB,EAAE,YAAY,EAAE,WAAW,EAAE,WAAW,EAAE,UAAU,EAAE,UAAU,EAAE,UAAU,EAAE,WAAW,EAAE,WAAW,EAAE,aAAa,EAAE,YAAY,EAAE,UAAU,EAAE,UAAU,EAAE,eAAe,EAAE,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { Chunker, Chunk } from "../core/interfaces.js";
|
|
2
|
+
/**
 * Type declaration of the fallback chunker: a `Chunker` implementation whose
 * `language` is the literal "text" and whose constructor takes an optional
 * `maxLines` number.
 */
export declare class FallbackChunker implements Chunker {
|
|
3
|
+
readonly language = "text";
|
|
4
|
+
private maxLines;
|
|
5
|
+
constructor(maxLines?: number);
|
|
6
|
+
chunk(filePath: string, content: string): Promise<Chunk[]>;
|
|
7
|
+
}
|
|
8
|
+
/** Exported FallbackChunker instance. */
export declare const fallbackChunker: FallbackChunker;
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import { uuid } from "./uuid.js";
|
|
2
|
+
/** Lines per chunk when the constructor is called without an argument. */
const DEFAULT_MAX_LINES = 100;
/**
 * Language-agnostic chunker that cuts text into consecutive windows of
 * `maxLines` lines. Each window is trimmed; windows that are empty after
 * trimming are skipped.
 */
export class FallbackChunker {
    language = "text";
    maxLines;
    /**
     * @param maxLines Window size in lines; must be at least 1. Fractions are
     *                 rounded down.
     * @throws RangeError when `maxLines` is not a number >= 1. Such a value
     *         would stop the window loop in chunk() from ever advancing.
     */
    constructor(maxLines = DEFAULT_MAX_LINES) {
        // Written as a negated comparison so NaN and null are rejected too.
        if (!(maxLines >= 1)) {
            throw new RangeError(`FallbackChunker maxLines must be >= 1, received ${maxLines}`);
        }
        this.maxLines = Math.floor(maxLines);
    }
    /**
     * @param filePath Path stored in each chunk's metadata.
     * @param content  Text to split.
     * @returns Promise of the chunks; `startLine`/`endLine` are 1-based and
     *          describe the untrimmed window.
     */
    async chunk(filePath, content) {
        // split() always returns at least one element, so empty input is
        // handled by the "empty after trim" skip below.
        const lines = content.split("\n");
        const chunks = [];
        for (let start = 0; start < lines.length; start += this.maxLines) {
            const end = Math.min(start + this.maxLines, lines.length);
            const chunkContent = lines.slice(start, end).join("\n").trim();
            if (chunkContent.length === 0)
                continue;
            chunks.push({
                id: uuid(),
                content: chunkContent,
                metadata: {
                    filePath,
                    startLine: start + 1,
                    endLine: end,
                    language: this.language,
                },
            });
        }
        return chunks;
    }
}
|
|
33
|
+
export const fallbackChunker = new FallbackChunker();
|
|
34
|
+
//# sourceMappingURL=fallback.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fallback.js","sourceRoot":"","sources":["../../src/chunker/fallback.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAEjC,MAAM,iBAAiB,GAAG,GAAG,CAAC;AAE9B,MAAM,OAAO,eAAe;IACjB,QAAQ,GAAG,MAAM,CAAC;IAEnB,QAAQ,CAAS;IAEzB,YAAY,WAAmB,iBAAiB;QAC9C,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;IAC3B,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,QAAgB,EAAE,OAAe;QAC3C,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAClC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,CAAC;QAElC,MAAM,MAAM,GAAY,EAAE,CAAC;QAE3B,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,KAAK,CAAC,MAAM,EAAE,KAAK,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YACjE,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,GAAG,IAAI,CAAC,QAAQ,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;YAC1D,MAAM,YAAY,GAAG,KAAK,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;YAC/D,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC;gBAAE,SAAS;YAExC,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,IAAI,EAAE;gBACV,OAAO,EAAE,YAAY;gBACrB,QAAQ,EAAE;oBACR,QAAQ;oBACR,SAAS,EAAE,KAAK,GAAG,CAAC;oBACpB,OAAO,EAAE,GAAG;oBACZ,QAAQ,EAAE,IAAI,CAAC,QAAQ;iBACxB;aACF,CAAC,CAAC;QACL,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;CACF;AAED,MAAM,CAAC,MAAM,eAAe,GAAG,IAAI,eAAe,EAAE,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { TreeSitterChunker } from "./base.js";
|
|
2
|
+
/**
 * Type declaration of the Go chunker: a TreeSitterChunker subclass whose
 * `language` and `grammarName` are both the literal "go".
 */
export declare class GoChunker extends TreeSitterChunker {
|
|
3
|
+
readonly language = "go";
|
|
4
|
+
readonly fileExtensions: string[];
|
|
5
|
+
readonly grammarName = "go";
|
|
6
|
+
readonly nodeTypes: Set<string>;
|
|
7
|
+
}
|
|
8
|
+
/** Exported GoChunker instance. */
export declare const goChunker: GoChunker;
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { TreeSitterChunker } from "./base.js";
|
|
2
|
+
/**
 * Node type names exposed through `GoChunker.nodeTypes`.
 * NOTE(review): presumably the TreeSitterChunker base class selects AST nodes
 * of these types; the base class is not visible here, so confirm.
 */
const GO_NODE_TYPES = [
    "function_declaration",
    "method_declaration",
    "type_declaration",
];
/** Chunker configuration for Go source files (".go"), using the "go" grammar. */
export class GoChunker extends TreeSitterChunker {
    language = "go";
    fileExtensions = [".go"];
    grammarName = "go";
    nodeTypes = new Set(GO_NODE_TYPES);
}
/** Module-level GoChunker instance. */
export const goChunker = new GoChunker();
|
|
13
|
+
//# sourceMappingURL=go.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"go.js","sourceRoot":"","sources":["../../src/chunker/go.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,WAAW,CAAC;AAE9C,MAAM,OAAO,SAAU,SAAQ,iBAAiB;IACrC,QAAQ,GAAG,IAAI,CAAC;IAChB,cAAc,GAAG,CAAC,KAAK,CAAC,CAAC;IACzB,WAAW,GAAG,IAAI,CAAC;IACnB,SAAS,GAAG,IAAI,GAAG,CAAC;QAC3B,sBAAsB;QACtB,oBAAoB;QACpB,kBAAkB;KACnB,CAAC,CAAC;CACJ;AAED,MAAM,CAAC,MAAM,SAAS,GAAG,IAAI,SAAS,EAAE,CAAC"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import { Language, Node } from "web-tree-sitter";
|
|
2
|
+
/** Initialises the web-tree-sitter `Parser`; resolves with no value. */
export declare function initParser(): Promise<void>;
|
|
3
|
+
/** Loads the grammar named `lang` and resolves to its web-tree-sitter `Language`. */
export declare function loadLanguage(lang: string): Promise<Language>;
|
|
4
|
+
/** Plain-data snapshot of one syntax-tree node, as produced by walkTree(). */
export interface AstNode {
|
|
5
|
+
/** Slice of the source text between `startIndex` and `endIndex`. */
text: string;
|
|
6
|
+
/** Row of the node's start position plus one. */
startLine: number;
|
|
7
|
+
/** Row of the node's end position plus one. */
endLine: number;
|
|
8
|
+
/** The node's start index into the source string. */
startIndex: number;
|
|
9
|
+
/** The node's end index into the source string. */
endIndex: number;
|
|
10
|
+
/** The node's grammar type name. */
type: string;
|
|
11
|
+
}
|
|
12
|
+
/**
 * Collects descendants of `node` whose type is in `nodeTypes`, descending at
 * most `maxDepth` levels; `depth` is the level assigned to `node` itself.
 */
export declare function walkTree(node: Node, nodeTypes: Set<string>, source: string, maxDepth?: number, depth?: number): AstNode[];
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import { Parser, Language } from "web-tree-sitter";
|
|
2
|
+
import { getWasmPath } from "tree-sitter-wasm";
|
|
3
|
+
import { readFileSync } from "node:fs";
|
|
4
|
+
/** Promise of the in-flight or completed `Parser.init()` call; null before first use. */
let initPromise = null;
/**
 * Initialises the web-tree-sitter parser runtime once.
 *
 * All callers share one `Parser.init()` call, including callers that arrive
 * while initialisation is still running. If initialisation fails, the cached
 * promise is cleared so that a later call can try again.
 *
 * @returns Promise that resolves (with no value) once the parser is ready.
 */
export async function initParser() {
    if (initPromise === null) {
        initPromise = Parser.init().catch((err) => {
            initPromise = null;
            throw err;
        });
    }
    await initPromise;
}
|
|
11
|
+
/** Grammar name -> promise of the loaded Language (in flight or settled). */
const grammarCache = new Map();
/**
 * Loads a tree-sitter grammar by name and caches it.
 *
 * The promise is cached rather than the resolved value, so concurrent
 * requests for the same grammar share one file read and one
 * `Language.load()`. A failed load is removed from the cache so it can be
 * retried.
 *
 * @param lang Grammar name passed to `getWasmPath`.
 * @returns Promise of the loaded `Language`.
 */
export async function loadLanguage(lang) {
    const cached = grammarCache.get(lang);
    if (cached)
        return cached;
    const pending = (async () => {
        await initParser();
        const wasmPath = getWasmPath(lang);
        const buffer = readFileSync(wasmPath);
        return Language.load(buffer);
    })();
    grammarCache.set(lang, pending);
    try {
        return await pending;
    }
    catch (err) {
        // Only evict our own entry; a newer attempt may already have replaced it.
        if (grammarCache.get(lang) === pending) {
            grammarCache.delete(lang);
        }
        throw err;
    }
}
|
|
23
|
+
/**
 * Collects, in document order, the outermost nodes whose type is listed in
 * `nodeTypes`.
 *
 * - The node the walk starts from (depth 0) is never reported itself.
 * - Once a node matches, its subtree is not searched further, so nested
 *   matches are not reported separately.
 * - Children are visited only while the current depth is below `maxDepth`.
 *
 * All matches go into one shared array. The previous
 * `results.push(...walkTree(child))` form passed every match of a subtree as
 * call arguments, which throws a RangeError on very wide trees.
 *
 * @param node      Node to start from; needs `type`, `startIndex`, `endIndex`,
 *                  `startPosition.row`, `endPosition.row` and `children`.
 * @param nodeTypes Set of node type names to collect.
 * @param source    Text the node indexes refer to.
 * @param maxDepth  Deepest level whose children are still visited (default 10).
 * @param depth     Level assigned to `node` (default 0).
 * @returns Array of `{ text, startLine, endLine, startIndex, endIndex, type }`.
 */
export function walkTree(node, nodeTypes, source, maxDepth = 10, depth = 0) {
    const results = [];
    const visit = (current, level) => {
        if (level > 0 && nodeTypes.has(current.type)) {
            results.push({
                text: source.slice(current.startIndex, current.endIndex),
                startLine: current.startPosition.row + 1,
                endLine: current.endPosition.row + 1,
                startIndex: current.startIndex,
                endIndex: current.endIndex,
                type: current.type,
            });
            return;
        }
        if (level >= maxDepth) {
            return;
        }
        for (const child of current.children) {
            // NOTE(review): recent web-tree-sitter typings allow null entries
            // in `children`; skip them rather than crash. Confirm against the
            // installed version.
            if (child) {
                visit(child, level + 1);
            }
        }
    };
    visit(node, depth);
    return results;
}
|
|
43
|
+
//# sourceMappingURL=grammar.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"grammar.js","sourceRoot":"","sources":["../../src/chunker/grammar.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAQ,MAAM,iBAAiB,CAAC;AACzD,OAAO,EAAE,WAAW,EAA0B,MAAM,kBAAkB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AAEvC,IAAI,WAAW,GAAG,KAAK,CAAC;AAExB,MAAM,CAAC,KAAK,UAAU,UAAU;IAC9B,IAAI,WAAW;QAAE,OAAO;IACxB,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;IACpB,WAAW,GAAG,IAAI,CAAC;AACrB,CAAC;AAED,MAAM,YAAY,GAAG,IAAI,GAAG,EAAoB,CAAC;AAEjD,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,IAAY;IAC7C,MAAM,MAAM,GAAG,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IACtC,IAAI,MAAM;QAAE,OAAO,MAAM,CAAC;IAE1B,MAAM,UAAU,EAAE,CAAC;IACnB,MAAM,QAAQ,GAAG,WAAW,CAAC,IAAyB,CAAC,CAAC;IACxD,MAAM,MAAM,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;IACtC,MAAM,QAAQ,GAAG,MAAM,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAC7C,YAAY,CAAC,GAAG,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;IACjC,OAAO,QAAQ,CAAC;AAClB,CAAC;AAWD,MAAM,UAAU,QAAQ,CACtB,IAAU,EACV,SAAsB,EACtB,MAAc,EACd,WAAmB,EAAE,EACrB,QAAgB,CAAC;IAEjB,MAAM,OAAO,GAAc,EAAE,CAAC;IAE9B,IAAI,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;QAC1C,OAAO,CAAC,IAAI,CAAC;YACX,IAAI,EAAE,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,QAAQ,CAAC;YAClD,SAAS,EAAE,IAAI,CAAC,aAAa,CAAC,GAAG,GAAG,CAAC;YACrC,OAAO,EAAE,IAAI,CAAC,WAAW,CAAC,GAAG,GAAG,CAAC;YACjC,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,IAAI,EAAE,IAAI,CAAC,IAAI;SAChB,CAAC,CAAC;QACH,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,IAAI,KAAK,GAAG,QAAQ,EAAE,CAAC;QACrB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAClC,OAAO,CAAC,IAAI,CACV,GAAG,QAAQ,CAAC,KAAK,EAAE,SAAS,EAAE,MAAM,EAAE,QAAQ,EAAE,KAAK,GAAG,CAAC,CAAC,CAC3D,CAAC;QACJ,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { TreeSitterChunker } from "./base.js";
|
|
2
|
+
/**
 * Type declaration of the HTML chunker: a TreeSitterChunker subclass whose
 * `language` and `grammarName` are both the literal "html".
 */
export declare class HtmlChunker extends TreeSitterChunker {
|
|
3
|
+
readonly language = "html";
|
|
4
|
+
readonly fileExtensions: string[];
|
|
5
|
+
readonly grammarName = "html";
|
|
6
|
+
readonly nodeTypes: Set<string>;
|
|
7
|
+
}
|
|
8
|
+
/** Exported HtmlChunker instance. */
export declare const htmlChunker: HtmlChunker;
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import { TreeSitterChunker } from "./base.js";
|
|
2
|
+
/**
 * Node type names exposed through `HtmlChunker.nodeTypes`.
 * NOTE(review): presumably consumed by the TreeSitterChunker base class to
 * pick AST nodes; the base class is not visible here, so confirm.
 */
const HTML_NODE_TYPES = [
    "script_element",
    "style_element",
];
/** Chunker configuration for HTML files (".html", ".htm"), using the "html" grammar. */
export class HtmlChunker extends TreeSitterChunker {
    language = "html";
    fileExtensions = [".html", ".htm"];
    grammarName = "html";
    nodeTypes = new Set(HTML_NODE_TYPES);
}
/** Module-level HtmlChunker instance. */
export const htmlChunker = new HtmlChunker();
|
|
12
|
+
//# sourceMappingURL=html.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"html.js","sourceRoot":"","sources":["../../src/chunker/html.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,WAAW,CAAC;AAE9C,MAAM,OAAO,WAAY,SAAQ,iBAAiB;IACvC,QAAQ,GAAG,MAAM,CAAC;IAClB,cAAc,GAAG,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;IACnC,WAAW,GAAG,MAAM,CAAC;IACrB,SAAS,GAAG,IAAI,GAAG,CAAC;QAC3B,gBAAgB;QAChB,eAAe;KAChB,CAAC,CAAC;CACJ;AAED,MAAM,CAAC,MAAM,WAAW,GAAG,IAAI,WAAW,EAAE,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { TreeSitterChunker } from "./base.js";
|
|
2
|
+
/**
 * Type declaration of the Java chunker: a TreeSitterChunker subclass whose
 * `language` and `grammarName` are both the literal "java".
 */
export declare class JavaChunker extends TreeSitterChunker {
|
|
3
|
+
readonly language = "java";
|
|
4
|
+
readonly fileExtensions: string[];
|
|
5
|
+
readonly grammarName = "java";
|
|
6
|
+
readonly nodeTypes: Set<string>;
|
|
7
|
+
}
|
|
8
|
+
/** Exported JavaChunker instance. */
export declare const javaChunker: JavaChunker;
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { TreeSitterChunker } from "./base.js";
|
|
2
|
+
/**
 * Node type names exposed through `JavaChunker.nodeTypes`.
 * NOTE(review): presumably consumed by the TreeSitterChunker base class to
 * pick AST nodes; the base class is not visible here, so confirm.
 */
const JAVA_NODE_TYPES = [
    "method_declaration",
    "class_declaration",
    "interface_declaration",
    "enum_declaration",
];
/** Chunker configuration for Java source files (".java"), using the "java" grammar. */
export class JavaChunker extends TreeSitterChunker {
    language = "java";
    fileExtensions = [".java"];
    grammarName = "java";
    nodeTypes = new Set(JAVA_NODE_TYPES);
}
/** Module-level JavaChunker instance. */
export const javaChunker = new JavaChunker();
|
|
14
|
+
//# sourceMappingURL=java.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"java.js","sourceRoot":"","sources":["../../src/chunker/java.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,WAAW,CAAC;AAE9C,MAAM,OAAO,WAAY,SAAQ,iBAAiB;IACvC,QAAQ,GAAG,MAAM,CAAC;IAClB,cAAc,GAAG,CAAC,OAAO,CAAC,CAAC;IAC3B,WAAW,GAAG,MAAM,CAAC;IACrB,SAAS,GAAG,IAAI,GAAG,CAAC;QAC3B,oBAAoB;QACpB,mBAAmB;QACnB,uBAAuB;QACvB,kBAAkB;KACnB,CAAC,CAAC;CACJ;AAED,MAAM,CAAC,MAAM,WAAW,GAAG,IAAI,WAAW,EAAE,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { TreeSitterChunker } from "./base.js";
|
|
2
|
+
/**
 * Type declaration of the JavaScript chunker: a TreeSitterChunker subclass
 * whose `language` and `grammarName` are both the literal "javascript".
 */
export declare class JavaScriptChunker extends TreeSitterChunker {
|
|
3
|
+
readonly language = "javascript";
|
|
4
|
+
readonly fileExtensions: string[];
|
|
5
|
+
readonly grammarName = "javascript";
|
|
6
|
+
readonly nodeTypes: Set<string>;
|
|
7
|
+
}
|
|
8
|
+
/** Exported JavaScriptChunker instance. */
export declare const javascriptChunker: JavaScriptChunker;
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { TreeSitterChunker } from "./base.js";
|
|
2
|
+
/**
 * Node type names exposed through `JavaScriptChunker.nodeTypes`.
 * NOTE(review): presumably consumed by the TreeSitterChunker base class to
 * pick AST nodes; the base class is not visible here, so confirm.
 */
const JAVASCRIPT_NODE_TYPES = [
    "function_declaration",
    "method_definition",
    "class_declaration",
    "arrow_function",
    "export_statement",
];
/**
 * Chunker configuration for JavaScript files (".js", ".jsx", ".mjs", ".cjs"),
 * using the "javascript" grammar.
 */
export class JavaScriptChunker extends TreeSitterChunker {
    language = "javascript";
    fileExtensions = [".js", ".jsx", ".mjs", ".cjs"];
    grammarName = "javascript";
    nodeTypes = new Set(JAVASCRIPT_NODE_TYPES);
}
/** Module-level JavaScriptChunker instance. */
export const javascriptChunker = new JavaScriptChunker();
|
|
15
|
+
//# sourceMappingURL=javascript.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"javascript.js","sourceRoot":"","sources":["../../src/chunker/javascript.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,WAAW,CAAC;AAE9C,MAAM,OAAO,iBAAkB,SAAQ,iBAAiB;IAC7C,QAAQ,GAAG,YAAY,CAAC;IACxB,cAAc,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;IACjD,WAAW,GAAG,YAAY,CAAC;IAC3B,SAAS,GAAG,IAAI,GAAG,CAAC;QAC3B,sBAAsB;QACtB,mBAAmB;QACnB,mBAAmB;QACnB,gBAAgB;QAChB,kBAAkB;KACnB,CAAC,CAAC;CACJ;AAED,MAAM,CAAC,MAAM,iBAAiB,GAAG,IAAI,iBAAiB,EAAE,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { TreeSitterChunker } from "./base.js";
|
|
2
|
+
/**
 * Type declaration of the JSON chunker: a TreeSitterChunker subclass whose
 * `language` and `grammarName` are both the literal "json".
 */
export declare class JsonChunker extends TreeSitterChunker {
|
|
3
|
+
readonly language = "json";
|
|
4
|
+
readonly fileExtensions: string[];
|
|
5
|
+
readonly grammarName = "json";
|
|
6
|
+
readonly nodeTypes: Set<string>;
|
|
7
|
+
}
|
|
8
|
+
/** Exported JsonChunker instance. */
export declare const jsonChunker: JsonChunker;
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { TreeSitterChunker } from "./base.js";
|
|
2
|
+
/**
 * Node type names exposed through `JsonChunker.nodeTypes`.
 * NOTE(review): presumably consumed by the TreeSitterChunker base class to
 * pick AST nodes; the base class is not visible here, so confirm.
 */
const JSON_NODE_TYPES = [
    "pair",
];
/** Chunker configuration for JSON files (".json"), using the "json" grammar. */
export class JsonChunker extends TreeSitterChunker {
    language = "json";
    fileExtensions = [".json"];
    grammarName = "json";
    nodeTypes = new Set(JSON_NODE_TYPES);
}
/** Module-level JsonChunker instance. */
export const jsonChunker = new JsonChunker();
|
|
11
|
+
//# sourceMappingURL=json.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"json.js","sourceRoot":"","sources":["../../src/chunker/json.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,WAAW,CAAC;AAE9C,MAAM,OAAO,WAAY,SAAQ,iBAAiB;IACvC,QAAQ,GAAG,MAAM,CAAC;IAClB,cAAc,GAAG,CAAC,OAAO,CAAC,CAAC;IAC3B,WAAW,GAAG,MAAM,CAAC;IACrB,SAAS,GAAG,IAAI,GAAG,CAAC;QAC3B,MAAM;KACP,CAAC,CAAC;CACJ;AAED,MAAM,CAAC,MAAM,WAAW,GAAG,IAAI,WAAW,EAAE,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { TreeSitterChunker } from "./base.js";
|
|
2
|
+
/**
 * Type declaration of the Kotlin chunker: a TreeSitterChunker subclass whose
 * `language` and `grammarName` are both the literal "kotlin".
 */
export declare class KotlinChunker extends TreeSitterChunker {
|
|
3
|
+
readonly language = "kotlin";
|
|
4
|
+
readonly fileExtensions: string[];
|
|
5
|
+
readonly grammarName = "kotlin";
|
|
6
|
+
readonly nodeTypes: Set<string>;
|
|
7
|
+
}
|
|
8
|
+
/** Exported KotlinChunker instance. */
export declare const kotlinChunker: KotlinChunker;
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { TreeSitterChunker } from "./base.js";
|
|
2
|
+
/**
 * Node type names exposed through `KotlinChunker.nodeTypes`.
 * NOTE(review): presumably consumed by the TreeSitterChunker base class to
 * pick AST nodes; the base class is not visible here, so confirm.
 */
const KOTLIN_NODE_TYPES = [
    "function_declaration",
    "class_declaration",
    "interface_declaration",
    "object_declaration",
    "property_declaration",
];
/** Chunker configuration for Kotlin files (".kt", ".kts"), using the "kotlin" grammar. */
export class KotlinChunker extends TreeSitterChunker {
    language = "kotlin";
    fileExtensions = [".kt", ".kts"];
    grammarName = "kotlin";
    nodeTypes = new Set(KOTLIN_NODE_TYPES);
}
/** Module-level KotlinChunker instance. */
export const kotlinChunker = new KotlinChunker();
|
|
15
|
+
//# sourceMappingURL=kotlin.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"kotlin.js","sourceRoot":"","sources":["../../src/chunker/kotlin.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,WAAW,CAAC;AAE9C,MAAM,OAAO,aAAc,SAAQ,iBAAiB;IACzC,QAAQ,GAAG,QAAQ,CAAC;IACpB,cAAc,GAAG,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC;IACjC,WAAW,GAAG,QAAQ,CAAC;IACvB,SAAS,GAAG,IAAI,GAAG,CAAC;QAC3B,sBAAsB;QACtB,mBAAmB;QACnB,uBAAuB;QACvB,oBAAoB;QACpB,sBAAsB;KACvB,CAAC,CAAC;CACJ;AAED,MAAM,CAAC,MAAM,aAAa,GAAG,IAAI,aAAa,EAAE,CAAC"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import { registerChunker } from "./factory.js";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { pathToFileURL } from "node:url";
|
|
4
|
+
/**
 * Loads one user-configured chunker module and registers it.
 *
 * The module path is resolved against `configDir` and imported dynamically.
 * The module's default export (or the module namespace when there is no
 * default) must have a `chunk` function; otherwise a warning is logged and
 * nothing is registered.
 *
 * This is best-effort: every failure, including a malformed entry whose
 * `module` cannot be resolved to a path, is reported with console.warn and
 * swallowed, so one bad entry does not stop the remaining ones from loading.
 *
 * @param entry     Config entry with `module` (path) and optional `extensions`.
 * @param configDir Directory that relative module paths are resolved against.
 */
async function loadSingleChunker(entry, configDir) {
    try {
        const resolved = path.resolve(configDir, entry.module);
        const moduleUrl = pathToFileURL(resolved).href;
        const mod = await import(moduleUrl);
        const chunker = mod.default ?? mod;
        if (typeof chunker.chunk !== "function") {
            console.warn(`[opencode-rag] Module "${entry.module}" does not export a valid Chunker (no .chunk() method) — skipping`);
            return;
        }
        registerChunker(chunker, entry.extensions);
    }
    catch (err) {
        // Anything can be thrown, not only Error instances.
        const reason = err instanceof Error ? err.message : String(err);
        console.warn(`[opencode-rag] Failed to load chunker module "${entry.module}":`, reason);
    }
}
|
|
20
|
+
/**
 * Loads every chunker listed under `config.chunkers`, one after another in
 * list order. Does nothing when the list is missing or empty.
 *
 * @param config    Configuration object; only `chunkers` is read.
 * @param configDir Directory passed on for resolving each entry's module path.
 */
export async function loadChunkersFromConfig(config, configDir) {
    const entries = config.chunkers;
    const hasEntries = Boolean(entries) && entries.length !== 0;
    if (hasEntries) {
        // Sequential on purpose: each load finishes before the next starts.
        for (const entry of entries) {
            await loadSingleChunker(entry, configDir);
        }
    }
}
|
|
27
|
+
//# sourceMappingURL=loader.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"loader.js","sourceRoot":"","sources":["../../src/chunker/loader.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAE/C,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAEzC,KAAK,UAAU,iBAAiB,CAC9B,KAAoB,EACpB,SAAiB;IAEjB,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACvD,MAAM,SAAS,GAAG,aAAa,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC;IAC/C,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,SAAS,CAAC,CAAC;QAEpC,MAAM,OAAO,GAAG,GAAG,CAAC,OAAO,IAAI,GAAG,CAAC;QACnC,IAAI,OAAO,OAAO,CAAC,KAAK,KAAK,UAAU,EAAE,CAAC;YACxC,OAAO,CAAC,IAAI,CACV,0BAA0B,KAAK,CAAC,MAAM,mEAAmE,CAC1G,CAAC;YACF,OAAO;QACT,CAAC;QAED,eAAe,CAAC,OAAO,EAAE,KAAK,CAAC,UAAU,CAAC,CAAC;IAC7C,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO,CAAC,IAAI,CACV,iDAAiD,KAAK,CAAC,MAAM,IAAI,EAChE,GAAa,CAAC,OAAO,CACvB,CAAC;IACJ,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,sBAAsB,CAC1C,MAAiB,EACjB,SAAiB;IAEjB,IAAI,CAAC,MAAM,CAAC,QAAQ,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO;IAE7D,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;QACpC,MAAM,iBAAiB,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;IAC5C,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { Chunker, Chunk } from "../core/interfaces.js";
|
|
2
|
+
/**
 * Type declaration for the Markdown chunker.
 *
 * - `language` is the literal `"markdown"`.
 * - `fileExtensions` lists the extensions this chunker is meant for
 *   (the accompanying markdown.js sets it to `.md` and `.mdx`).
 * - `chunk` takes a file path and the file's text and resolves to the chunks
 *   produced for it. In markdown.js, blank content yields an empty array and
 *   content without any heading yields a single chunk covering the whole file.
 */
export declare class MarkdownChunker implements Chunker {
|
|
3
|
+
readonly language = "markdown";
|
|
4
|
+
readonly fileExtensions: string[];
|
|
5
|
+
chunk(filePath: string, content: string): Promise<Chunk[]>;
|
|
6
|
+
}
|
|
7
|
+
/** Module-level `MarkdownChunker` instance exported by markdown.js. */
export declare const markdownChunker: MarkdownChunker;
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import { uuid } from "./uuid.js";
|
|
2
|
+
/**
 * Matches an ATX-style heading on a single line: one to six `#` characters,
 * at least one whitespace character, then the heading text.
 *
 * The pattern is intentionally not global, so it carries no `lastIndex` state
 * between lines. The `m` flag is required so that `$` still matches in front
 * of the trailing `\r` that CRLF files leave on every line after splitting
 * on `\n`.
 */
const HEADING_REGEX = /^(#{1,6})\s+(.+)$/m;
/**
 * Matches a code-fence marker at the start of an already trimmed line: a run
 * of three or more backticks or tildes (group 1) and the rest of the line
 * (group 2, the info string of an opening fence).
 */
const FENCE_REGEX = /^(`{3,}|~{3,})(.*)$/;
/**
 * Returns the 1-based line numbers of all heading lines that are outside of
 * fenced code blocks.
 *
 * A fence is closed only by a marker made of the same character, at least as
 * long as the opening marker, with nothing after it. This keeps `#` comment
 * lines inside `~~~` blocks, and shorter backtick runs nested inside a longer
 * fence, from being mistaken for headings or fence boundaries.
 */
function findHeadingLines(lines) {
    const headingLines = [];
    let openFence = null;
    lines.forEach((line, index) => {
        const fence = FENCE_REGEX.exec(line.trim());
        if (openFence !== null) {
            const closes = fence !== null &&
                fence[1][0] === openFence[0] &&
                fence[1].length >= openFence.length &&
                fence[2] === "";
            if (closes)
                openFence = null;
            return;
        }
        // A backtick fence may not carry backticks in its info string;
        // such a line is inline code, not the start of a block.
        if (fence !== null && (fence[1][0] !== "`" || !fence[2].includes("`"))) {
            openFence = fence[1];
            return;
        }
        if (HEADING_REGEX.test(line))
            headingLines.push(index + 1);
    });
    return headingLines;
}
/**
 * Chunker for Markdown files that cuts a document into one chunk per heading.
 */
export class MarkdownChunker {
    language = "markdown";
    fileExtensions = [".md", ".mdx"];
    /**
     * Splits Markdown text into chunks.
     *
     * - Blank content produces no chunks.
     * - Content without any heading is returned unmodified as a single chunk.
     * - Otherwise every heading (any level) starts a new chunk that runs up to
     *   the line before the next heading. Text in front of the first heading
     *   is emitted as its own leading chunk instead of being dropped.
     *
     * Chunk text is trimmed; `startLine` / `endLine` are 1-based, inclusive
     * and describe the untrimmed line span. Sections that are blank after
     * trimming are skipped.
     *
     * @param filePath - Path stored in each chunk's metadata.
     * @param content - Full text of the Markdown file.
     * @returns The chunks in document order.
     */
    async chunk(filePath, content) {
        if (content.trim().length === 0)
            return [];
        const lines = content.split("\n");
        const sectionStarts = findHeadingLines(lines);
        // No headings at all: keep the file as one chunk.
        if (sectionStarts.length === 0) {
            return [
                {
                    id: uuid(),
                    content,
                    metadata: {
                        filePath,
                        startLine: 1,
                        endLine: lines.length,
                        language: this.language,
                    },
                },
            ];
        }
        // Preamble before the first heading becomes a section of its own;
        // a blank preamble is filtered out below like any other blank section.
        if (sectionStarts[0] > 1)
            sectionStarts.unshift(1);
        const chunks = [];
        sectionStarts.forEach((startLine, index) => {
            const endLine = index + 1 < sectionStarts.length
                ? sectionStarts[index + 1] - 1
                : lines.length;
            const chunkContent = lines.slice(startLine - 1, endLine).join("\n").trim();
            if (chunkContent.length === 0)
                return;
            chunks.push({
                id: uuid(),
                content: chunkContent,
                metadata: {
                    filePath,
                    startLine,
                    endLine,
                    language: this.language,
                },
            });
        });
        return chunks;
    }
}
/** Module-level MarkdownChunker instance, created once when this module is loaded. */
export const markdownChunker = new MarkdownChunker();
|
|
96
|
+
//# sourceMappingURL=markdown.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"markdown.js","sourceRoot":"","sources":["../../src/chunker/markdown.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAEjC,MAAM,aAAa,GAAG,qBAAqB,CAAC;AAE5C,MAAM,OAAO,eAAe;IACjB,QAAQ,GAAG,UAAU,CAAC;IACtB,cAAc,GAAG,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC;IAE1C,KAAK,CAAC,KAAK,CAAC,QAAgB,EAAE,OAAe;QAC3C,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,CAAC;QAE3C,MAAM,MAAM,GAAY,EAAE,CAAC;QAC3B,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAClC,MAAM,QAAQ,GAA4D,EAAE,CAAC;QAE7E,IAAI,WAAW,GAAG,KAAK,CAAC;QACxB,IAAI,mBAAmB,GAAG,CAAC,CAAC;QAC5B,IAAI,cAAc,GAAG,EAAE,CAAC;QACxB,IAAI,YAAY,GAAG,CAAC,CAAC;QAErB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YAE5B,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;gBAClC,WAAW,GAAG,CAAC,WAAW,CAAC;gBAC3B,SAAS;YACX,CAAC;YAED,IAAI,WAAW;gBAAE,SAAS;YAE1B,MAAM,KAAK,GAAG,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACvC,aAAa,CAAC,SAAS,GAAG,CAAC,CAAC;YAC5B,IAAI,KAAK,EAAE,CAAC;gBACV,IAAI,cAAc,EAAE,CAAC;oBACnB,QAAQ,CAAC,IAAI,CAAC;wBACZ,OAAO,EAAE,cAAc;wBACvB,KAAK,EAAE,YAAY;wBACnB,SAAS,EAAE,mBAAmB;qBAC/B,CAAC,CAAC;gBACL,CAAC;gBACD,cAAc,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;gBAChC,YAAY,GAAG,KAAK,CAAC,CAAC,CAAC,EAAE,MAAM,IAAI,CAAC,CAAC;gBACrC,mBAAmB,GAAG,CAAC,GAAG,CAAC,CAAC;gBAE5B,IAAI,YAAY,IAAI,CAAC,EAAE,CAAC;oBACtB,QAAQ,CAAC,IAAI,CAAC;wBACZ,OAAO,EAAE,cAAc;wBACvB,KAAK,EAAE,YAAY;wBACnB,SAAS,EAAE,mBAAmB;qBAC/B,CAAC,CAAC;oBACH,cAAc,GAAG,EAAE,CAAC;gBACtB,CAAC;YACH,CAAC;QACH,CAAC;QAED,eAAe;QACf,IAAI,cAAc,EAAE,CAAC;YACnB,QAAQ,CAAC,IAAI,CAAC;gBACZ,OAAO,EAAE,cAAc;gBACvB,KAAK,EAAE,YAAY;gBACnB,SAAS,EAAE,mBAAmB;aAC/B,CAAC,CAAC;QACL,CAAC;QAED,4DAA4D;QAC5D,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1B,OAAO;gBACL;oBACE,EAAE,EAAE,IAAI,EAAE;oBACV,OAAO;oBACP,QAAQ,EAAE;wBACR,QAAQ;wBACR,SAAS,EAAE,CAAC;wBACZ,OAAO,EAAE,KAAK,CAAC,MAAM;wBACrB,QAAQ,EAAE,IAAI,CAAC,QAAQ;qBACxB;iBACF;aACF,CAAC;QACJ,CAAC;QAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CA
AC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACzC,MAAM,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAE,CAAC;YAC7B,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,CAAC;YACpC,MAAM,OAAO,GACX,CAAC,GAAG,CAAC,GAAG,QAAQ,CAAC,MAAM;gBACrB,CAAC,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAE,CAAC,SAAS,GAAG,CAAC;gBAChC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC;YAEnB,IAAI,SAAS,GAAG,OAAO;gBAAE,SAAS;YAElC,MAAM,YAAY,GAAG,KAAK,CAAC,KAAK,CAAC,SAAS,GAAG,CAAC,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;YAC3E,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC;gBAAE,SAAS;YAExC,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,IAAI,EAAE;gBACV,OAAO,EAAE,YAAY;gBACrB,QAAQ,EAAE;oBACR,QAAQ;oBACR,SAAS;oBACT,OAAO;oBACP,QAAQ,EAAE,IAAI,CAAC,QAAQ;iBACxB;aACF,CAAC,CAAC;QACL,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;CACF;AAED,MAAM,CAAC,MAAM,eAAe,GAAG,IAAI,eAAe,EAAE,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { Chunker, Chunk } from "../core/interfaces.js";
|
|
2
|
+
/**
 * Declared to take a `Buffer` and resolve to a string.
 * NOTE(review): presumably the text extracted from a PDF's bytes; the
 * implementation is not part of this declaration, so confirm against pdf.js.
 */
export declare function extractPdfText(buffer: Buffer): Promise<string>;
|
|
3
|
+
/**
 * Type declaration for the chunker whose `language` is the literal `"pdf"`.
 *
 * `chunk` takes a file path and a string `content` and resolves to an array
 * of `Chunk`.
 * NOTE(review): how `content` relates to the PDF file (for example, whether
 * it is text already produced by `extractPdfText`) is not visible in this
 * declaration; confirm against the implementation.
 */
export declare class PdfChunker implements Chunker {
|
|
4
|
+
readonly language = "pdf";
|
|
5
|
+
readonly fileExtensions: string[];
|
|
6
|
+
chunk(filePath: string, content: string): Promise<Chunk[]>;
|
|
7
|
+
}
|
|
8
|
+
/** Exported constant of type `PdfChunker`. */
export declare const pdfChunker: PdfChunker;
|