@vpxa/kb 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/packages/analyzers/dist/blast-radius-analyzer.js +13 -114
- package/packages/analyzers/dist/dependency-analyzer.js +11 -425
- package/packages/analyzers/dist/diagram-generator.js +4 -86
- package/packages/analyzers/dist/entry-point-analyzer.js +5 -239
- package/packages/analyzers/dist/index.js +1 -23
- package/packages/analyzers/dist/knowledge-producer.js +24 -113
- package/packages/analyzers/dist/pattern-analyzer.js +5 -359
- package/packages/analyzers/dist/regex-call-graph.js +1 -428
- package/packages/analyzers/dist/structure-analyzer.js +4 -258
- package/packages/analyzers/dist/symbol-analyzer.js +13 -442
- package/packages/analyzers/dist/ts-call-graph.js +1 -160
- package/packages/analyzers/dist/types.js +0 -1
- package/packages/chunker/dist/call-graph-extractor.js +1 -90
- package/packages/chunker/dist/chunker-factory.js +1 -36
- package/packages/chunker/dist/chunker.interface.js +0 -1
- package/packages/chunker/dist/code-chunker.js +14 -134
- package/packages/chunker/dist/generic-chunker.js +5 -72
- package/packages/chunker/dist/index.js +1 -21
- package/packages/chunker/dist/markdown-chunker.js +7 -119
- package/packages/chunker/dist/treesitter-chunker.js +8 -234
- package/packages/cli/dist/commands/analyze.js +3 -112
- package/packages/cli/dist/commands/context-cmds.js +1 -155
- package/packages/cli/dist/commands/environment.js +2 -204
- package/packages/cli/dist/commands/execution.js +1 -137
- package/packages/cli/dist/commands/graph.js +7 -81
- package/packages/cli/dist/commands/init.js +9 -87
- package/packages/cli/dist/commands/knowledge.js +1 -139
- package/packages/cli/dist/commands/search.js +8 -267
- package/packages/cli/dist/commands/system.js +4 -241
- package/packages/cli/dist/commands/workspace.js +2 -388
- package/packages/cli/dist/context.js +1 -14
- package/packages/cli/dist/helpers.js +3 -458
- package/packages/cli/dist/index.js +3 -69
- package/packages/cli/dist/kb-init.js +1 -82
- package/packages/cli/dist/types.js +0 -1
- package/packages/core/dist/constants.js +1 -43
- package/packages/core/dist/content-detector.js +1 -79
- package/packages/core/dist/errors.js +1 -40
- package/packages/core/dist/index.js +1 -9
- package/packages/core/dist/logger.js +1 -34
- package/packages/core/dist/types.js +0 -1
- package/packages/embeddings/dist/embedder.interface.js +0 -1
- package/packages/embeddings/dist/index.js +1 -5
- package/packages/embeddings/dist/onnx-embedder.js +1 -82
- package/packages/indexer/dist/file-hasher.js +1 -13
- package/packages/indexer/dist/filesystem-crawler.js +1 -125
- package/packages/indexer/dist/graph-extractor.js +1 -111
- package/packages/indexer/dist/incremental-indexer.js +1 -278
- package/packages/indexer/dist/index.js +1 -14
- package/packages/server/dist/api.js +1 -9
- package/packages/server/dist/config.js +1 -75
- package/packages/server/dist/curated-manager.js +9 -356
- package/packages/server/dist/index.js +1 -134
- package/packages/server/dist/replay-interceptor.js +1 -38
- package/packages/server/dist/resources/resources.js +2 -40
- package/packages/server/dist/server.js +1 -247
- package/packages/server/dist/tools/analyze.tools.js +1 -288
- package/packages/server/dist/tools/forge.tools.js +11 -499
- package/packages/server/dist/tools/forget.tool.js +3 -39
- package/packages/server/dist/tools/graph.tool.js +5 -110
- package/packages/server/dist/tools/list.tool.js +5 -53
- package/packages/server/dist/tools/lookup.tool.js +8 -51
- package/packages/server/dist/tools/onboard.tool.js +2 -112
- package/packages/server/dist/tools/produce.tool.js +4 -74
- package/packages/server/dist/tools/read.tool.js +4 -47
- package/packages/server/dist/tools/reindex.tool.js +2 -70
- package/packages/server/dist/tools/remember.tool.js +3 -42
- package/packages/server/dist/tools/replay.tool.js +6 -88
- package/packages/server/dist/tools/search.tool.js +17 -327
- package/packages/server/dist/tools/status.tool.js +3 -68
- package/packages/server/dist/tools/toolkit.tools.js +20 -1673
- package/packages/server/dist/tools/update.tool.js +3 -39
- package/packages/server/dist/tools/utility.tools.js +19 -456
- package/packages/store/dist/graph-store.interface.js +0 -1
- package/packages/store/dist/index.js +1 -9
- package/packages/store/dist/lance-store.js +1 -258
- package/packages/store/dist/sqlite-graph-store.js +8 -309
- package/packages/store/dist/store-factory.js +1 -14
- package/packages/store/dist/store.interface.js +0 -1
- package/packages/tools/dist/batch.js +1 -45
- package/packages/tools/dist/changelog.js +2 -112
- package/packages/tools/dist/check.js +2 -59
- package/packages/tools/dist/checkpoint.js +2 -43
- package/packages/tools/dist/codemod.js +2 -69
- package/packages/tools/dist/compact.js +3 -60
- package/packages/tools/dist/data-transform.js +1 -124
- package/packages/tools/dist/dead-symbols.js +2 -71
- package/packages/tools/dist/delegate.js +3 -128
- package/packages/tools/dist/diff-parse.js +3 -153
- package/packages/tools/dist/digest.js +7 -242
- package/packages/tools/dist/encode.js +1 -46
- package/packages/tools/dist/env-info.js +1 -58
- package/packages/tools/dist/eval.js +3 -79
- package/packages/tools/dist/evidence-map.js +3 -203
- package/packages/tools/dist/file-summary.js +2 -106
- package/packages/tools/dist/file-walk.js +1 -75
- package/packages/tools/dist/find-examples.js +3 -48
- package/packages/tools/dist/find.js +1 -120
- package/packages/tools/dist/forge-classify.js +2 -319
- package/packages/tools/dist/forge-ground.js +1 -184
- package/packages/tools/dist/git-context.js +3 -46
- package/packages/tools/dist/graph-query.js +1 -194
- package/packages/tools/dist/health.js +1 -118
- package/packages/tools/dist/http-request.js +1 -58
- package/packages/tools/dist/index.js +1 -273
- package/packages/tools/dist/lane.js +7 -227
- package/packages/tools/dist/measure.js +2 -119
- package/packages/tools/dist/onboard.js +42 -1136
- package/packages/tools/dist/parse-output.js +2 -158
- package/packages/tools/dist/process-manager.js +1 -69
- package/packages/tools/dist/queue.js +2 -126
- package/packages/tools/dist/regex-test.js +1 -39
- package/packages/tools/dist/rename.js +2 -70
- package/packages/tools/dist/replay.js +6 -108
- package/packages/tools/dist/schema-validate.js +1 -141
- package/packages/tools/dist/scope-map.js +1 -72
- package/packages/tools/dist/snippet.js +1 -80
- package/packages/tools/dist/stash.js +2 -60
- package/packages/tools/dist/stratum-card.js +5 -238
- package/packages/tools/dist/symbol.js +3 -87
- package/packages/tools/dist/test-run.js +2 -55
- package/packages/tools/dist/text-utils.js +2 -31
- package/packages/tools/dist/time-utils.js +1 -135
- package/packages/tools/dist/trace.js +2 -114
- package/packages/tools/dist/truncation.js +10 -41
- package/packages/tools/dist/watch.js +1 -61
- package/packages/tools/dist/web-fetch.js +9 -244
- package/packages/tools/dist/web-search.js +1 -46
- package/packages/tools/dist/workset.js +2 -77
- package/packages/tui/dist/App.js +260 -52468
- package/packages/tui/dist/index.js +286 -54551
- package/packages/tui/dist/panels/CuratedPanel.js +211 -34291
- package/packages/tui/dist/panels/LogPanel.js +259 -51703
- package/packages/tui/dist/panels/SearchPanel.js +212 -34824
- package/packages/tui/dist/panels/StatusPanel.js +211 -34304
|
@@ -1,90 +1 @@
|
|
|
1
|
-
import
|
|
2
|
-
import { TreeSitterRuntime } from "./treesitter-chunker.js";
|
|
3
|
-
const FUNCTION_NODE_TYPES = /* @__PURE__ */ new Set([
|
|
4
|
-
// TS/JS
|
|
5
|
-
"function_declaration",
|
|
6
|
-
"method_definition",
|
|
7
|
-
"arrow_function",
|
|
8
|
-
// Python
|
|
9
|
-
"function_definition",
|
|
10
|
-
// Go
|
|
11
|
-
"function_declaration",
|
|
12
|
-
"method_declaration",
|
|
13
|
-
// Rust
|
|
14
|
-
"function_item",
|
|
15
|
-
// Java
|
|
16
|
-
"method_declaration",
|
|
17
|
-
"constructor_declaration"
|
|
18
|
-
]);
|
|
19
|
-
const CALL_NODE_TYPES = /* @__PURE__ */ new Set([
|
|
20
|
-
"call_expression",
|
|
21
|
-
// TS/JS/Go/Rust
|
|
22
|
-
"new_expression",
|
|
23
|
-
// TS/JS (new Foo())
|
|
24
|
-
"call"
|
|
25
|
-
// Python
|
|
26
|
-
]);
|
|
27
|
-
function extractCallEdges(content, filePath) {
|
|
28
|
-
const runtime = TreeSitterRuntime.get();
|
|
29
|
-
if (!runtime) return null;
|
|
30
|
-
const ext = extname(filePath).toLowerCase();
|
|
31
|
-
if (!runtime.hasLanguage(ext)) return null;
|
|
32
|
-
const tree = runtime.parse(content, ext);
|
|
33
|
-
if (!tree) return null;
|
|
34
|
-
const edges = [];
|
|
35
|
-
const rootNode = tree.rootNode;
|
|
36
|
-
walkNode(rootNode, filePath, "<module>", edges);
|
|
37
|
-
return edges;
|
|
38
|
-
}
|
|
39
|
-
function walkNode(node, filePath, currentScope, edges) {
|
|
40
|
-
if (!node) return;
|
|
41
|
-
let scope = currentScope;
|
|
42
|
-
if (FUNCTION_NODE_TYPES.has(node.type)) {
|
|
43
|
-
scope = extractFunctionName(node) ?? currentScope;
|
|
44
|
-
}
|
|
45
|
-
if (CALL_NODE_TYPES.has(node.type)) {
|
|
46
|
-
const callee = extractCalleeName(node);
|
|
47
|
-
if (callee) {
|
|
48
|
-
edges.push({
|
|
49
|
-
callerFile: filePath,
|
|
50
|
-
callerName: scope,
|
|
51
|
-
calleeName: callee,
|
|
52
|
-
line: (node.startPosition?.row ?? 0) + 1
|
|
53
|
-
});
|
|
54
|
-
}
|
|
55
|
-
}
|
|
56
|
-
for (let i = 0; i < (node.childCount ?? 0); i++) {
|
|
57
|
-
const child = node.child(i);
|
|
58
|
-
if (child) walkNode(child, filePath, scope, edges);
|
|
59
|
-
}
|
|
60
|
-
}
|
|
61
|
-
function extractFunctionName(node) {
|
|
62
|
-
for (let i = 0; i < (node.childCount ?? 0); i++) {
|
|
63
|
-
const child = node.child(i);
|
|
64
|
-
if (!child) continue;
|
|
65
|
-
if (child.type === "identifier" || child.type === "property_identifier" || child.type === "name") {
|
|
66
|
-
return child.text ?? null;
|
|
67
|
-
}
|
|
68
|
-
}
|
|
69
|
-
return null;
|
|
70
|
-
}
|
|
71
|
-
function extractCalleeName(node) {
|
|
72
|
-
const fn = node.childForFieldName?.("function") ?? node.child(0);
|
|
73
|
-
if (!fn) return null;
|
|
74
|
-
if (fn.type === "identifier" || fn.type === "name") {
|
|
75
|
-
return fn.text ?? null;
|
|
76
|
-
}
|
|
77
|
-
if (fn.type === "member_expression" || fn.type === "attribute") {
|
|
78
|
-
const property = fn.childForFieldName?.("property") ?? fn.childForFieldName?.("attribute");
|
|
79
|
-
return property?.text ?? null;
|
|
80
|
-
}
|
|
81
|
-
if (node.type === "new_expression") {
|
|
82
|
-
const ctor = node.child(1);
|
|
83
|
-
return ctor?.text ?? null;
|
|
84
|
-
}
|
|
85
|
-
return null;
|
|
86
|
-
}
|
|
87
|
-
export {
|
|
88
|
-
extractCallEdges
|
|
89
|
-
};
|
|
90
|
-
//# sourceMappingURL=call-graph-extractor.js.map
|
|
1
|
+
import { extname } from "node:path";
import { TreeSitterRuntime } from "./treesitter-chunker.js";

/**
 * Syntax-node types that open a new caller scope during the walk.
 * Type names shared by several grammars are listed once.
 */
const FUNCTION_NODE_TYPES = new Set([
  // TS/JS
  "function_declaration", // also Go
  "method_definition",
  "arrow_function",
  // Python
  "function_definition",
  // Go / Java
  "method_declaration",
  // Rust
  "function_item",
  // Java
  "constructor_declaration",
]);

/** Syntax-node types that represent a call site. */
const CALL_NODE_TYPES = new Set([
  "call_expression", // TS/JS/Go/Rust
  "new_expression", // TS/JS (new Foo())
  "call", // Python
]);

/** Child node types accepted as the name of a function-like node. */
const NAME_NODE_TYPES = new Set(["identifier", "property_identifier", "name"]);

/**
 * Extract caller -> callee edges from one source file.
 *
 * @param {string} content - Source text handed to the parser.
 * @param {string} filePath - Path of the file; its lower-cased extension selects the grammar
 *   and the path is recorded on every edge as `callerFile`.
 * @returns {Array<{callerFile: string, callerName: string, calleeName: string, line: number}>|null}
 *   The collected edges, or `null` when no tree-sitter runtime is available, the extension has
 *   no loaded language, or parsing yields no tree.
 */
function extractCallEdges(content, filePath) {
  const runtime = TreeSitterRuntime.get();
  if (!runtime) return null;

  const extension = extname(filePath).toLowerCase();
  if (!runtime.hasLanguage(extension)) return null;

  const tree = runtime.parse(content, extension);
  if (!tree) return null;

  const edges = [];
  // Calls made outside any named function are attributed to "<module>".
  collectCallEdges(tree.rootNode, filePath, "<module>", edges);
  return edges;
}

/**
 * Depth-first walk that appends one edge per call node to `edges`.
 *
 * @param {object} node - Current syntax node (may be null/undefined, in which case nothing happens).
 * @param {string} filePath - Recorded as `callerFile`.
 * @param {string} enclosingScope - Name of the nearest named function above `node`.
 * @param {Array<object>} edges - Output array, mutated in place.
 */
function collectCallEdges(node, filePath, enclosingScope, edges) {
  if (!node) return;

  // A function-like node renames the scope for itself and its subtree; if it has no
  // usable name the enclosing scope is kept.
  const scope = FUNCTION_NODE_TYPES.has(node.type)
    ? findFunctionName(node) ?? enclosingScope
    : enclosingScope;

  if (CALL_NODE_TYPES.has(node.type)) {
    const calleeName = findCalleeName(node);
    if (calleeName) {
      edges.push({
        callerFile: filePath,
        callerName: scope,
        calleeName,
        // startPosition.row is incremented so the reported line is 1-based.
        line: (node.startPosition?.row ?? 0) + 1,
      });
    }
  }

  for (let index = 0; index < (node.childCount ?? 0); index += 1) {
    const child = node.child(index);
    if (child) collectCallEdges(child, filePath, scope, edges);
  }
}

/**
 * Return the declared name of a function-like node, or `null` when it has none.
 *
 * Arrow functions are anonymous: in the tree-sitter JavaScript/TypeScript grammars the only
 * identifier that can be a direct child of `arrow_function` is a bare parameter (`x => ...`)
 * or the expression body (`() => value`). Treating that identifier as the function name
 * attributed calls to a bogus caller, so arrow functions inherit the enclosing scope instead.
 *
 * @param {object} node - A node whose type is in FUNCTION_NODE_TYPES.
 * @returns {string|null}
 */
function findFunctionName(node) {
  if (node.type === "arrow_function") return null;

  for (let index = 0; index < (node.childCount ?? 0); index += 1) {
    const child = node.child(index);
    if (child && NAME_NODE_TYPES.has(child.type)) {
      return child.text ?? null;
    }
  }
  return null;
}

/**
 * Return the simple name of the function being called, or `null` when it cannot be determined.
 *
 * @param {object} callNode - A node whose type is in CALL_NODE_TYPES.
 * @returns {string|null}
 */
function findCalleeName(callNode) {
  // Prefer the grammar's "function" field; fall back to the first child when it is absent.
  const target = callNode.childForFieldName?.("function") ?? callNode.child(0);
  if (!target) return null;

  if (target.type === "identifier" || target.type === "name") {
    return target.text ?? null;
  }

  if (target.type === "member_expression" || target.type === "attribute") {
    // obj.method(...) -> "method"; the field is "property" or "attribute" depending on grammar.
    const member =
      target.childForFieldName?.("property") ?? target.childForFieldName?.("attribute");
    return member?.text ?? null;
  }

  if (callNode.type === "new_expression") {
    // The second child is read as the constructor expression (child 0 presumably being `new`).
    return callNode.child(1)?.text ?? null;
  }

  return null;
}

export { extractCallEdges };
|
|
@@ -1,36 +1 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { GenericChunker } from "./generic-chunker.js";
|
|
3
|
-
import { MarkdownChunker } from "./markdown-chunker.js";
|
|
4
|
-
import { TreeSitterChunker, TreeSitterRuntime } from "./treesitter-chunker.js";
|
|
5
|
-
function createChunker(fileExtension) {
|
|
6
|
-
const ext = fileExtension.toLowerCase();
|
|
7
|
-
switch (ext) {
|
|
8
|
-
case ".md":
|
|
9
|
-
case ".mdx":
|
|
10
|
-
return new MarkdownChunker();
|
|
11
|
-
case ".ts":
|
|
12
|
-
case ".tsx":
|
|
13
|
-
case ".mts":
|
|
14
|
-
case ".cts":
|
|
15
|
-
case ".js":
|
|
16
|
-
case ".jsx":
|
|
17
|
-
case ".mjs":
|
|
18
|
-
case ".cjs":
|
|
19
|
-
case ".py":
|
|
20
|
-
case ".go":
|
|
21
|
-
case ".rs":
|
|
22
|
-
case ".java": {
|
|
23
|
-
const runtime = TreeSitterRuntime.get();
|
|
24
|
-
if (runtime?.hasLanguage(ext)) {
|
|
25
|
-
return new TreeSitterChunker(runtime);
|
|
26
|
-
}
|
|
27
|
-
return new CodeChunker();
|
|
28
|
-
}
|
|
29
|
-
default:
|
|
30
|
-
return new GenericChunker();
|
|
31
|
-
}
|
|
32
|
-
}
|
|
33
|
-
export {
|
|
34
|
-
createChunker
|
|
35
|
-
};
|
|
36
|
-
//# sourceMappingURL=chunker-factory.js.map
|
|
1
|
+
import { CodeChunker } from "./code-chunker.js";
import { GenericChunker } from "./generic-chunker.js";
import { MarkdownChunker } from "./markdown-chunker.js";
import { TreeSitterChunker, TreeSitterRuntime } from "./treesitter-chunker.js";

/** Extensions handled by the Markdown chunker. */
const MARKDOWN_EXTENSIONS = new Set([".md", ".mdx"]);

/** Extensions treated as source code (tree-sitter when available, CodeChunker otherwise). */
const SOURCE_CODE_EXTENSIONS = new Set([
  ".ts",
  ".tsx",
  ".mts",
  ".cts",
  ".js",
  ".jsx",
  ".mjs",
  ".cjs",
  ".py",
  ".go",
  ".rs",
  ".java",
]);

/**
 * Pick a chunker implementation for a file extension.
 *
 * @param {string} fileExtension - Extension including the leading dot; matched case-insensitively.
 * @returns {MarkdownChunker|TreeSitterChunker|CodeChunker|GenericChunker}
 *   Markdown extensions get a MarkdownChunker. Source-code extensions get a TreeSitterChunker
 *   when the runtime exists and reports a language for the extension, else a CodeChunker.
 *   Everything else gets a GenericChunker.
 */
function createChunker(fileExtension) {
  const extension = fileExtension.toLowerCase();

  if (MARKDOWN_EXTENSIONS.has(extension)) {
    return new MarkdownChunker();
  }
  if (!SOURCE_CODE_EXTENSIONS.has(extension)) {
    return new GenericChunker();
  }

  // The runtime is only consulted for source-code extensions.
  const runtime = TreeSitterRuntime.get();
  if (runtime?.hasLanguage(extension)) {
    return new TreeSitterChunker(runtime);
  }
  return new CodeChunker();
}

export { createChunker };
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
//# sourceMappingURL=chunker.interface.js.map
|
|
@@ -1,134 +1,14 @@
|
|
|
1
|
-
import
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
const end = i + 1 < boundaries.length ? boundaries[i + 1].offset : content.length;
|
|
16
|
-
let text = content.slice(start, end).trim();
|
|
17
|
-
const header = `// File: ${metadata.sourcePath}
|
|
18
|
-
`;
|
|
19
|
-
text = header + text;
|
|
20
|
-
if (text.length > this.maxChunkSize) {
|
|
21
|
-
const subChunks = this.splitByLines(text, this.maxChunkSize);
|
|
22
|
-
let currentLine = this.getLineNumber(content, start);
|
|
23
|
-
for (const sub of subChunks) {
|
|
24
|
-
const subLines = sub.split("\n").length;
|
|
25
|
-
chunks.push({
|
|
26
|
-
text: sub,
|
|
27
|
-
sourcePath: metadata.sourcePath,
|
|
28
|
-
contentType: metadata.contentType,
|
|
29
|
-
chunkIndex: chunks.length,
|
|
30
|
-
totalChunks: 0,
|
|
31
|
-
startLine: currentLine,
|
|
32
|
-
endLine: currentLine + subLines - 1
|
|
33
|
-
});
|
|
34
|
-
currentLine += subLines;
|
|
35
|
-
}
|
|
36
|
-
} else {
|
|
37
|
-
const startLine = this.getLineNumber(content, start);
|
|
38
|
-
chunks.push({
|
|
39
|
-
text,
|
|
40
|
-
sourcePath: metadata.sourcePath,
|
|
41
|
-
contentType: metadata.contentType,
|
|
42
|
-
chunkIndex: chunks.length,
|
|
43
|
-
totalChunks: 0,
|
|
44
|
-
startLine,
|
|
45
|
-
endLine: startLine + text.split("\n").length - 1
|
|
46
|
-
});
|
|
47
|
-
}
|
|
48
|
-
}
|
|
49
|
-
if (boundaries[0].offset > 0) {
|
|
50
|
-
const preamble = content.slice(0, boundaries[0].offset).trim();
|
|
51
|
-
if (preamble.length > 0) {
|
|
52
|
-
chunks.unshift({
|
|
53
|
-
text: `// File: ${metadata.sourcePath}
|
|
54
|
-
${preamble}`,
|
|
55
|
-
sourcePath: metadata.sourcePath,
|
|
56
|
-
contentType: metadata.contentType,
|
|
57
|
-
chunkIndex: 0,
|
|
58
|
-
totalChunks: 0,
|
|
59
|
-
startLine: 1,
|
|
60
|
-
endLine: this.getLineNumber(content, boundaries[0].offset) - 1
|
|
61
|
-
});
|
|
62
|
-
}
|
|
63
|
-
}
|
|
64
|
-
return chunks.map((c, i) => ({ ...c, chunkIndex: i, totalChunks: chunks.length }));
|
|
65
|
-
}
|
|
66
|
-
findDeclarationBoundaries(content) {
|
|
67
|
-
const pattern = /^(?:export\s+)?(?:default\s+)?(?:async\s+)?(?:function|class|interface|type|const|enum|abstract\s+class)\s+(\w+)/gm;
|
|
68
|
-
const boundaries = [];
|
|
69
|
-
let match;
|
|
70
|
-
while ((match = pattern.exec(content)) !== null) {
|
|
71
|
-
const lineStart = content.lastIndexOf("\n", match.index - 1) + 1;
|
|
72
|
-
let actualStart = lineStart;
|
|
73
|
-
const beforeContent = content.slice(0, lineStart);
|
|
74
|
-
const beforeLines = beforeContent.split("\n");
|
|
75
|
-
let j = beforeLines.length - 1;
|
|
76
|
-
while (j >= 0) {
|
|
77
|
-
const line = beforeLines[j].trim();
|
|
78
|
-
if (line === "" || line.startsWith("//") || line.startsWith("*") || line.startsWith("/*") || line.startsWith("*/") || line.startsWith("@")) {
|
|
79
|
-
j--;
|
|
80
|
-
} else {
|
|
81
|
-
break;
|
|
82
|
-
}
|
|
83
|
-
}
|
|
84
|
-
if (j < beforeLines.length - 1) {
|
|
85
|
-
actualStart = beforeLines.slice(0, j + 1).join("\n").length + 1;
|
|
86
|
-
}
|
|
87
|
-
boundaries.push({
|
|
88
|
-
offset: actualStart,
|
|
89
|
-
name: match[1]
|
|
90
|
-
});
|
|
91
|
-
}
|
|
92
|
-
return boundaries;
|
|
93
|
-
}
|
|
94
|
-
fallbackChunk(content, metadata) {
|
|
95
|
-
const header = `// File: ${metadata.sourcePath}
|
|
96
|
-
`;
|
|
97
|
-
return [
|
|
98
|
-
{
|
|
99
|
-
text: header + content,
|
|
100
|
-
sourcePath: metadata.sourcePath,
|
|
101
|
-
contentType: metadata.contentType,
|
|
102
|
-
chunkIndex: 0,
|
|
103
|
-
totalChunks: 1,
|
|
104
|
-
startLine: 1,
|
|
105
|
-
endLine: content.split("\n").length
|
|
106
|
-
}
|
|
107
|
-
];
|
|
108
|
-
}
|
|
109
|
-
splitByLines(text, maxSize) {
|
|
110
|
-
const lines = text.split("\n");
|
|
111
|
-
const result = [];
|
|
112
|
-
let current = [];
|
|
113
|
-
let currentSize = 0;
|
|
114
|
-
for (const line of lines) {
|
|
115
|
-
if (currentSize + line.length + 1 > maxSize && current.length > 0) {
|
|
116
|
-
result.push(current.join("\n"));
|
|
117
|
-
current = [line];
|
|
118
|
-
currentSize = line.length;
|
|
119
|
-
} else {
|
|
120
|
-
current.push(line);
|
|
121
|
-
currentSize += line.length + 1;
|
|
122
|
-
}
|
|
123
|
-
}
|
|
124
|
-
if (current.length > 0) result.push(current.join("\n"));
|
|
125
|
-
return result;
|
|
126
|
-
}
|
|
127
|
-
getLineNumber(content, offset) {
|
|
128
|
-
return content.slice(0, offset).split("\n").length;
|
|
129
|
-
}
|
|
130
|
-
}
|
|
131
|
-
export {
|
|
132
|
-
CodeChunker
|
|
133
|
-
};
|
|
134
|
-
//# sourceMappingURL=code-chunker.js.map
|
|
1
|
+
import { CHUNK_SIZES } from "@kb/core";

/**
 * Regex-based chunker for source code: splits a file at top-level declaration lines and
 * prefixes every chunk with a `// File: <sourcePath>` header line.
 */
class CodeChunker {
  maxChunkSize;

  /**
   * @param {{maxChunkSize?: number}} [options] - `maxChunkSize` defaults to `CHUNK_SIZES.code.max`.
   */
  constructor(options) {
    this.maxChunkSize = options?.maxChunkSize ?? CHUNK_SIZES.code.max;
  }

  /**
   * Split `content` into chunks, one per declaration (further split by lines when a
   * declaration exceeds `maxChunkSize`), plus a leading preamble chunk for any non-blank
   * text before the first declaration.
   *
   * @param {string} content - Full source text.
   * @param {{sourcePath: string, contentType: *}} metadata - Copied onto every chunk.
   * @returns {Array<object>} Chunks with `text`, `sourcePath`, `contentType`, `chunkIndex`,
   *   `totalChunks`, `startLine` and `endLine`.
   */
  chunk(content, metadata) {
    const boundaries = this.findDeclarationBoundaries(content);
    if (boundaries.length === 0) return this.fallbackChunk(content, metadata);

    const chunks = [];
    for (let i = 0; i < boundaries.length; i += 1) {
      const start = boundaries[i].offset;
      const end = i + 1 < boundaries.length ? boundaries[i + 1].offset : content.length;
      const text = `// File: ${metadata.sourcePath}\n` + content.slice(start, end).trim();

      // Oversized declarations are split on line boundaries; otherwise the text is one piece.
      const pieces =
        text.length > this.maxChunkSize ? this.splitByLines(text, this.maxChunkSize) : [text];

      // NOTE(review): line counts below include the synthetic header line, so endLine can be
      // one past the last source line of the chunk — confirm whether consumers rely on this.
      let currentLine = this.getLineNumber(content, start);
      for (const piece of pieces) {
        const lineCount = piece.split("\n").length;
        chunks.push({
          text: piece,
          sourcePath: metadata.sourcePath,
          contentType: metadata.contentType,
          chunkIndex: chunks.length,
          totalChunks: 0,
          startLine: currentLine,
          endLine: currentLine + lineCount - 1,
        });
        currentLine += lineCount;
      }
    }

    const firstOffset = boundaries[0].offset;
    if (firstOffset > 0) {
      const preamble = content.slice(0, firstOffset).trim();
      if (preamble.length > 0) {
        chunks.unshift({
          text: `// File: ${metadata.sourcePath}\n${preamble}`,
          sourcePath: metadata.sourcePath,
          contentType: metadata.contentType,
          chunkIndex: 0,
          totalChunks: 0,
          startLine: 1,
          endLine: this.getLineNumber(content, firstOffset) - 1,
        });
      }
    }

    // Indices and totals are only final once the preamble has been inserted.
    return chunks.map((chunk, index) => ({
      ...chunk,
      chunkIndex: index,
      totalChunks: chunks.length,
    }));
  }

  /**
   * Locate declaration lines (function/class/interface/type/const/enum, optionally prefixed
   * by export/default/async) that start at column 0, and return where each chunk should begin.
   * The start is moved back over directly preceding blank, comment (`//`, `/*`, `*`) and `@` lines
   * so they stay attached to the declaration.
   *
   * @param {string} content - Full source text.
   * @returns {Array<{offset: number, name: string}>} One entry per match, in file order.
   */
  findDeclarationBoundaries(content) {
    const pattern =
      /^(?:export\s+)?(?:default\s+)?(?:async\s+)?(?:function|class|interface|type|const|enum|abstract\s+class)\s+(\w+)/gm;
    const boundaries = [];
    let match;

    while ((match = pattern.exec(content)) !== null) {
      const lineStart = content.lastIndexOf("\n", match.index - 1) + 1;
      const beforeLines = content.slice(0, lineStart).split("\n");

      // Walk back over lines that belong with the declaration.
      let j = beforeLines.length - 1;
      while (j >= 0) {
        const line = beforeLines[j].trim();
        const attachable =
          line === "" ||
          line.startsWith("//") ||
          line.startsWith("*") ||
          line.startsWith("/*") ||
          line.startsWith("*/") ||
          line.startsWith("@");
        if (!attachable) break;
        j -= 1;
      }

      let actualStart = lineStart;
      if (j < 0) {
        // Nothing but blank/comment lines (or nothing at all) precedes the declaration, so
        // the chunk starts at the top of the file. Using "prefix length + 1" here would
        // yield offset 1 and split off the file's first character as a bogus preamble.
        actualStart = 0;
      } else if (j < beforeLines.length - 1) {
        // +1 skips the newline that terminates the last kept line.
        actualStart = beforeLines.slice(0, j + 1).join("\n").length + 1;
      }

      boundaries.push({ offset: actualStart, name: match[1] });
    }

    return boundaries;
  }

  /**
   * Single-chunk result used when no declaration is found.
   *
   * @param {string} content - Full source text.
   * @param {{sourcePath: string, contentType: *}} metadata
   * @returns {Array<object>} Exactly one chunk covering the whole file.
   */
  fallbackChunk(content, metadata) {
    return [
      {
        text: `// File: ${metadata.sourcePath}\n` + content,
        sourcePath: metadata.sourcePath,
        contentType: metadata.contentType,
        chunkIndex: 0,
        totalChunks: 1,
        startLine: 1,
        endLine: content.split("\n").length,
      },
    ];
  }

  /**
   * Greedily pack whole lines into pieces of at most `maxSize` characters. A single line
   * longer than `maxSize` is kept intact as its own piece.
   *
   * @param {string} text
   * @param {number} maxSize
   * @returns {string[]}
   */
  splitByLines(text, maxSize) {
    const pieces = [];
    let current = [];
    let currentSize = 0;

    for (const line of text.split("\n")) {
      if (currentSize + line.length + 1 > maxSize && current.length > 0) {
        pieces.push(current.join("\n"));
        current = [line];
        currentSize = line.length;
      } else {
        current.push(line);
        currentSize += line.length + 1;
      }
    }

    if (current.length > 0) pieces.push(current.join("\n"));
    return pieces;
  }

  /**
   * 1-based line number of the character at `offset`.
   *
   * @param {string} content
   * @param {number} offset
   * @returns {number}
   */
  getLineNumber(content, offset) {
    return content.slice(0, offset).split("\n").length;
  }
}

export { CodeChunker };
|
|
@@ -1,72 +1,5 @@
|
|
|
1
|
-
import
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
this.maxChunkSize = options?.maxChunkSize ?? CHUNK_SIZES.default.max;
|
|
7
|
-
this.overlap = options?.overlap ?? CHUNK_SIZES.default.overlap;
|
|
8
|
-
}
|
|
9
|
-
chunk(content, metadata) {
|
|
10
|
-
if (content.length <= this.maxChunkSize) {
|
|
11
|
-
return [
|
|
12
|
-
{
|
|
13
|
-
text: content,
|
|
14
|
-
sourcePath: metadata.sourcePath,
|
|
15
|
-
contentType: metadata.contentType,
|
|
16
|
-
chunkIndex: 0,
|
|
17
|
-
totalChunks: 1,
|
|
18
|
-
startLine: 1,
|
|
19
|
-
endLine: content.split("\n").length
|
|
20
|
-
}
|
|
21
|
-
];
|
|
22
|
-
}
|
|
23
|
-
const lines = content.split("\n");
|
|
24
|
-
const chunks = [];
|
|
25
|
-
let currentLines = [];
|
|
26
|
-
let currentSize = 0;
|
|
27
|
-
let startLine = 1;
|
|
28
|
-
for (let i = 0; i < lines.length; i++) {
|
|
29
|
-
const line = lines[i];
|
|
30
|
-
if (currentSize + line.length + 1 > this.maxChunkSize && currentLines.length > 0) {
|
|
31
|
-
chunks.push({
|
|
32
|
-
text: currentLines.join("\n"),
|
|
33
|
-
sourcePath: metadata.sourcePath,
|
|
34
|
-
contentType: metadata.contentType,
|
|
35
|
-
chunkIndex: chunks.length,
|
|
36
|
-
totalChunks: 0,
|
|
37
|
-
startLine,
|
|
38
|
-
endLine: startLine + currentLines.length - 1
|
|
39
|
-
});
|
|
40
|
-
const overlapLines = [];
|
|
41
|
-
let overlapSize = 0;
|
|
42
|
-
for (let j = currentLines.length - 1; j >= 0; j--) {
|
|
43
|
-
if (overlapSize + currentLines[j].length + 1 > this.overlap) break;
|
|
44
|
-
overlapLines.unshift(currentLines[j]);
|
|
45
|
-
overlapSize += currentLines[j].length + 1;
|
|
46
|
-
}
|
|
47
|
-
startLine = startLine + currentLines.length - overlapLines.length;
|
|
48
|
-
currentLines = [...overlapLines, line];
|
|
49
|
-
currentSize = overlapSize + line.length + 1;
|
|
50
|
-
} else {
|
|
51
|
-
currentLines.push(line);
|
|
52
|
-
currentSize += line.length + 1;
|
|
53
|
-
}
|
|
54
|
-
}
|
|
55
|
-
if (currentLines.length > 0) {
|
|
56
|
-
chunks.push({
|
|
57
|
-
text: currentLines.join("\n"),
|
|
58
|
-
sourcePath: metadata.sourcePath,
|
|
59
|
-
contentType: metadata.contentType,
|
|
60
|
-
chunkIndex: chunks.length,
|
|
61
|
-
totalChunks: 0,
|
|
62
|
-
startLine,
|
|
63
|
-
endLine: startLine + currentLines.length - 1
|
|
64
|
-
});
|
|
65
|
-
}
|
|
66
|
-
return chunks.map((c, i) => ({ ...c, chunkIndex: i, totalChunks: chunks.length }));
|
|
67
|
-
}
|
|
68
|
-
}
|
|
69
|
-
export {
|
|
70
|
-
GenericChunker
|
|
71
|
-
};
|
|
72
|
-
//# sourceMappingURL=generic-chunker.js.map
|
|
1
|
+
import { CHUNK_SIZES } from "@kb/core";

/**
 * Line-based chunker for arbitrary text: packs whole lines into chunks of at most
 * `maxChunkSize` characters and repeats trailing lines of each chunk (up to `overlap`
 * characters) at the start of the next one.
 */
class GenericChunker {
  maxChunkSize;
  overlap;

  /**
   * @param {{maxChunkSize?: number, overlap?: number}} [options] - Defaults come from
   *   `CHUNK_SIZES.default.max` and `CHUNK_SIZES.default.overlap`.
   */
  constructor(options) {
    this.maxChunkSize = options?.maxChunkSize ?? CHUNK_SIZES.default.max;
    this.overlap = options?.overlap ?? CHUNK_SIZES.default.overlap;
  }

  /**
   * @param {string} content - Text to split.
   * @param {{sourcePath: string, contentType: *}} metadata - Copied onto every chunk.
   * @returns {Array<object>} Chunks with `text`, `sourcePath`, `contentType`, `chunkIndex`,
   *   `totalChunks`, `startLine` and `endLine` (1-based, inclusive).
   */
  chunk(content, metadata) {
    // Small inputs are returned untouched as a single chunk.
    if (content.length <= this.maxChunkSize) {
      return [
        {
          text: content,
          sourcePath: metadata.sourcePath,
          contentType: metadata.contentType,
          chunkIndex: 0,
          totalChunks: 1,
          startLine: 1,
          endLine: content.split("\n").length,
        },
      ];
    }

    const chunks = [];
    const emit = (lines, firstLine) => {
      chunks.push({
        text: lines.join("\n"),
        sourcePath: metadata.sourcePath,
        contentType: metadata.contentType,
        chunkIndex: chunks.length,
        totalChunks: 0,
        startLine: firstLine,
        endLine: firstLine + lines.length - 1,
      });
    };

    let pending = [];
    let pendingSize = 0;
    let firstLine = 1;

    for (const line of content.split("\n")) {
      const cost = line.length + 1; // +1 accounts for the joining newline
      const overflow = pending.length > 0 && pendingSize + cost > this.maxChunkSize;

      if (!overflow) {
        pending.push(line);
        pendingSize += cost;
        continue;
      }

      emit(pending, firstLine);

      // Count how many trailing lines of the emitted chunk fit in the overlap budget.
      let keptCount = 0;
      let keptSize = 0;
      while (keptCount < pending.length) {
        const candidate = pending[pending.length - 1 - keptCount];
        if (keptSize + candidate.length + 1 > this.overlap) break;
        keptSize += candidate.length + 1;
        keptCount += 1;
      }
      const carried = keptCount > 0 ? pending.slice(-keptCount) : [];

      // The next chunk starts at the first carried-over line.
      firstLine += pending.length - carried.length;
      pending = [...carried, line];
      pendingSize = keptSize + cost;
    }

    if (pending.length > 0) {
      emit(pending, firstLine);
    }

    return chunks.map((chunk, index) => ({
      ...chunk,
      chunkIndex: index,
      totalChunks: chunks.length,
    }));
  }
}

export { GenericChunker };
|
|
@@ -1,21 +1 @@
|
|
|
1
|
-
import
|
|
2
|
-
import { createChunker } from "./chunker-factory.js";
|
|
3
|
-
import { CodeChunker } from "./code-chunker.js";
|
|
4
|
-
import { GenericChunker } from "./generic-chunker.js";
|
|
5
|
-
import { MarkdownChunker } from "./markdown-chunker.js";
|
|
6
|
-
import {
|
|
7
|
-
initializeTreeSitter,
|
|
8
|
-
TreeSitterChunker,
|
|
9
|
-
TreeSitterRuntime
|
|
10
|
-
} from "./treesitter-chunker.js";
|
|
11
|
-
export {
|
|
12
|
-
CodeChunker,
|
|
13
|
-
GenericChunker,
|
|
14
|
-
MarkdownChunker,
|
|
15
|
-
TreeSitterChunker,
|
|
16
|
-
TreeSitterRuntime,
|
|
17
|
-
createChunker,
|
|
18
|
-
extractCallEdges,
|
|
19
|
-
initializeTreeSitter
|
|
20
|
-
};
|
|
21
|
-
//# sourceMappingURL=index.js.map
|
|
1
|
+
import{extractCallEdges as t}from"./call-graph-extractor.js";import{createChunker as n}from"./chunker-factory.js";import{CodeChunker as p}from"./code-chunker.js";import{GenericChunker as f}from"./generic-chunker.js";import{MarkdownChunker as m}from"./markdown-chunker.js";import{initializeTreeSitter as x,TreeSitterChunker as a,TreeSitterRuntime as h}from"./treesitter-chunker.js";export{p as CodeChunker,f as GenericChunker,m as MarkdownChunker,a as TreeSitterChunker,h as TreeSitterRuntime,n as createChunker,t as extractCallEdges,x as initializeTreeSitter};
|
|
@@ -1,122 +1,10 @@
|
|
|
1
|
-
import
|
|
2
|
-
class MarkdownChunker {
|
|
3
|
-
maxChunkSize;
|
|
4
|
-
minChunkSize;
|
|
5
|
-
constructor(options) {
|
|
6
|
-
this.maxChunkSize = options?.maxChunkSize ?? CHUNK_SIZES.markdown.max;
|
|
7
|
-
this.minChunkSize = options?.minChunkSize ?? CHUNK_SIZES.markdown.min;
|
|
8
|
-
}
|
|
9
|
-
chunk(content, metadata) {
|
|
10
|
-
const sections = this.splitByHeadings(content);
|
|
11
|
-
const chunks = [];
|
|
12
|
-
for (const section of sections) {
|
|
13
|
-
if (section.text.trim().length < this.minChunkSize) {
|
|
14
|
-
if (chunks.length > 0) {
|
|
15
|
-
const prev = chunks[chunks.length - 1];
|
|
16
|
-
prev.text += `
|
|
1
|
+
import{CHUNK_SIZES as u}from"@kb/core";class p{maxChunkSize;minChunkSize;constructor(r){this.maxChunkSize=r?.maxChunkSize??u.markdown.max,this.minChunkSize=r?.minChunkSize??u.markdown.min}chunk(r,s){const h=this.splitByHeadings(r),t=[];for(const n of h){if(n.text.trim().length<this.minChunkSize&&t.length>0){const i=t[t.length-1];i.text+=`
|
|
17
2
|
|
|
18
|
-
${
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
}
|
|
23
|
-
if (section.text.length > this.maxChunkSize) {
|
|
24
|
-
const subTexts = this.splitByParagraphs(section.text, this.maxChunkSize);
|
|
25
|
-
let currentLine = section.startLine;
|
|
26
|
-
for (const sub of subTexts) {
|
|
27
|
-
const subLines = sub.split("\n").length;
|
|
28
|
-
chunks.push({
|
|
29
|
-
text: sub,
|
|
30
|
-
sourcePath: metadata.sourcePath,
|
|
31
|
-
contentType: metadata.contentType,
|
|
32
|
-
headingPath: section.headingPath,
|
|
33
|
-
chunkIndex: chunks.length,
|
|
34
|
-
totalChunks: 0,
|
|
35
|
-
// will be set below
|
|
36
|
-
startLine: currentLine,
|
|
37
|
-
endLine: currentLine + subLines - 1
|
|
38
|
-
});
|
|
39
|
-
currentLine += subLines;
|
|
40
|
-
}
|
|
41
|
-
} else {
|
|
42
|
-
chunks.push({
|
|
43
|
-
text: section.text,
|
|
44
|
-
sourcePath: metadata.sourcePath,
|
|
45
|
-
contentType: metadata.contentType,
|
|
46
|
-
headingPath: section.headingPath,
|
|
47
|
-
chunkIndex: chunks.length,
|
|
48
|
-
totalChunks: 0,
|
|
49
|
-
startLine: section.startLine,
|
|
50
|
-
endLine: section.endLine
|
|
51
|
-
});
|
|
52
|
-
}
|
|
53
|
-
}
|
|
54
|
-
return chunks.map((c, i) => ({ ...c, chunkIndex: i, totalChunks: chunks.length }));
|
|
55
|
-
}
|
|
56
|
-
splitByHeadings(content) {
|
|
57
|
-
const lines = content.split("\n");
|
|
58
|
-
const sections = [];
|
|
59
|
-
let currentSection = null;
|
|
60
|
-
const headingStack = [];
|
|
61
|
-
let inFencedCodeBlock = false;
|
|
62
|
-
for (let i = 0; i < lines.length; i++) {
|
|
63
|
-
if (/^```/.test(lines[i])) {
|
|
64
|
-
inFencedCodeBlock = !inFencedCodeBlock;
|
|
65
|
-
}
|
|
66
|
-
const match = !inFencedCodeBlock ? lines[i].match(/^(#{1,6})\s+(.+)/) : null;
|
|
67
|
-
if (match) {
|
|
68
|
-
if (currentSection) {
|
|
69
|
-
sections.push(currentSection);
|
|
70
|
-
}
|
|
71
|
-
const level = match[1].length;
|
|
72
|
-
const title = match[2].trim();
|
|
73
|
-
while (headingStack.length >= level) {
|
|
74
|
-
headingStack.pop();
|
|
75
|
-
}
|
|
76
|
-
headingStack.push(`${"#".repeat(level)} ${title}`);
|
|
77
|
-
currentSection = {
|
|
78
|
-
text: lines[i],
|
|
79
|
-
headingPath: headingStack.join(" > "),
|
|
80
|
-
startLine: i + 1,
|
|
81
|
-
endLine: i + 1
|
|
82
|
-
};
|
|
83
|
-
} else if (currentSection) {
|
|
84
|
-
currentSection.text += `
|
|
85
|
-
${lines[i]}`;
|
|
86
|
-
currentSection.endLine = i + 1;
|
|
87
|
-
} else {
|
|
88
|
-
currentSection = {
|
|
89
|
-
text: lines[i],
|
|
90
|
-
headingPath: "(intro)",
|
|
91
|
-
startLine: i + 1,
|
|
92
|
-
endLine: i + 1
|
|
93
|
-
};
|
|
94
|
-
}
|
|
95
|
-
}
|
|
96
|
-
if (currentSection) sections.push(currentSection);
|
|
97
|
-
return sections;
|
|
98
|
-
}
|
|
99
|
-
splitByParagraphs(text, maxSize) {
|
|
100
|
-
const paragraphs = text.split(/\n\n+/);
|
|
101
|
-
const result = [];
|
|
102
|
-
let current = "";
|
|
103
|
-
for (const para of paragraphs) {
|
|
104
|
-
if (`${current}
|
|
3
|
+
${n.text}`,i.endLine=n.endLine;continue}if(n.text.length>this.maxChunkSize){const i=this.splitByParagraphs(n.text,this.maxChunkSize);let e=n.startLine;for(const a of i){const o=a.split(`
|
|
4
|
+
`).length;t.push({text:a,sourcePath:s.sourcePath,contentType:s.contentType,headingPath:n.headingPath,chunkIndex:t.length,totalChunks:0,startLine:e,endLine:e+o-1}),e+=o}}else t.push({text:n.text,sourcePath:s.sourcePath,contentType:s.contentType,headingPath:n.headingPath,chunkIndex:t.length,totalChunks:0,startLine:n.startLine,endLine:n.endLine})}return t.map((n,i)=>({...n,chunkIndex:i,totalChunks:t.length}))}splitByHeadings(r){const s=r.split(`
|
|
5
|
+
`),h=[];let t=null;const n=[];let i=!1;for(let e=0;e<s.length;e++){/^```/.test(s[e])&&(i=!i);const a=i?null:s[e].match(/^(#{1,6})\s+(.+)/);if(a){t&&h.push(t);const o=a[1].length,c=a[2].trim();for(;n.length>=o;)n.pop();n.push(`${"#".repeat(o)} ${c}`),t={text:s[e],headingPath:n.join(" > "),startLine:e+1,endLine:e+1}}else t?(t.text+=`
|
|
6
|
+
${s[e]}`,t.endLine=e+1):t={text:s[e],headingPath:"(intro)",startLine:e+1,endLine:e+1}}return t&&h.push(t),h}splitByParagraphs(r,s){const h=r.split(/\n\n+/),t=[];let n="";for(const i of h)`${n}
|
|
105
7
|
|
|
106
|
-
${
|
|
107
|
-
result.push(current.trim());
|
|
108
|
-
current = para;
|
|
109
|
-
} else {
|
|
110
|
-
current = current ? `${current}
|
|
8
|
+
${i}`.length>s&&n.length>0?(t.push(n.trim()),n=i):n=n?`${n}
|
|
111
9
|
|
|
112
|
-
${
|
|
113
|
-
}
|
|
114
|
-
}
|
|
115
|
-
if (current.trim()) result.push(current.trim());
|
|
116
|
-
return result;
|
|
117
|
-
}
|
|
118
|
-
}
|
|
119
|
-
export {
|
|
120
|
-
MarkdownChunker
|
|
121
|
-
};
|
|
122
|
-
//# sourceMappingURL=markdown-chunker.js.map
|
|
10
|
+
${i}`:i;return n.trim()&&t.push(n.trim()),t}}export{p as MarkdownChunker};
|