@vpxa/kb 0.1.1 → 0.1.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (136):
  1. package/package.json +1 -1
  2. package/packages/analyzers/dist/blast-radius-analyzer.js +13 -114
  3. package/packages/analyzers/dist/dependency-analyzer.js +11 -425
  4. package/packages/analyzers/dist/diagram-generator.js +4 -86
  5. package/packages/analyzers/dist/entry-point-analyzer.js +5 -239
  6. package/packages/analyzers/dist/index.js +1 -23
  7. package/packages/analyzers/dist/knowledge-producer.js +24 -113
  8. package/packages/analyzers/dist/pattern-analyzer.js +5 -359
  9. package/packages/analyzers/dist/regex-call-graph.js +1 -428
  10. package/packages/analyzers/dist/structure-analyzer.js +4 -258
  11. package/packages/analyzers/dist/symbol-analyzer.js +13 -442
  12. package/packages/analyzers/dist/ts-call-graph.js +1 -160
  13. package/packages/analyzers/dist/types.js +0 -1
  14. package/packages/chunker/dist/call-graph-extractor.js +1 -90
  15. package/packages/chunker/dist/chunker-factory.js +1 -36
  16. package/packages/chunker/dist/chunker.interface.js +0 -1
  17. package/packages/chunker/dist/code-chunker.js +14 -134
  18. package/packages/chunker/dist/generic-chunker.js +5 -72
  19. package/packages/chunker/dist/index.js +1 -21
  20. package/packages/chunker/dist/markdown-chunker.js +7 -119
  21. package/packages/chunker/dist/treesitter-chunker.js +8 -234
  22. package/packages/cli/dist/commands/analyze.js +3 -112
  23. package/packages/cli/dist/commands/context-cmds.js +1 -155
  24. package/packages/cli/dist/commands/environment.js +2 -204
  25. package/packages/cli/dist/commands/execution.js +1 -137
  26. package/packages/cli/dist/commands/graph.js +7 -81
  27. package/packages/cli/dist/commands/init.js +9 -87
  28. package/packages/cli/dist/commands/knowledge.js +1 -139
  29. package/packages/cli/dist/commands/search.js +8 -267
  30. package/packages/cli/dist/commands/system.js +4 -241
  31. package/packages/cli/dist/commands/workspace.js +2 -388
  32. package/packages/cli/dist/context.js +1 -14
  33. package/packages/cli/dist/helpers.js +3 -458
  34. package/packages/cli/dist/index.js +3 -69
  35. package/packages/cli/dist/kb-init.js +1 -82
  36. package/packages/cli/dist/types.js +0 -1
  37. package/packages/core/dist/constants.js +1 -43
  38. package/packages/core/dist/content-detector.js +1 -79
  39. package/packages/core/dist/errors.js +1 -40
  40. package/packages/core/dist/index.js +1 -9
  41. package/packages/core/dist/logger.js +1 -34
  42. package/packages/core/dist/types.js +0 -1
  43. package/packages/embeddings/dist/embedder.interface.js +0 -1
  44. package/packages/embeddings/dist/index.js +1 -5
  45. package/packages/embeddings/dist/onnx-embedder.js +1 -82
  46. package/packages/indexer/dist/file-hasher.js +1 -13
  47. package/packages/indexer/dist/filesystem-crawler.js +1 -125
  48. package/packages/indexer/dist/graph-extractor.js +1 -111
  49. package/packages/indexer/dist/incremental-indexer.js +1 -278
  50. package/packages/indexer/dist/index.js +1 -14
  51. package/packages/server/dist/api.js +1 -9
  52. package/packages/server/dist/config.js +1 -75
  53. package/packages/server/dist/curated-manager.js +9 -356
  54. package/packages/server/dist/index.js +1 -134
  55. package/packages/server/dist/replay-interceptor.js +1 -38
  56. package/packages/server/dist/resources/resources.js +2 -40
  57. package/packages/server/dist/server.js +1 -247
  58. package/packages/server/dist/tools/analyze.tools.js +1 -288
  59. package/packages/server/dist/tools/forge.tools.js +11 -499
  60. package/packages/server/dist/tools/forget.tool.js +3 -39
  61. package/packages/server/dist/tools/graph.tool.js +5 -110
  62. package/packages/server/dist/tools/list.tool.js +5 -53
  63. package/packages/server/dist/tools/lookup.tool.js +8 -51
  64. package/packages/server/dist/tools/onboard.tool.js +2 -112
  65. package/packages/server/dist/tools/produce.tool.js +4 -74
  66. package/packages/server/dist/tools/read.tool.js +4 -47
  67. package/packages/server/dist/tools/reindex.tool.js +2 -70
  68. package/packages/server/dist/tools/remember.tool.js +3 -42
  69. package/packages/server/dist/tools/replay.tool.js +6 -88
  70. package/packages/server/dist/tools/search.tool.js +17 -327
  71. package/packages/server/dist/tools/status.tool.js +3 -68
  72. package/packages/server/dist/tools/toolkit.tools.js +20 -1673
  73. package/packages/server/dist/tools/update.tool.js +3 -39
  74. package/packages/server/dist/tools/utility.tools.js +19 -456
  75. package/packages/store/dist/graph-store.interface.js +0 -1
  76. package/packages/store/dist/index.js +1 -9
  77. package/packages/store/dist/lance-store.js +1 -258
  78. package/packages/store/dist/sqlite-graph-store.js +8 -309
  79. package/packages/store/dist/store-factory.js +1 -14
  80. package/packages/store/dist/store.interface.js +0 -1
  81. package/packages/tools/dist/batch.js +1 -45
  82. package/packages/tools/dist/changelog.js +2 -112
  83. package/packages/tools/dist/check.js +2 -59
  84. package/packages/tools/dist/checkpoint.js +2 -43
  85. package/packages/tools/dist/codemod.js +2 -69
  86. package/packages/tools/dist/compact.js +3 -60
  87. package/packages/tools/dist/data-transform.js +1 -124
  88. package/packages/tools/dist/dead-symbols.js +2 -71
  89. package/packages/tools/dist/delegate.js +3 -128
  90. package/packages/tools/dist/diff-parse.js +3 -153
  91. package/packages/tools/dist/digest.js +7 -242
  92. package/packages/tools/dist/encode.js +1 -46
  93. package/packages/tools/dist/env-info.js +1 -58
  94. package/packages/tools/dist/eval.js +3 -79
  95. package/packages/tools/dist/evidence-map.js +3 -203
  96. package/packages/tools/dist/file-summary.js +2 -106
  97. package/packages/tools/dist/file-walk.js +1 -75
  98. package/packages/tools/dist/find-examples.js +3 -48
  99. package/packages/tools/dist/find.js +1 -120
  100. package/packages/tools/dist/forge-classify.js +2 -319
  101. package/packages/tools/dist/forge-ground.js +1 -184
  102. package/packages/tools/dist/git-context.js +3 -46
  103. package/packages/tools/dist/graph-query.js +1 -194
  104. package/packages/tools/dist/health.js +1 -118
  105. package/packages/tools/dist/http-request.js +1 -58
  106. package/packages/tools/dist/index.js +1 -273
  107. package/packages/tools/dist/lane.js +7 -227
  108. package/packages/tools/dist/measure.js +2 -119
  109. package/packages/tools/dist/onboard.js +42 -1136
  110. package/packages/tools/dist/parse-output.js +2 -158
  111. package/packages/tools/dist/process-manager.js +1 -69
  112. package/packages/tools/dist/queue.js +2 -126
  113. package/packages/tools/dist/regex-test.js +1 -39
  114. package/packages/tools/dist/rename.js +2 -70
  115. package/packages/tools/dist/replay.js +6 -108
  116. package/packages/tools/dist/schema-validate.js +1 -141
  117. package/packages/tools/dist/scope-map.js +1 -72
  118. package/packages/tools/dist/snippet.js +1 -80
  119. package/packages/tools/dist/stash.js +2 -60
  120. package/packages/tools/dist/stratum-card.js +5 -238
  121. package/packages/tools/dist/symbol.js +3 -87
  122. package/packages/tools/dist/test-run.js +2 -55
  123. package/packages/tools/dist/text-utils.js +2 -31
  124. package/packages/tools/dist/time-utils.js +1 -135
  125. package/packages/tools/dist/trace.js +2 -114
  126. package/packages/tools/dist/truncation.js +10 -41
  127. package/packages/tools/dist/watch.js +1 -61
  128. package/packages/tools/dist/web-fetch.js +9 -244
  129. package/packages/tools/dist/web-search.js +1 -46
  130. package/packages/tools/dist/workset.js +2 -77
  131. package/packages/tui/dist/App.js +260 -52468
  132. package/packages/tui/dist/index.js +286 -54551
  133. package/packages/tui/dist/panels/CuratedPanel.js +211 -34291
  134. package/packages/tui/dist/panels/LogPanel.js +259 -51703
  135. package/packages/tui/dist/panels/SearchPanel.js +212 -34824
  136. package/packages/tui/dist/panels/StatusPanel.js +211 -34304
@@ -1,90 +1 @@
1
- import { extname } from "node:path";
2
- import { TreeSitterRuntime } from "./treesitter-chunker.js";
3
- const FUNCTION_NODE_TYPES = /* @__PURE__ */ new Set([
4
- // TS/JS
5
- "function_declaration",
6
- "method_definition",
7
- "arrow_function",
8
- // Python
9
- "function_definition",
10
- // Go
11
- "function_declaration",
12
- "method_declaration",
13
- // Rust
14
- "function_item",
15
- // Java
16
- "method_declaration",
17
- "constructor_declaration"
18
- ]);
19
- const CALL_NODE_TYPES = /* @__PURE__ */ new Set([
20
- "call_expression",
21
- // TS/JS/Go/Rust
22
- "new_expression",
23
- // TS/JS (new Foo())
24
- "call"
25
- // Python
26
- ]);
27
- function extractCallEdges(content, filePath) {
28
- const runtime = TreeSitterRuntime.get();
29
- if (!runtime) return null;
30
- const ext = extname(filePath).toLowerCase();
31
- if (!runtime.hasLanguage(ext)) return null;
32
- const tree = runtime.parse(content, ext);
33
- if (!tree) return null;
34
- const edges = [];
35
- const rootNode = tree.rootNode;
36
- walkNode(rootNode, filePath, "<module>", edges);
37
- return edges;
38
- }
39
- function walkNode(node, filePath, currentScope, edges) {
40
- if (!node) return;
41
- let scope = currentScope;
42
- if (FUNCTION_NODE_TYPES.has(node.type)) {
43
- scope = extractFunctionName(node) ?? currentScope;
44
- }
45
- if (CALL_NODE_TYPES.has(node.type)) {
46
- const callee = extractCalleeName(node);
47
- if (callee) {
48
- edges.push({
49
- callerFile: filePath,
50
- callerName: scope,
51
- calleeName: callee,
52
- line: (node.startPosition?.row ?? 0) + 1
53
- });
54
- }
55
- }
56
- for (let i = 0; i < (node.childCount ?? 0); i++) {
57
- const child = node.child(i);
58
- if (child) walkNode(child, filePath, scope, edges);
59
- }
60
- }
61
- function extractFunctionName(node) {
62
- for (let i = 0; i < (node.childCount ?? 0); i++) {
63
- const child = node.child(i);
64
- if (!child) continue;
65
- if (child.type === "identifier" || child.type === "property_identifier" || child.type === "name") {
66
- return child.text ?? null;
67
- }
68
- }
69
- return null;
70
- }
71
- function extractCalleeName(node) {
72
- const fn = node.childForFieldName?.("function") ?? node.child(0);
73
- if (!fn) return null;
74
- if (fn.type === "identifier" || fn.type === "name") {
75
- return fn.text ?? null;
76
- }
77
- if (fn.type === "member_expression" || fn.type === "attribute") {
78
- const property = fn.childForFieldName?.("property") ?? fn.childForFieldName?.("attribute");
79
- return property?.text ?? null;
80
- }
81
- if (node.type === "new_expression") {
82
- const ctor = node.child(1);
83
- return ctor?.text ?? null;
84
- }
85
- return null;
86
- }
87
- export {
88
- extractCallEdges
89
- };
90
- //# sourceMappingURL=call-graph-extractor.js.map
1
+ import{extname as a}from"node:path";import{TreeSitterRuntime as u}from"./treesitter-chunker.js";const s=new Set(["function_declaration","method_definition","arrow_function","function_definition","function_declaration","method_declaration","function_item","method_declaration","constructor_declaration"]),f=new Set(["call_expression","new_expression","call"]);function g(t,e){const n=u.get();if(!n)return null;const l=a(e).toLowerCase();if(!n.hasLanguage(l))return null;const i=n.parse(t,l);if(!i)return null;const r=[],o=i.rootNode;return c(o,e,"<module>",r),r}function c(t,e,n,l){if(!t)return;let i=n;if(s.has(t.type)&&(i=p(t)??n),f.has(t.type)){const r=d(t);r&&l.push({callerFile:e,callerName:i,calleeName:r,line:(t.startPosition?.row??0)+1})}for(let r=0;r<(t.childCount??0);r++){const o=t.child(r);o&&c(o,e,i,l)}}function p(t){for(let e=0;e<(t.childCount??0);e++){const n=t.child(e);if(n&&(n.type==="identifier"||n.type==="property_identifier"||n.type==="name"))return n.text??null}return null}function d(t){const e=t.childForFieldName?.("function")??t.child(0);return e?e.type==="identifier"||e.type==="name"?e.text??null:e.type==="member_expression"||e.type==="attribute"?(e.childForFieldName?.("property")??e.childForFieldName?.("attribute"))?.text??null:t.type==="new_expression"?t.child(1)?.text??null:null:null}export{g as extractCallEdges};
@@ -1,36 +1 @@
1
- import { CodeChunker } from "./code-chunker.js";
2
- import { GenericChunker } from "./generic-chunker.js";
3
- import { MarkdownChunker } from "./markdown-chunker.js";
4
- import { TreeSitterChunker, TreeSitterRuntime } from "./treesitter-chunker.js";
5
- function createChunker(fileExtension) {
6
- const ext = fileExtension.toLowerCase();
7
- switch (ext) {
8
- case ".md":
9
- case ".mdx":
10
- return new MarkdownChunker();
11
- case ".ts":
12
- case ".tsx":
13
- case ".mts":
14
- case ".cts":
15
- case ".js":
16
- case ".jsx":
17
- case ".mjs":
18
- case ".cjs":
19
- case ".py":
20
- case ".go":
21
- case ".rs":
22
- case ".java": {
23
- const runtime = TreeSitterRuntime.get();
24
- if (runtime?.hasLanguage(ext)) {
25
- return new TreeSitterChunker(runtime);
26
- }
27
- return new CodeChunker();
28
- }
29
- default:
30
- return new GenericChunker();
31
- }
32
- }
33
- export {
34
- createChunker
35
- };
36
- //# sourceMappingURL=chunker-factory.js.map
1
+ import{CodeChunker as s}from"./code-chunker.js";import{GenericChunker as n}from"./generic-chunker.js";import{MarkdownChunker as a}from"./markdown-chunker.js";import{TreeSitterChunker as c,TreeSitterRuntime as o}from"./treesitter-chunker.js";function f(t){const e=t.toLowerCase();switch(e){case".md":case".mdx":return new a;case".ts":case".tsx":case".mts":case".cts":case".js":case".jsx":case".mjs":case".cjs":case".py":case".go":case".rs":case".java":{const r=o.get();return r?.hasLanguage(e)?new c(r):new s}default:return new n}}export{f as createChunker};
@@ -1 +0,0 @@
1
- //# sourceMappingURL=chunker.interface.js.map
@@ -1,134 +1,14 @@
1
- import { CHUNK_SIZES } from "@kb/core";
2
- class CodeChunker {
3
- maxChunkSize;
4
- constructor(options) {
5
- this.maxChunkSize = options?.maxChunkSize ?? CHUNK_SIZES.code.max;
6
- }
7
- chunk(content, metadata) {
8
- const boundaries = this.findDeclarationBoundaries(content);
9
- if (boundaries.length === 0) {
10
- return this.fallbackChunk(content, metadata);
11
- }
12
- const chunks = [];
13
- for (let i = 0; i < boundaries.length; i++) {
14
- const start = boundaries[i].offset;
15
- const end = i + 1 < boundaries.length ? boundaries[i + 1].offset : content.length;
16
- let text = content.slice(start, end).trim();
17
- const header = `// File: ${metadata.sourcePath}
18
- `;
19
- text = header + text;
20
- if (text.length > this.maxChunkSize) {
21
- const subChunks = this.splitByLines(text, this.maxChunkSize);
22
- let currentLine = this.getLineNumber(content, start);
23
- for (const sub of subChunks) {
24
- const subLines = sub.split("\n").length;
25
- chunks.push({
26
- text: sub,
27
- sourcePath: metadata.sourcePath,
28
- contentType: metadata.contentType,
29
- chunkIndex: chunks.length,
30
- totalChunks: 0,
31
- startLine: currentLine,
32
- endLine: currentLine + subLines - 1
33
- });
34
- currentLine += subLines;
35
- }
36
- } else {
37
- const startLine = this.getLineNumber(content, start);
38
- chunks.push({
39
- text,
40
- sourcePath: metadata.sourcePath,
41
- contentType: metadata.contentType,
42
- chunkIndex: chunks.length,
43
- totalChunks: 0,
44
- startLine,
45
- endLine: startLine + text.split("\n").length - 1
46
- });
47
- }
48
- }
49
- if (boundaries[0].offset > 0) {
50
- const preamble = content.slice(0, boundaries[0].offset).trim();
51
- if (preamble.length > 0) {
52
- chunks.unshift({
53
- text: `// File: ${metadata.sourcePath}
54
- ${preamble}`,
55
- sourcePath: metadata.sourcePath,
56
- contentType: metadata.contentType,
57
- chunkIndex: 0,
58
- totalChunks: 0,
59
- startLine: 1,
60
- endLine: this.getLineNumber(content, boundaries[0].offset) - 1
61
- });
62
- }
63
- }
64
- return chunks.map((c, i) => ({ ...c, chunkIndex: i, totalChunks: chunks.length }));
65
- }
66
- findDeclarationBoundaries(content) {
67
- const pattern = /^(?:export\s+)?(?:default\s+)?(?:async\s+)?(?:function|class|interface|type|const|enum|abstract\s+class)\s+(\w+)/gm;
68
- const boundaries = [];
69
- let match;
70
- while ((match = pattern.exec(content)) !== null) {
71
- const lineStart = content.lastIndexOf("\n", match.index - 1) + 1;
72
- let actualStart = lineStart;
73
- const beforeContent = content.slice(0, lineStart);
74
- const beforeLines = beforeContent.split("\n");
75
- let j = beforeLines.length - 1;
76
- while (j >= 0) {
77
- const line = beforeLines[j].trim();
78
- if (line === "" || line.startsWith("//") || line.startsWith("*") || line.startsWith("/*") || line.startsWith("*/") || line.startsWith("@")) {
79
- j--;
80
- } else {
81
- break;
82
- }
83
- }
84
- if (j < beforeLines.length - 1) {
85
- actualStart = beforeLines.slice(0, j + 1).join("\n").length + 1;
86
- }
87
- boundaries.push({
88
- offset: actualStart,
89
- name: match[1]
90
- });
91
- }
92
- return boundaries;
93
- }
94
- fallbackChunk(content, metadata) {
95
- const header = `// File: ${metadata.sourcePath}
96
- `;
97
- return [
98
- {
99
- text: header + content,
100
- sourcePath: metadata.sourcePath,
101
- contentType: metadata.contentType,
102
- chunkIndex: 0,
103
- totalChunks: 1,
104
- startLine: 1,
105
- endLine: content.split("\n").length
106
- }
107
- ];
108
- }
109
- splitByLines(text, maxSize) {
110
- const lines = text.split("\n");
111
- const result = [];
112
- let current = [];
113
- let currentSize = 0;
114
- for (const line of lines) {
115
- if (currentSize + line.length + 1 > maxSize && current.length > 0) {
116
- result.push(current.join("\n"));
117
- current = [line];
118
- currentSize = line.length;
119
- } else {
120
- current.push(line);
121
- currentSize += line.length + 1;
122
- }
123
- }
124
- if (current.length > 0) result.push(current.join("\n"));
125
- return result;
126
- }
127
- getLineNumber(content, offset) {
128
- return content.slice(0, offset).split("\n").length;
129
- }
130
- }
131
- export {
132
- CodeChunker
133
- };
134
- //# sourceMappingURL=code-chunker.js.map
1
+ import{CHUNK_SIZES as p}from"@kb/core";class C{maxChunkSize;constructor(e){this.maxChunkSize=e?.maxChunkSize??p.code.max}chunk(e,n){const r=this.findDeclarationBoundaries(e);if(r.length===0)return this.fallbackChunk(e,n);const s=[];for(let t=0;t<r.length;t++){const i=r[t].offset,l=t+1<r.length?r[t+1].offset:e.length;let h=e.slice(i,l).trim();if(h=`// File: ${n.sourcePath}
2
+ `+h,h.length>this.maxChunkSize){const u=this.splitByLines(h,this.maxChunkSize);let c=this.getLineNumber(e,i);for(const a of u){const f=a.split(`
3
+ `).length;s.push({text:a,sourcePath:n.sourcePath,contentType:n.contentType,chunkIndex:s.length,totalChunks:0,startLine:c,endLine:c+f-1}),c+=f}}else{const u=this.getLineNumber(e,i);s.push({text:h,sourcePath:n.sourcePath,contentType:n.contentType,chunkIndex:s.length,totalChunks:0,startLine:u,endLine:u+h.split(`
4
+ `).length-1})}}if(r[0].offset>0){const t=e.slice(0,r[0].offset).trim();t.length>0&&s.unshift({text:`// File: ${n.sourcePath}
5
+ ${t}`,sourcePath:n.sourcePath,contentType:n.contentType,chunkIndex:0,totalChunks:0,startLine:1,endLine:this.getLineNumber(e,r[0].offset)-1})}return s.map((t,i)=>({...t,chunkIndex:i,totalChunks:s.length}))}findDeclarationBoundaries(e){const n=/^(?:export\s+)?(?:default\s+)?(?:async\s+)?(?:function|class|interface|type|const|enum|abstract\s+class)\s+(\w+)/gm,r=[];let s;for(;(s=n.exec(e))!==null;){const t=e.lastIndexOf(`
6
+ `,s.index-1)+1;let i=t;const h=e.slice(0,t).split(`
7
+ `);let o=h.length-1;for(;o>=0;){const u=h[o].trim();if(u===""||u.startsWith("//")||u.startsWith("*")||u.startsWith("/*")||u.startsWith("*/")||u.startsWith("@"))o--;else break}o<h.length-1&&(i=h.slice(0,o+1).join(`
8
+ `).length+1),r.push({offset:i,name:s[1]})}return r}fallbackChunk(e,n){return[{text:`// File: ${n.sourcePath}
9
+ `+e,sourcePath:n.sourcePath,contentType:n.contentType,chunkIndex:0,totalChunks:1,startLine:1,endLine:e.split(`
10
+ `).length}]}splitByLines(e,n){const r=e.split(`
11
+ `),s=[];let t=[],i=0;for(const l of r)i+l.length+1>n&&t.length>0?(s.push(t.join(`
12
+ `)),t=[l],i=l.length):(t.push(l),i+=l.length+1);return t.length>0&&s.push(t.join(`
13
+ `)),s}getLineNumber(e,n){return e.slice(0,n).split(`
14
+ `).length}}export{C as CodeChunker};
@@ -1,72 +1,5 @@
1
- import { CHUNK_SIZES } from "@kb/core";
2
- class GenericChunker {
3
- maxChunkSize;
4
- overlap;
5
- constructor(options) {
6
- this.maxChunkSize = options?.maxChunkSize ?? CHUNK_SIZES.default.max;
7
- this.overlap = options?.overlap ?? CHUNK_SIZES.default.overlap;
8
- }
9
- chunk(content, metadata) {
10
- if (content.length <= this.maxChunkSize) {
11
- return [
12
- {
13
- text: content,
14
- sourcePath: metadata.sourcePath,
15
- contentType: metadata.contentType,
16
- chunkIndex: 0,
17
- totalChunks: 1,
18
- startLine: 1,
19
- endLine: content.split("\n").length
20
- }
21
- ];
22
- }
23
- const lines = content.split("\n");
24
- const chunks = [];
25
- let currentLines = [];
26
- let currentSize = 0;
27
- let startLine = 1;
28
- for (let i = 0; i < lines.length; i++) {
29
- const line = lines[i];
30
- if (currentSize + line.length + 1 > this.maxChunkSize && currentLines.length > 0) {
31
- chunks.push({
32
- text: currentLines.join("\n"),
33
- sourcePath: metadata.sourcePath,
34
- contentType: metadata.contentType,
35
- chunkIndex: chunks.length,
36
- totalChunks: 0,
37
- startLine,
38
- endLine: startLine + currentLines.length - 1
39
- });
40
- const overlapLines = [];
41
- let overlapSize = 0;
42
- for (let j = currentLines.length - 1; j >= 0; j--) {
43
- if (overlapSize + currentLines[j].length + 1 > this.overlap) break;
44
- overlapLines.unshift(currentLines[j]);
45
- overlapSize += currentLines[j].length + 1;
46
- }
47
- startLine = startLine + currentLines.length - overlapLines.length;
48
- currentLines = [...overlapLines, line];
49
- currentSize = overlapSize + line.length + 1;
50
- } else {
51
- currentLines.push(line);
52
- currentSize += line.length + 1;
53
- }
54
- }
55
- if (currentLines.length > 0) {
56
- chunks.push({
57
- text: currentLines.join("\n"),
58
- sourcePath: metadata.sourcePath,
59
- contentType: metadata.contentType,
60
- chunkIndex: chunks.length,
61
- totalChunks: 0,
62
- startLine,
63
- endLine: startLine + currentLines.length - 1
64
- });
65
- }
66
- return chunks.map((c, i) => ({ ...c, chunkIndex: i, totalChunks: chunks.length }));
67
- }
68
- }
69
- export {
70
- GenericChunker
71
- };
72
- //# sourceMappingURL=generic-chunker.js.map
1
+ import{CHUNK_SIZES as p}from"@kb/core";class C{maxChunkSize;overlap;constructor(n){this.maxChunkSize=n?.maxChunkSize??p.default.max,this.overlap=n?.overlap??p.default.overlap}chunk(n,h){if(n.length<=this.maxChunkSize)return[{text:n,sourcePath:h.sourcePath,contentType:h.contentType,chunkIndex:0,totalChunks:1,startLine:1,endLine:n.split(`
2
+ `).length}];const c=n.split(`
3
+ `),r=[];let e=[],o=0,l=1;for(let u=0;u<c.length;u++){const t=c[u];if(o+t.length+1>this.maxChunkSize&&e.length>0){r.push({text:e.join(`
4
+ `),sourcePath:h.sourcePath,contentType:h.contentType,chunkIndex:r.length,totalChunks:0,startLine:l,endLine:l+e.length-1});const s=[];let a=0;for(let i=e.length-1;i>=0&&!(a+e[i].length+1>this.overlap);i--)s.unshift(e[i]),a+=e[i].length+1;l=l+e.length-s.length,e=[...s,t],o=a+t.length+1}else e.push(t),o+=t.length+1}return e.length>0&&r.push({text:e.join(`
5
+ `),sourcePath:h.sourcePath,contentType:h.contentType,chunkIndex:r.length,totalChunks:0,startLine:l,endLine:l+e.length-1}),r.map((u,t)=>({...u,chunkIndex:t,totalChunks:r.length}))}}export{C as GenericChunker};
@@ -1,21 +1 @@
1
- import { extractCallEdges } from "./call-graph-extractor.js";
2
- import { createChunker } from "./chunker-factory.js";
3
- import { CodeChunker } from "./code-chunker.js";
4
- import { GenericChunker } from "./generic-chunker.js";
5
- import { MarkdownChunker } from "./markdown-chunker.js";
6
- import {
7
- initializeTreeSitter,
8
- TreeSitterChunker,
9
- TreeSitterRuntime
10
- } from "./treesitter-chunker.js";
11
- export {
12
- CodeChunker,
13
- GenericChunker,
14
- MarkdownChunker,
15
- TreeSitterChunker,
16
- TreeSitterRuntime,
17
- createChunker,
18
- extractCallEdges,
19
- initializeTreeSitter
20
- };
21
- //# sourceMappingURL=index.js.map
1
+ import{extractCallEdges as t}from"./call-graph-extractor.js";import{createChunker as n}from"./chunker-factory.js";import{CodeChunker as p}from"./code-chunker.js";import{GenericChunker as f}from"./generic-chunker.js";import{MarkdownChunker as m}from"./markdown-chunker.js";import{initializeTreeSitter as x,TreeSitterChunker as a,TreeSitterRuntime as h}from"./treesitter-chunker.js";export{p as CodeChunker,f as GenericChunker,m as MarkdownChunker,a as TreeSitterChunker,h as TreeSitterRuntime,n as createChunker,t as extractCallEdges,x as initializeTreeSitter};
@@ -1,122 +1,10 @@
1
- import { CHUNK_SIZES } from "@kb/core";
2
- class MarkdownChunker {
3
- maxChunkSize;
4
- minChunkSize;
5
- constructor(options) {
6
- this.maxChunkSize = options?.maxChunkSize ?? CHUNK_SIZES.markdown.max;
7
- this.minChunkSize = options?.minChunkSize ?? CHUNK_SIZES.markdown.min;
8
- }
9
- chunk(content, metadata) {
10
- const sections = this.splitByHeadings(content);
11
- const chunks = [];
12
- for (const section of sections) {
13
- if (section.text.trim().length < this.minChunkSize) {
14
- if (chunks.length > 0) {
15
- const prev = chunks[chunks.length - 1];
16
- prev.text += `
1
+ import{CHUNK_SIZES as u}from"@kb/core";class p{maxChunkSize;minChunkSize;constructor(r){this.maxChunkSize=r?.maxChunkSize??u.markdown.max,this.minChunkSize=r?.minChunkSize??u.markdown.min}chunk(r,s){const h=this.splitByHeadings(r),t=[];for(const n of h){if(n.text.trim().length<this.minChunkSize&&t.length>0){const i=t[t.length-1];i.text+=`
17
2
 
18
- ${section.text}`;
19
- prev.endLine = section.endLine;
20
- continue;
21
- }
22
- }
23
- if (section.text.length > this.maxChunkSize) {
24
- const subTexts = this.splitByParagraphs(section.text, this.maxChunkSize);
25
- let currentLine = section.startLine;
26
- for (const sub of subTexts) {
27
- const subLines = sub.split("\n").length;
28
- chunks.push({
29
- text: sub,
30
- sourcePath: metadata.sourcePath,
31
- contentType: metadata.contentType,
32
- headingPath: section.headingPath,
33
- chunkIndex: chunks.length,
34
- totalChunks: 0,
35
- // will be set below
36
- startLine: currentLine,
37
- endLine: currentLine + subLines - 1
38
- });
39
- currentLine += subLines;
40
- }
41
- } else {
42
- chunks.push({
43
- text: section.text,
44
- sourcePath: metadata.sourcePath,
45
- contentType: metadata.contentType,
46
- headingPath: section.headingPath,
47
- chunkIndex: chunks.length,
48
- totalChunks: 0,
49
- startLine: section.startLine,
50
- endLine: section.endLine
51
- });
52
- }
53
- }
54
- return chunks.map((c, i) => ({ ...c, chunkIndex: i, totalChunks: chunks.length }));
55
- }
56
- splitByHeadings(content) {
57
- const lines = content.split("\n");
58
- const sections = [];
59
- let currentSection = null;
60
- const headingStack = [];
61
- let inFencedCodeBlock = false;
62
- for (let i = 0; i < lines.length; i++) {
63
- if (/^```/.test(lines[i])) {
64
- inFencedCodeBlock = !inFencedCodeBlock;
65
- }
66
- const match = !inFencedCodeBlock ? lines[i].match(/^(#{1,6})\s+(.+)/) : null;
67
- if (match) {
68
- if (currentSection) {
69
- sections.push(currentSection);
70
- }
71
- const level = match[1].length;
72
- const title = match[2].trim();
73
- while (headingStack.length >= level) {
74
- headingStack.pop();
75
- }
76
- headingStack.push(`${"#".repeat(level)} ${title}`);
77
- currentSection = {
78
- text: lines[i],
79
- headingPath: headingStack.join(" > "),
80
- startLine: i + 1,
81
- endLine: i + 1
82
- };
83
- } else if (currentSection) {
84
- currentSection.text += `
85
- ${lines[i]}`;
86
- currentSection.endLine = i + 1;
87
- } else {
88
- currentSection = {
89
- text: lines[i],
90
- headingPath: "(intro)",
91
- startLine: i + 1,
92
- endLine: i + 1
93
- };
94
- }
95
- }
96
- if (currentSection) sections.push(currentSection);
97
- return sections;
98
- }
99
- splitByParagraphs(text, maxSize) {
100
- const paragraphs = text.split(/\n\n+/);
101
- const result = [];
102
- let current = "";
103
- for (const para of paragraphs) {
104
- if (`${current}
3
+ ${n.text}`,i.endLine=n.endLine;continue}if(n.text.length>this.maxChunkSize){const i=this.splitByParagraphs(n.text,this.maxChunkSize);let e=n.startLine;for(const a of i){const o=a.split(`
4
+ `).length;t.push({text:a,sourcePath:s.sourcePath,contentType:s.contentType,headingPath:n.headingPath,chunkIndex:t.length,totalChunks:0,startLine:e,endLine:e+o-1}),e+=o}}else t.push({text:n.text,sourcePath:s.sourcePath,contentType:s.contentType,headingPath:n.headingPath,chunkIndex:t.length,totalChunks:0,startLine:n.startLine,endLine:n.endLine})}return t.map((n,i)=>({...n,chunkIndex:i,totalChunks:t.length}))}splitByHeadings(r){const s=r.split(`
5
+ `),h=[];let t=null;const n=[];let i=!1;for(let e=0;e<s.length;e++){/^```/.test(s[e])&&(i=!i);const a=i?null:s[e].match(/^(#{1,6})\s+(.+)/);if(a){t&&h.push(t);const o=a[1].length,c=a[2].trim();for(;n.length>=o;)n.pop();n.push(`${"#".repeat(o)} ${c}`),t={text:s[e],headingPath:n.join(" > "),startLine:e+1,endLine:e+1}}else t?(t.text+=`
6
+ ${s[e]}`,t.endLine=e+1):t={text:s[e],headingPath:"(intro)",startLine:e+1,endLine:e+1}}return t&&h.push(t),h}splitByParagraphs(r,s){const h=r.split(/\n\n+/),t=[];let n="";for(const i of h)`${n}
105
7
 
106
- ${para}`.length > maxSize && current.length > 0) {
107
- result.push(current.trim());
108
- current = para;
109
- } else {
110
- current = current ? `${current}
8
+ ${i}`.length>s&&n.length>0?(t.push(n.trim()),n=i):n=n?`${n}
111
9
 
112
- ${para}` : para;
113
- }
114
- }
115
- if (current.trim()) result.push(current.trim());
116
- return result;
117
- }
118
- }
119
- export {
120
- MarkdownChunker
121
- };
122
- //# sourceMappingURL=markdown-chunker.js.map
10
+ ${i}`:i;return n.trim()&&t.push(n.trim()),t}}export{p as MarkdownChunker};