@vpxa/kb 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. package/README.md +3 -3
  2. package/package.json +1 -1
  3. package/packages/analyzers/dist/blast-radius-analyzer.js +13 -114
  4. package/packages/analyzers/dist/dependency-analyzer.js +11 -425
  5. package/packages/analyzers/dist/diagram-generator.js +4 -86
  6. package/packages/analyzers/dist/entry-point-analyzer.js +5 -239
  7. package/packages/analyzers/dist/index.js +1 -23
  8. package/packages/analyzers/dist/knowledge-producer.js +24 -113
  9. package/packages/analyzers/dist/pattern-analyzer.js +5 -359
  10. package/packages/analyzers/dist/regex-call-graph.js +1 -428
  11. package/packages/analyzers/dist/structure-analyzer.js +4 -258
  12. package/packages/analyzers/dist/symbol-analyzer.js +13 -442
  13. package/packages/analyzers/dist/ts-call-graph.js +1 -160
  14. package/packages/analyzers/dist/types.js +0 -1
  15. package/packages/chunker/dist/call-graph-extractor.js +1 -90
  16. package/packages/chunker/dist/chunker-factory.js +1 -36
  17. package/packages/chunker/dist/chunker.interface.js +0 -1
  18. package/packages/chunker/dist/code-chunker.js +14 -134
  19. package/packages/chunker/dist/generic-chunker.js +5 -72
  20. package/packages/chunker/dist/index.js +1 -21
  21. package/packages/chunker/dist/markdown-chunker.js +7 -119
  22. package/packages/chunker/dist/treesitter-chunker.js +8 -234
  23. package/packages/cli/dist/commands/analyze.js +3 -112
  24. package/packages/cli/dist/commands/context-cmds.js +1 -155
  25. package/packages/cli/dist/commands/environment.js +2 -204
  26. package/packages/cli/dist/commands/execution.js +1 -137
  27. package/packages/cli/dist/commands/graph.js +7 -81
  28. package/packages/cli/dist/commands/init.js +9 -87
  29. package/packages/cli/dist/commands/knowledge.js +1 -139
  30. package/packages/cli/dist/commands/search.js +8 -267
  31. package/packages/cli/dist/commands/system.js +4 -241
  32. package/packages/cli/dist/commands/workspace.js +2 -388
  33. package/packages/cli/dist/context.js +1 -14
  34. package/packages/cli/dist/helpers.js +3 -458
  35. package/packages/cli/dist/index.d.ts +1 -1
  36. package/packages/cli/dist/index.js +3 -69
  37. package/packages/cli/dist/kb-init.js +1 -82
  38. package/packages/cli/dist/types.js +0 -1
  39. package/packages/core/dist/constants.js +1 -43
  40. package/packages/core/dist/content-detector.js +1 -79
  41. package/packages/core/dist/errors.js +1 -40
  42. package/packages/core/dist/index.js +1 -9
  43. package/packages/core/dist/logger.js +1 -34
  44. package/packages/core/dist/types.js +0 -1
  45. package/packages/embeddings/dist/embedder.interface.js +0 -1
  46. package/packages/embeddings/dist/index.js +1 -5
  47. package/packages/embeddings/dist/onnx-embedder.js +1 -82
  48. package/packages/indexer/dist/file-hasher.js +1 -13
  49. package/packages/indexer/dist/filesystem-crawler.js +1 -125
  50. package/packages/indexer/dist/graph-extractor.js +1 -111
  51. package/packages/indexer/dist/incremental-indexer.js +1 -278
  52. package/packages/indexer/dist/index.js +1 -14
  53. package/packages/server/dist/api.js +1 -9
  54. package/packages/server/dist/config.js +1 -75
  55. package/packages/server/dist/curated-manager.js +9 -356
  56. package/packages/server/dist/index.js +1 -134
  57. package/packages/server/dist/replay-interceptor.js +1 -38
  58. package/packages/server/dist/resources/resources.js +2 -40
  59. package/packages/server/dist/server.js +1 -247
  60. package/packages/server/dist/tools/analyze.tools.js +1 -288
  61. package/packages/server/dist/tools/forge.tools.js +11 -499
  62. package/packages/server/dist/tools/forget.tool.js +3 -39
  63. package/packages/server/dist/tools/graph.tool.js +5 -110
  64. package/packages/server/dist/tools/list.tool.js +5 -53
  65. package/packages/server/dist/tools/lookup.tool.js +8 -51
  66. package/packages/server/dist/tools/onboard.tool.js +2 -112
  67. package/packages/server/dist/tools/produce.tool.js +4 -74
  68. package/packages/server/dist/tools/read.tool.js +4 -47
  69. package/packages/server/dist/tools/reindex.tool.js +2 -70
  70. package/packages/server/dist/tools/remember.tool.js +3 -42
  71. package/packages/server/dist/tools/replay.tool.js +6 -88
  72. package/packages/server/dist/tools/search.tool.js +17 -327
  73. package/packages/server/dist/tools/status.tool.js +3 -68
  74. package/packages/server/dist/tools/toolkit.tools.js +20 -1673
  75. package/packages/server/dist/tools/update.tool.js +3 -39
  76. package/packages/server/dist/tools/utility.tools.js +19 -456
  77. package/packages/store/dist/graph-store.interface.js +0 -1
  78. package/packages/store/dist/index.js +1 -9
  79. package/packages/store/dist/lance-store.js +1 -258
  80. package/packages/store/dist/sqlite-graph-store.js +8 -309
  81. package/packages/store/dist/store-factory.js +1 -14
  82. package/packages/store/dist/store.interface.js +0 -1
  83. package/packages/tools/dist/batch.js +1 -45
  84. package/packages/tools/dist/changelog.js +2 -112
  85. package/packages/tools/dist/check.js +2 -59
  86. package/packages/tools/dist/checkpoint.js +2 -43
  87. package/packages/tools/dist/codemod.js +2 -69
  88. package/packages/tools/dist/compact.js +3 -60
  89. package/packages/tools/dist/data-transform.js +1 -124
  90. package/packages/tools/dist/dead-symbols.js +2 -71
  91. package/packages/tools/dist/delegate.js +3 -128
  92. package/packages/tools/dist/diff-parse.js +3 -153
  93. package/packages/tools/dist/digest.js +7 -242
  94. package/packages/tools/dist/encode.js +1 -46
  95. package/packages/tools/dist/env-info.js +1 -58
  96. package/packages/tools/dist/eval.js +3 -79
  97. package/packages/tools/dist/evidence-map.js +3 -203
  98. package/packages/tools/dist/file-summary.js +2 -106
  99. package/packages/tools/dist/file-walk.js +1 -75
  100. package/packages/tools/dist/find-examples.js +3 -48
  101. package/packages/tools/dist/find.js +1 -120
  102. package/packages/tools/dist/forge-classify.js +2 -319
  103. package/packages/tools/dist/forge-ground.js +1 -184
  104. package/packages/tools/dist/git-context.js +3 -46
  105. package/packages/tools/dist/graph-query.js +1 -194
  106. package/packages/tools/dist/health.js +1 -118
  107. package/packages/tools/dist/http-request.js +1 -58
  108. package/packages/tools/dist/index.js +1 -273
  109. package/packages/tools/dist/lane.js +7 -227
  110. package/packages/tools/dist/measure.js +2 -119
  111. package/packages/tools/dist/onboard.js +42 -1136
  112. package/packages/tools/dist/parse-output.js +2 -158
  113. package/packages/tools/dist/process-manager.js +1 -69
  114. package/packages/tools/dist/queue.js +2 -126
  115. package/packages/tools/dist/regex-test.js +1 -39
  116. package/packages/tools/dist/rename.js +2 -70
  117. package/packages/tools/dist/replay.js +6 -108
  118. package/packages/tools/dist/schema-validate.js +1 -141
  119. package/packages/tools/dist/scope-map.js +1 -72
  120. package/packages/tools/dist/snippet.js +1 -80
  121. package/packages/tools/dist/stash.js +2 -60
  122. package/packages/tools/dist/stratum-card.js +5 -238
  123. package/packages/tools/dist/symbol.js +3 -87
  124. package/packages/tools/dist/test-run.js +2 -55
  125. package/packages/tools/dist/text-utils.js +2 -31
  126. package/packages/tools/dist/time-utils.js +1 -135
  127. package/packages/tools/dist/trace.js +2 -114
  128. package/packages/tools/dist/truncation.js +10 -41
  129. package/packages/tools/dist/watch.js +1 -61
  130. package/packages/tools/dist/web-fetch.js +9 -244
  131. package/packages/tools/dist/web-search.js +1 -46
  132. package/packages/tools/dist/workset.js +2 -77
  133. package/packages/tui/dist/App.js +260 -52468
  134. package/packages/tui/dist/index.js +286 -54551
  135. package/packages/tui/dist/panels/CuratedPanel.js +211 -34291
  136. package/packages/tui/dist/panels/LogPanel.js +259 -51703
  137. package/packages/tui/dist/panels/SearchPanel.js +212 -34824
  138. package/packages/tui/dist/panels/StatusPanel.js +211 -34304
@@ -1,90 +1 @@
1
- import { extname } from "node:path";
2
- import { TreeSitterRuntime } from "./treesitter-chunker.js";
3
- const FUNCTION_NODE_TYPES = /* @__PURE__ */ new Set([
4
- // TS/JS
5
- "function_declaration",
6
- "method_definition",
7
- "arrow_function",
8
- // Python
9
- "function_definition",
10
- // Go
11
- "function_declaration",
12
- "method_declaration",
13
- // Rust
14
- "function_item",
15
- // Java
16
- "method_declaration",
17
- "constructor_declaration"
18
- ]);
19
- const CALL_NODE_TYPES = /* @__PURE__ */ new Set([
20
- "call_expression",
21
- // TS/JS/Go/Rust
22
- "new_expression",
23
- // TS/JS (new Foo())
24
- "call"
25
- // Python
26
- ]);
27
- function extractCallEdges(content, filePath) {
28
- const runtime = TreeSitterRuntime.get();
29
- if (!runtime) return null;
30
- const ext = extname(filePath).toLowerCase();
31
- if (!runtime.hasLanguage(ext)) return null;
32
- const tree = runtime.parse(content, ext);
33
- if (!tree) return null;
34
- const edges = [];
35
- const rootNode = tree.rootNode;
36
- walkNode(rootNode, filePath, "<module>", edges);
37
- return edges;
38
- }
39
- function walkNode(node, filePath, currentScope, edges) {
40
- if (!node) return;
41
- let scope = currentScope;
42
- if (FUNCTION_NODE_TYPES.has(node.type)) {
43
- scope = extractFunctionName(node) ?? currentScope;
44
- }
45
- if (CALL_NODE_TYPES.has(node.type)) {
46
- const callee = extractCalleeName(node);
47
- if (callee) {
48
- edges.push({
49
- callerFile: filePath,
50
- callerName: scope,
51
- calleeName: callee,
52
- line: (node.startPosition?.row ?? 0) + 1
53
- });
54
- }
55
- }
56
- for (let i = 0; i < (node.childCount ?? 0); i++) {
57
- const child = node.child(i);
58
- if (child) walkNode(child, filePath, scope, edges);
59
- }
60
- }
61
- function extractFunctionName(node) {
62
- for (let i = 0; i < (node.childCount ?? 0); i++) {
63
- const child = node.child(i);
64
- if (!child) continue;
65
- if (child.type === "identifier" || child.type === "property_identifier" || child.type === "name") {
66
- return child.text ?? null;
67
- }
68
- }
69
- return null;
70
- }
71
- function extractCalleeName(node) {
72
- const fn = node.childForFieldName?.("function") ?? node.child(0);
73
- if (!fn) return null;
74
- if (fn.type === "identifier" || fn.type === "name") {
75
- return fn.text ?? null;
76
- }
77
- if (fn.type === "member_expression" || fn.type === "attribute") {
78
- const property = fn.childForFieldName?.("property") ?? fn.childForFieldName?.("attribute");
79
- return property?.text ?? null;
80
- }
81
- if (node.type === "new_expression") {
82
- const ctor = node.child(1);
83
- return ctor?.text ?? null;
84
- }
85
- return null;
86
- }
87
- export {
88
- extractCallEdges
89
- };
90
- //# sourceMappingURL=call-graph-extractor.js.map
1
+ import{extname as a}from"node:path";import{TreeSitterRuntime as u}from"./treesitter-chunker.js";const s=new Set(["function_declaration","method_definition","arrow_function","function_definition","function_declaration","method_declaration","function_item","method_declaration","constructor_declaration"]),f=new Set(["call_expression","new_expression","call"]);function g(t,e){const n=u.get();if(!n)return null;const l=a(e).toLowerCase();if(!n.hasLanguage(l))return null;const i=n.parse(t,l);if(!i)return null;const r=[],o=i.rootNode;return c(o,e,"<module>",r),r}function c(t,e,n,l){if(!t)return;let i=n;if(s.has(t.type)&&(i=p(t)??n),f.has(t.type)){const r=d(t);r&&l.push({callerFile:e,callerName:i,calleeName:r,line:(t.startPosition?.row??0)+1})}for(let r=0;r<(t.childCount??0);r++){const o=t.child(r);o&&c(o,e,i,l)}}function p(t){for(let e=0;e<(t.childCount??0);e++){const n=t.child(e);if(n&&(n.type==="identifier"||n.type==="property_identifier"||n.type==="name"))return n.text??null}return null}function d(t){const e=t.childForFieldName?.("function")??t.child(0);return e?e.type==="identifier"||e.type==="name"?e.text??null:e.type==="member_expression"||e.type==="attribute"?(e.childForFieldName?.("property")??e.childForFieldName?.("attribute"))?.text??null:t.type==="new_expression"?t.child(1)?.text??null:null:null}export{g as extractCallEdges};
@@ -1,36 +1 @@
1
- import { CodeChunker } from "./code-chunker.js";
2
- import { GenericChunker } from "./generic-chunker.js";
3
- import { MarkdownChunker } from "./markdown-chunker.js";
4
- import { TreeSitterChunker, TreeSitterRuntime } from "./treesitter-chunker.js";
5
- function createChunker(fileExtension) {
6
- const ext = fileExtension.toLowerCase();
7
- switch (ext) {
8
- case ".md":
9
- case ".mdx":
10
- return new MarkdownChunker();
11
- case ".ts":
12
- case ".tsx":
13
- case ".mts":
14
- case ".cts":
15
- case ".js":
16
- case ".jsx":
17
- case ".mjs":
18
- case ".cjs":
19
- case ".py":
20
- case ".go":
21
- case ".rs":
22
- case ".java": {
23
- const runtime = TreeSitterRuntime.get();
24
- if (runtime?.hasLanguage(ext)) {
25
- return new TreeSitterChunker(runtime);
26
- }
27
- return new CodeChunker();
28
- }
29
- default:
30
- return new GenericChunker();
31
- }
32
- }
33
- export {
34
- createChunker
35
- };
36
- //# sourceMappingURL=chunker-factory.js.map
1
+ import{CodeChunker as s}from"./code-chunker.js";import{GenericChunker as n}from"./generic-chunker.js";import{MarkdownChunker as a}from"./markdown-chunker.js";import{TreeSitterChunker as c,TreeSitterRuntime as o}from"./treesitter-chunker.js";function f(t){const e=t.toLowerCase();switch(e){case".md":case".mdx":return new a;case".ts":case".tsx":case".mts":case".cts":case".js":case".jsx":case".mjs":case".cjs":case".py":case".go":case".rs":case".java":{const r=o.get();return r?.hasLanguage(e)?new c(r):new s}default:return new n}}export{f as createChunker};
@@ -1 +0,0 @@
1
- //# sourceMappingURL=chunker.interface.js.map
@@ -1,134 +1,14 @@
1
- import { CHUNK_SIZES } from "@kb/core";
2
- class CodeChunker {
3
- maxChunkSize;
4
- constructor(options) {
5
- this.maxChunkSize = options?.maxChunkSize ?? CHUNK_SIZES.code.max;
6
- }
7
- chunk(content, metadata) {
8
- const boundaries = this.findDeclarationBoundaries(content);
9
- if (boundaries.length === 0) {
10
- return this.fallbackChunk(content, metadata);
11
- }
12
- const chunks = [];
13
- for (let i = 0; i < boundaries.length; i++) {
14
- const start = boundaries[i].offset;
15
- const end = i + 1 < boundaries.length ? boundaries[i + 1].offset : content.length;
16
- let text = content.slice(start, end).trim();
17
- const header = `// File: ${metadata.sourcePath}
18
- `;
19
- text = header + text;
20
- if (text.length > this.maxChunkSize) {
21
- const subChunks = this.splitByLines(text, this.maxChunkSize);
22
- let currentLine = this.getLineNumber(content, start);
23
- for (const sub of subChunks) {
24
- const subLines = sub.split("\n").length;
25
- chunks.push({
26
- text: sub,
27
- sourcePath: metadata.sourcePath,
28
- contentType: metadata.contentType,
29
- chunkIndex: chunks.length,
30
- totalChunks: 0,
31
- startLine: currentLine,
32
- endLine: currentLine + subLines - 1
33
- });
34
- currentLine += subLines;
35
- }
36
- } else {
37
- const startLine = this.getLineNumber(content, start);
38
- chunks.push({
39
- text,
40
- sourcePath: metadata.sourcePath,
41
- contentType: metadata.contentType,
42
- chunkIndex: chunks.length,
43
- totalChunks: 0,
44
- startLine,
45
- endLine: startLine + text.split("\n").length - 1
46
- });
47
- }
48
- }
49
- if (boundaries[0].offset > 0) {
50
- const preamble = content.slice(0, boundaries[0].offset).trim();
51
- if (preamble.length > 0) {
52
- chunks.unshift({
53
- text: `// File: ${metadata.sourcePath}
54
- ${preamble}`,
55
- sourcePath: metadata.sourcePath,
56
- contentType: metadata.contentType,
57
- chunkIndex: 0,
58
- totalChunks: 0,
59
- startLine: 1,
60
- endLine: this.getLineNumber(content, boundaries[0].offset) - 1
61
- });
62
- }
63
- }
64
- return chunks.map((c, i) => ({ ...c, chunkIndex: i, totalChunks: chunks.length }));
65
- }
66
- findDeclarationBoundaries(content) {
67
- const pattern = /^(?:export\s+)?(?:default\s+)?(?:async\s+)?(?:function|class|interface|type|const|enum|abstract\s+class)\s+(\w+)/gm;
68
- const boundaries = [];
69
- let match;
70
- while ((match = pattern.exec(content)) !== null) {
71
- const lineStart = content.lastIndexOf("\n", match.index - 1) + 1;
72
- let actualStart = lineStart;
73
- const beforeContent = content.slice(0, lineStart);
74
- const beforeLines = beforeContent.split("\n");
75
- let j = beforeLines.length - 1;
76
- while (j >= 0) {
77
- const line = beforeLines[j].trim();
78
- if (line === "" || line.startsWith("//") || line.startsWith("*") || line.startsWith("/*") || line.startsWith("*/") || line.startsWith("@")) {
79
- j--;
80
- } else {
81
- break;
82
- }
83
- }
84
- if (j < beforeLines.length - 1) {
85
- actualStart = beforeLines.slice(0, j + 1).join("\n").length + 1;
86
- }
87
- boundaries.push({
88
- offset: actualStart,
89
- name: match[1]
90
- });
91
- }
92
- return boundaries;
93
- }
94
- fallbackChunk(content, metadata) {
95
- const header = `// File: ${metadata.sourcePath}
96
- `;
97
- return [
98
- {
99
- text: header + content,
100
- sourcePath: metadata.sourcePath,
101
- contentType: metadata.contentType,
102
- chunkIndex: 0,
103
- totalChunks: 1,
104
- startLine: 1,
105
- endLine: content.split("\n").length
106
- }
107
- ];
108
- }
109
- splitByLines(text, maxSize) {
110
- const lines = text.split("\n");
111
- const result = [];
112
- let current = [];
113
- let currentSize = 0;
114
- for (const line of lines) {
115
- if (currentSize + line.length + 1 > maxSize && current.length > 0) {
116
- result.push(current.join("\n"));
117
- current = [line];
118
- currentSize = line.length;
119
- } else {
120
- current.push(line);
121
- currentSize += line.length + 1;
122
- }
123
- }
124
- if (current.length > 0) result.push(current.join("\n"));
125
- return result;
126
- }
127
- getLineNumber(content, offset) {
128
- return content.slice(0, offset).split("\n").length;
129
- }
130
- }
131
- export {
132
- CodeChunker
133
- };
134
- //# sourceMappingURL=code-chunker.js.map
1
+ import{CHUNK_SIZES as p}from"../../core/dist/index.js";class C{maxChunkSize;constructor(e){this.maxChunkSize=e?.maxChunkSize??p.code.max}chunk(e,n){const r=this.findDeclarationBoundaries(e);if(r.length===0)return this.fallbackChunk(e,n);const s=[];for(let t=0;t<r.length;t++){const i=r[t].offset,l=t+1<r.length?r[t+1].offset:e.length;let h=e.slice(i,l).trim();if(h=`// File: ${n.sourcePath}
2
+ `+h,h.length>this.maxChunkSize){const u=this.splitByLines(h,this.maxChunkSize);let c=this.getLineNumber(e,i);for(const a of u){const f=a.split(`
3
+ `).length;s.push({text:a,sourcePath:n.sourcePath,contentType:n.contentType,chunkIndex:s.length,totalChunks:0,startLine:c,endLine:c+f-1}),c+=f}}else{const u=this.getLineNumber(e,i);s.push({text:h,sourcePath:n.sourcePath,contentType:n.contentType,chunkIndex:s.length,totalChunks:0,startLine:u,endLine:u+h.split(`
4
+ `).length-1})}}if(r[0].offset>0){const t=e.slice(0,r[0].offset).trim();t.length>0&&s.unshift({text:`// File: ${n.sourcePath}
5
+ ${t}`,sourcePath:n.sourcePath,contentType:n.contentType,chunkIndex:0,totalChunks:0,startLine:1,endLine:this.getLineNumber(e,r[0].offset)-1})}return s.map((t,i)=>({...t,chunkIndex:i,totalChunks:s.length}))}findDeclarationBoundaries(e){const n=/^(?:export\s+)?(?:default\s+)?(?:async\s+)?(?:function|class|interface|type|const|enum|abstract\s+class)\s+(\w+)/gm,r=[];let s;for(;(s=n.exec(e))!==null;){const t=e.lastIndexOf(`
6
+ `,s.index-1)+1;let i=t;const h=e.slice(0,t).split(`
7
+ `);let o=h.length-1;for(;o>=0;){const u=h[o].trim();if(u===""||u.startsWith("//")||u.startsWith("*")||u.startsWith("/*")||u.startsWith("*/")||u.startsWith("@"))o--;else break}o<h.length-1&&(i=h.slice(0,o+1).join(`
8
+ `).length+1),r.push({offset:i,name:s[1]})}return r}fallbackChunk(e,n){return[{text:`// File: ${n.sourcePath}
9
+ `+e,sourcePath:n.sourcePath,contentType:n.contentType,chunkIndex:0,totalChunks:1,startLine:1,endLine:e.split(`
10
+ `).length}]}splitByLines(e,n){const r=e.split(`
11
+ `),s=[];let t=[],i=0;for(const l of r)i+l.length+1>n&&t.length>0?(s.push(t.join(`
12
+ `)),t=[l],i=l.length):(t.push(l),i+=l.length+1);return t.length>0&&s.push(t.join(`
13
+ `)),s}getLineNumber(e,n){return e.slice(0,n).split(`
14
+ `).length}}export{C as CodeChunker};
@@ -1,72 +1,5 @@
1
- import { CHUNK_SIZES } from "@kb/core";
2
- class GenericChunker {
3
- maxChunkSize;
4
- overlap;
5
- constructor(options) {
6
- this.maxChunkSize = options?.maxChunkSize ?? CHUNK_SIZES.default.max;
7
- this.overlap = options?.overlap ?? CHUNK_SIZES.default.overlap;
8
- }
9
- chunk(content, metadata) {
10
- if (content.length <= this.maxChunkSize) {
11
- return [
12
- {
13
- text: content,
14
- sourcePath: metadata.sourcePath,
15
- contentType: metadata.contentType,
16
- chunkIndex: 0,
17
- totalChunks: 1,
18
- startLine: 1,
19
- endLine: content.split("\n").length
20
- }
21
- ];
22
- }
23
- const lines = content.split("\n");
24
- const chunks = [];
25
- let currentLines = [];
26
- let currentSize = 0;
27
- let startLine = 1;
28
- for (let i = 0; i < lines.length; i++) {
29
- const line = lines[i];
30
- if (currentSize + line.length + 1 > this.maxChunkSize && currentLines.length > 0) {
31
- chunks.push({
32
- text: currentLines.join("\n"),
33
- sourcePath: metadata.sourcePath,
34
- contentType: metadata.contentType,
35
- chunkIndex: chunks.length,
36
- totalChunks: 0,
37
- startLine,
38
- endLine: startLine + currentLines.length - 1
39
- });
40
- const overlapLines = [];
41
- let overlapSize = 0;
42
- for (let j = currentLines.length - 1; j >= 0; j--) {
43
- if (overlapSize + currentLines[j].length + 1 > this.overlap) break;
44
- overlapLines.unshift(currentLines[j]);
45
- overlapSize += currentLines[j].length + 1;
46
- }
47
- startLine = startLine + currentLines.length - overlapLines.length;
48
- currentLines = [...overlapLines, line];
49
- currentSize = overlapSize + line.length + 1;
50
- } else {
51
- currentLines.push(line);
52
- currentSize += line.length + 1;
53
- }
54
- }
55
- if (currentLines.length > 0) {
56
- chunks.push({
57
- text: currentLines.join("\n"),
58
- sourcePath: metadata.sourcePath,
59
- contentType: metadata.contentType,
60
- chunkIndex: chunks.length,
61
- totalChunks: 0,
62
- startLine,
63
- endLine: startLine + currentLines.length - 1
64
- });
65
- }
66
- return chunks.map((c, i) => ({ ...c, chunkIndex: i, totalChunks: chunks.length }));
67
- }
68
- }
69
- export {
70
- GenericChunker
71
- };
72
- //# sourceMappingURL=generic-chunker.js.map
1
+ import{CHUNK_SIZES as p}from"../../core/dist/index.js";class C{maxChunkSize;overlap;constructor(n){this.maxChunkSize=n?.maxChunkSize??p.default.max,this.overlap=n?.overlap??p.default.overlap}chunk(n,h){if(n.length<=this.maxChunkSize)return[{text:n,sourcePath:h.sourcePath,contentType:h.contentType,chunkIndex:0,totalChunks:1,startLine:1,endLine:n.split(`
2
+ `).length}];const c=n.split(`
3
+ `),r=[];let e=[],s=0,l=1;for(let u=0;u<c.length;u++){const t=c[u];if(s+t.length+1>this.maxChunkSize&&e.length>0){r.push({text:e.join(`
4
+ `),sourcePath:h.sourcePath,contentType:h.contentType,chunkIndex:r.length,totalChunks:0,startLine:l,endLine:l+e.length-1});const o=[];let a=0;for(let i=e.length-1;i>=0&&!(a+e[i].length+1>this.overlap);i--)o.unshift(e[i]),a+=e[i].length+1;l=l+e.length-o.length,e=[...o,t],s=a+t.length+1}else e.push(t),s+=t.length+1}return e.length>0&&r.push({text:e.join(`
5
+ `),sourcePath:h.sourcePath,contentType:h.contentType,chunkIndex:r.length,totalChunks:0,startLine:l,endLine:l+e.length-1}),r.map((u,t)=>({...u,chunkIndex:t,totalChunks:r.length}))}}export{C as GenericChunker};
@@ -1,21 +1 @@
1
- import { extractCallEdges } from "./call-graph-extractor.js";
2
- import { createChunker } from "./chunker-factory.js";
3
- import { CodeChunker } from "./code-chunker.js";
4
- import { GenericChunker } from "./generic-chunker.js";
5
- import { MarkdownChunker } from "./markdown-chunker.js";
6
- import {
7
- initializeTreeSitter,
8
- TreeSitterChunker,
9
- TreeSitterRuntime
10
- } from "./treesitter-chunker.js";
11
- export {
12
- CodeChunker,
13
- GenericChunker,
14
- MarkdownChunker,
15
- TreeSitterChunker,
16
- TreeSitterRuntime,
17
- createChunker,
18
- extractCallEdges,
19
- initializeTreeSitter
20
- };
21
- //# sourceMappingURL=index.js.map
1
+ import{extractCallEdges as t}from"./call-graph-extractor.js";import{createChunker as n}from"./chunker-factory.js";import{CodeChunker as p}from"./code-chunker.js";import{GenericChunker as f}from"./generic-chunker.js";import{MarkdownChunker as m}from"./markdown-chunker.js";import{initializeTreeSitter as x,TreeSitterChunker as a,TreeSitterRuntime as h}from"./treesitter-chunker.js";export{p as CodeChunker,f as GenericChunker,m as MarkdownChunker,a as TreeSitterChunker,h as TreeSitterRuntime,n as createChunker,t as extractCallEdges,x as initializeTreeSitter};
@@ -1,122 +1,10 @@
1
- import { CHUNK_SIZES } from "@kb/core";
2
- class MarkdownChunker {
3
- maxChunkSize;
4
- minChunkSize;
5
- constructor(options) {
6
- this.maxChunkSize = options?.maxChunkSize ?? CHUNK_SIZES.markdown.max;
7
- this.minChunkSize = options?.minChunkSize ?? CHUNK_SIZES.markdown.min;
8
- }
9
- chunk(content, metadata) {
10
- const sections = this.splitByHeadings(content);
11
- const chunks = [];
12
- for (const section of sections) {
13
- if (section.text.trim().length < this.minChunkSize) {
14
- if (chunks.length > 0) {
15
- const prev = chunks[chunks.length - 1];
16
- prev.text += `
1
+ import{CHUNK_SIZES as u}from"../../core/dist/index.js";class k{maxChunkSize;minChunkSize;constructor(r){this.maxChunkSize=r?.maxChunkSize??u.markdown.max,this.minChunkSize=r?.minChunkSize??u.markdown.min}chunk(r,s){const h=this.splitByHeadings(r),t=[];for(const n of h){if(n.text.trim().length<this.minChunkSize&&t.length>0){const i=t[t.length-1];i.text+=`
17
2
 
18
- ${section.text}`;
19
- prev.endLine = section.endLine;
20
- continue;
21
- }
22
- }
23
- if (section.text.length > this.maxChunkSize) {
24
- const subTexts = this.splitByParagraphs(section.text, this.maxChunkSize);
25
- let currentLine = section.startLine;
26
- for (const sub of subTexts) {
27
- const subLines = sub.split("\n").length;
28
- chunks.push({
29
- text: sub,
30
- sourcePath: metadata.sourcePath,
31
- contentType: metadata.contentType,
32
- headingPath: section.headingPath,
33
- chunkIndex: chunks.length,
34
- totalChunks: 0,
35
- // will be set below
36
- startLine: currentLine,
37
- endLine: currentLine + subLines - 1
38
- });
39
- currentLine += subLines;
40
- }
41
- } else {
42
- chunks.push({
43
- text: section.text,
44
- sourcePath: metadata.sourcePath,
45
- contentType: metadata.contentType,
46
- headingPath: section.headingPath,
47
- chunkIndex: chunks.length,
48
- totalChunks: 0,
49
- startLine: section.startLine,
50
- endLine: section.endLine
51
- });
52
- }
53
- }
54
- return chunks.map((c, i) => ({ ...c, chunkIndex: i, totalChunks: chunks.length }));
55
- }
56
- splitByHeadings(content) {
57
- const lines = content.split("\n");
58
- const sections = [];
59
- let currentSection = null;
60
- const headingStack = [];
61
- let inFencedCodeBlock = false;
62
- for (let i = 0; i < lines.length; i++) {
63
- if (/^```/.test(lines[i])) {
64
- inFencedCodeBlock = !inFencedCodeBlock;
65
- }
66
- const match = !inFencedCodeBlock ? lines[i].match(/^(#{1,6})\s+(.+)/) : null;
67
- if (match) {
68
- if (currentSection) {
69
- sections.push(currentSection);
70
- }
71
- const level = match[1].length;
72
- const title = match[2].trim();
73
- while (headingStack.length >= level) {
74
- headingStack.pop();
75
- }
76
- headingStack.push(`${"#".repeat(level)} ${title}`);
77
- currentSection = {
78
- text: lines[i],
79
- headingPath: headingStack.join(" > "),
80
- startLine: i + 1,
81
- endLine: i + 1
82
- };
83
- } else if (currentSection) {
84
- currentSection.text += `
85
- ${lines[i]}`;
86
- currentSection.endLine = i + 1;
87
- } else {
88
- currentSection = {
89
- text: lines[i],
90
- headingPath: "(intro)",
91
- startLine: i + 1,
92
- endLine: i + 1
93
- };
94
- }
95
- }
96
- if (currentSection) sections.push(currentSection);
97
- return sections;
98
- }
99
- splitByParagraphs(text, maxSize) {
100
- const paragraphs = text.split(/\n\n+/);
101
- const result = [];
102
- let current = "";
103
- for (const para of paragraphs) {
104
- if (`${current}
3
+ ${n.text}`,i.endLine=n.endLine;continue}if(n.text.length>this.maxChunkSize){const i=this.splitByParagraphs(n.text,this.maxChunkSize);let e=n.startLine;for(const a of i){const o=a.split(`
4
+ `).length;t.push({text:a,sourcePath:s.sourcePath,contentType:s.contentType,headingPath:n.headingPath,chunkIndex:t.length,totalChunks:0,startLine:e,endLine:e+o-1}),e+=o}}else t.push({text:n.text,sourcePath:s.sourcePath,contentType:s.contentType,headingPath:n.headingPath,chunkIndex:t.length,totalChunks:0,startLine:n.startLine,endLine:n.endLine})}return t.map((n,i)=>({...n,chunkIndex:i,totalChunks:t.length}))}splitByHeadings(r){const s=r.split(`
5
+ `),h=[];let t=null;const n=[];let i=!1;for(let e=0;e<s.length;e++){/^```/.test(s[e])&&(i=!i);const a=i?null:s[e].match(/^(#{1,6})\s+(.+)/);if(a){t&&h.push(t);const o=a[1].length,c=a[2].trim();for(;n.length>=o;)n.pop();n.push(`${"#".repeat(o)} ${c}`),t={text:s[e],headingPath:n.join(" > "),startLine:e+1,endLine:e+1}}else t?(t.text+=`
6
+ ${s[e]}`,t.endLine=e+1):t={text:s[e],headingPath:"(intro)",startLine:e+1,endLine:e+1}}return t&&h.push(t),h}splitByParagraphs(r,s){const h=r.split(/\n\n+/),t=[];let n="";for(const i of h)`${n}
105
7
 
106
- ${para}`.length > maxSize && current.length > 0) {
107
- result.push(current.trim());
108
- current = para;
109
- } else {
110
- current = current ? `${current}
8
+ ${i}`.length>s&&n.length>0?(t.push(n.trim()),n=i):n=n?`${n}
111
9
 
112
- ${para}` : para;
113
- }
114
- }
115
- if (current.trim()) result.push(current.trim());
116
- return result;
117
- }
118
- }
119
- export {
120
- MarkdownChunker
121
- };
122
- //# sourceMappingURL=markdown-chunker.js.map
10
+ ${i}`:i;return n.trim()&&t.push(n.trim()),t}}export{k as MarkdownChunker};