@cdoing/core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/coordinator.d.ts +114 -0
- package/dist/agents/coordinator.d.ts.map +1 -0
- package/dist/agents/coordinator.js +158 -0
- package/dist/agents/coordinator.js.map +1 -0
- package/dist/context-providers/clipboard.d.ts +13 -0
- package/dist/context-providers/clipboard.d.ts.map +1 -0
- package/dist/context-providers/clipboard.js +53 -0
- package/dist/context-providers/clipboard.js.map +1 -0
- package/dist/context-providers/codebase.d.ts +46 -0
- package/dist/context-providers/codebase.d.ts.map +1 -0
- package/dist/context-providers/codebase.js +273 -0
- package/dist/context-providers/codebase.js.map +1 -0
- package/dist/context-providers/diff.d.ts +18 -0
- package/dist/context-providers/diff.d.ts.map +1 -0
- package/dist/context-providers/diff.js +63 -0
- package/dist/context-providers/diff.js.map +1 -0
- package/dist/context-providers/docs.d.ts +21 -0
- package/dist/context-providers/docs.d.ts.map +1 -0
- package/dist/context-providers/docs.js +180 -0
- package/dist/context-providers/docs.js.map +1 -0
- package/dist/context-providers/file-include.d.ts +13 -0
- package/dist/context-providers/file-include.d.ts.map +1 -0
- package/dist/context-providers/file-include.js +82 -0
- package/dist/context-providers/file-include.js.map +1 -0
- package/dist/context-providers/folder.d.ts +19 -0
- package/dist/context-providers/folder.d.ts.map +1 -0
- package/dist/context-providers/folder.js +130 -0
- package/dist/context-providers/folder.js.map +1 -0
- package/dist/context-providers/git.d.ts +19 -0
- package/dist/context-providers/git.d.ts.map +1 -0
- package/dist/context-providers/git.js +74 -0
- package/dist/context-providers/git.js.map +1 -0
- package/dist/context-providers/index.d.ts +26 -0
- package/dist/context-providers/index.d.ts.map +1 -0
- package/dist/context-providers/index.js +37 -0
- package/dist/context-providers/index.js.map +1 -0
- package/dist/context-providers/open-files.d.ts +25 -0
- package/dist/context-providers/open-files.d.ts.map +1 -0
- package/dist/context-providers/open-files.js +134 -0
- package/dist/context-providers/open-files.js.map +1 -0
- package/dist/context-providers/problems.d.ts +24 -0
- package/dist/context-providers/problems.d.ts.map +1 -0
- package/dist/context-providers/problems.js +97 -0
- package/dist/context-providers/problems.js.map +1 -0
- package/dist/context-providers/registry.d.ts +61 -0
- package/dist/context-providers/registry.d.ts.map +1 -0
- package/dist/context-providers/registry.js +92 -0
- package/dist/context-providers/registry.js.map +1 -0
- package/dist/context-providers/terminal.d.ts +25 -0
- package/dist/context-providers/terminal.d.ts.map +1 -0
- package/dist/context-providers/terminal.js +55 -0
- package/dist/context-providers/terminal.js.map +1 -0
- package/dist/context-providers/tree.d.ts +29 -0
- package/dist/context-providers/tree.d.ts.map +1 -0
- package/dist/context-providers/tree.js +172 -0
- package/dist/context-providers/tree.js.map +1 -0
- package/dist/context-providers/types.d.ts +72 -0
- package/dist/context-providers/types.d.ts.map +1 -0
- package/dist/context-providers/types.js +10 -0
- package/dist/context-providers/types.js.map +1 -0
- package/dist/context-providers/url.d.ts +27 -0
- package/dist/context-providers/url.d.ts.map +1 -0
- package/dist/context-providers/url.js +131 -0
- package/dist/context-providers/url.js.map +1 -0
- package/dist/effort/index.d.ts +78 -0
- package/dist/effort/index.d.ts.map +1 -0
- package/dist/effort/index.js +146 -0
- package/dist/effort/index.js.map +1 -0
- package/dist/hooks/index.d.ts +47 -0
- package/dist/hooks/index.d.ts.map +1 -0
- package/dist/hooks/index.js +151 -0
- package/dist/hooks/index.js.map +1 -0
- package/dist/index.d.ts +75 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +152 -0
- package/dist/index.js.map +1 -0
- package/dist/indexing/chunker.d.ts +25 -0
- package/dist/indexing/chunker.d.ts.map +1 -0
- package/dist/indexing/chunker.js +217 -0
- package/dist/indexing/chunker.js.map +1 -0
- package/dist/indexing/database.d.ts +49 -0
- package/dist/indexing/database.d.ts.map +1 -0
- package/dist/indexing/database.js +287 -0
- package/dist/indexing/database.js.map +1 -0
- package/dist/indexing/index.d.ts +9 -0
- package/dist/indexing/index.d.ts.map +1 -0
- package/dist/indexing/index.js +13 -0
- package/dist/indexing/index.js.map +1 -0
- package/dist/indexing/indexer.d.ts +63 -0
- package/dist/indexing/indexer.d.ts.map +1 -0
- package/dist/indexing/indexer.js +352 -0
- package/dist/indexing/indexer.js.map +1 -0
- package/dist/indexing/recent-edits-cache.d.ts +77 -0
- package/dist/indexing/recent-edits-cache.d.ts.map +1 -0
- package/dist/indexing/recent-edits-cache.js +123 -0
- package/dist/indexing/recent-edits-cache.js.map +1 -0
- package/dist/indexing/types.d.ts +39 -0
- package/dist/indexing/types.d.ts.map +1 -0
- package/dist/indexing/types.js +6 -0
- package/dist/indexing/types.js.map +1 -0
- package/dist/mcp/index.d.ts +33 -0
- package/dist/mcp/index.d.ts.map +1 -0
- package/dist/mcp/index.js +37 -0
- package/dist/mcp/index.js.map +1 -0
- package/dist/mcp/manager.d.ts +123 -0
- package/dist/mcp/manager.d.ts.map +1 -0
- package/dist/mcp/manager.js +331 -0
- package/dist/mcp/manager.js.map +1 -0
- package/dist/oauth.d.ts +33 -0
- package/dist/oauth.d.ts.map +1 -0
- package/dist/oauth.js +312 -0
- package/dist/oauth.js.map +1 -0
- package/dist/permissions/index.d.ts +216 -0
- package/dist/permissions/index.d.ts.map +1 -0
- package/dist/permissions/index.js +938 -0
- package/dist/permissions/index.js.map +1 -0
- package/dist/plan/index.d.ts +20 -0
- package/dist/plan/index.d.ts.map +1 -0
- package/dist/plan/index.js +24 -0
- package/dist/plan/index.js.map +1 -0
- package/dist/plan/manager.d.ts +101 -0
- package/dist/plan/manager.d.ts.map +1 -0
- package/dist/plan/manager.js +170 -0
- package/dist/plan/manager.js.map +1 -0
- package/dist/rules/index.d.ts +28 -0
- package/dist/rules/index.d.ts.map +1 -0
- package/dist/rules/index.js +31 -0
- package/dist/rules/index.js.map +1 -0
- package/dist/rules/manager.d.ts +77 -0
- package/dist/rules/manager.d.ts.map +1 -0
- package/dist/rules/manager.js +279 -0
- package/dist/rules/manager.js.map +1 -0
- package/dist/rules/types.d.ts +34 -0
- package/dist/rules/types.d.ts.map +1 -0
- package/dist/rules/types.js +9 -0
- package/dist/rules/types.js.map +1 -0
- package/dist/sandbox/filesystem.d.ts +20 -0
- package/dist/sandbox/filesystem.d.ts.map +1 -0
- package/dist/sandbox/filesystem.js +141 -0
- package/dist/sandbox/filesystem.js.map +1 -0
- package/dist/sandbox/index.d.ts +4 -0
- package/dist/sandbox/index.d.ts.map +1 -0
- package/dist/sandbox/index.js +8 -0
- package/dist/sandbox/index.js.map +1 -0
- package/dist/sandbox/manager.d.ts +47 -0
- package/dist/sandbox/manager.d.ts.map +1 -0
- package/dist/sandbox/manager.js +220 -0
- package/dist/sandbox/manager.js.map +1 -0
- package/dist/sandbox/network.d.ts +14 -0
- package/dist/sandbox/network.d.ts.map +1 -0
- package/dist/sandbox/network.js +87 -0
- package/dist/sandbox/network.js.map +1 -0
- package/dist/sandbox/types.d.ts +42 -0
- package/dist/sandbox/types.d.ts.map +1 -0
- package/dist/sandbox/types.js +25 -0
- package/dist/sandbox/types.js.map +1 -0
- package/dist/tools/ast-edit.d.ts +57 -0
- package/dist/tools/ast-edit.d.ts.map +1 -0
- package/dist/tools/ast-edit.js +443 -0
- package/dist/tools/ast-edit.js.map +1 -0
- package/dist/tools/code-verify.d.ts +8 -0
- package/dist/tools/code-verify.d.ts.map +1 -0
- package/dist/tools/code-verify.js +159 -0
- package/dist/tools/code-verify.js.map +1 -0
- package/dist/tools/codebase-search.d.ts +17 -0
- package/dist/tools/codebase-search.d.ts.map +1 -0
- package/dist/tools/codebase-search.js +104 -0
- package/dist/tools/codebase-search.js.map +1 -0
- package/dist/tools/file-delete.d.ts +26 -0
- package/dist/tools/file-delete.d.ts.map +1 -0
- package/dist/tools/file-delete.js +179 -0
- package/dist/tools/file-delete.js.map +1 -0
- package/dist/tools/file-edit.d.ts +10 -0
- package/dist/tools/file-edit.d.ts.map +1 -0
- package/dist/tools/file-edit.js +138 -0
- package/dist/tools/file-edit.js.map +1 -0
- package/dist/tools/file-read.d.ts +12 -0
- package/dist/tools/file-read.d.ts.map +1 -0
- package/dist/tools/file-read.js +211 -0
- package/dist/tools/file-read.js.map +1 -0
- package/dist/tools/file-run.d.ts +10 -0
- package/dist/tools/file-run.d.ts.map +1 -0
- package/dist/tools/file-run.js +179 -0
- package/dist/tools/file-run.js.map +1 -0
- package/dist/tools/file-write.d.ts +10 -0
- package/dist/tools/file-write.d.ts.map +1 -0
- package/dist/tools/file-write.js +134 -0
- package/dist/tools/file-write.js.map +1 -0
- package/dist/tools/glob-search.d.ts +8 -0
- package/dist/tools/glob-search.d.ts.map +1 -0
- package/dist/tools/glob-search.js +108 -0
- package/dist/tools/glob-search.js.map +1 -0
- package/dist/tools/grep-search.d.ts +8 -0
- package/dist/tools/grep-search.d.ts.map +1 -0
- package/dist/tools/grep-search.js +139 -0
- package/dist/tools/grep-search.js.map +1 -0
- package/dist/tools/list-dir.d.ts +16 -0
- package/dist/tools/list-dir.d.ts.map +1 -0
- package/dist/tools/list-dir.js +183 -0
- package/dist/tools/list-dir.js.map +1 -0
- package/dist/tools/multi-edit.d.ts +16 -0
- package/dist/tools/multi-edit.d.ts.map +1 -0
- package/dist/tools/multi-edit.js +163 -0
- package/dist/tools/multi-edit.js.map +1 -0
- package/dist/tools/notebook-edit.d.ts +31 -0
- package/dist/tools/notebook-edit.d.ts.map +1 -0
- package/dist/tools/notebook-edit.js +321 -0
- package/dist/tools/notebook-edit.js.map +1 -0
- package/dist/tools/registry.d.ts +16 -0
- package/dist/tools/registry.d.ts.map +1 -0
- package/dist/tools/registry.js +41 -0
- package/dist/tools/registry.js.map +1 -0
- package/dist/tools/shell-exec.d.ts +12 -0
- package/dist/tools/shell-exec.d.ts.map +1 -0
- package/dist/tools/shell-exec.js +261 -0
- package/dist/tools/shell-exec.js.map +1 -0
- package/dist/tools/sub-agent-manager.d.ts +57 -0
- package/dist/tools/sub-agent-manager.d.ts.map +1 -0
- package/dist/tools/sub-agent-manager.js +153 -0
- package/dist/tools/sub-agent-manager.js.map +1 -0
- package/dist/tools/sub-agent-status.d.ts +12 -0
- package/dist/tools/sub-agent-status.d.ts.map +1 -0
- package/dist/tools/sub-agent-status.js +59 -0
- package/dist/tools/sub-agent-status.js.map +1 -0
- package/dist/tools/sub-agent-terminate.d.ts +12 -0
- package/dist/tools/sub-agent-terminate.d.ts.map +1 -0
- package/dist/tools/sub-agent-terminate.js +55 -0
- package/dist/tools/sub-agent-terminate.js.map +1 -0
- package/dist/tools/sub-agent.d.ts +34 -0
- package/dist/tools/sub-agent.d.ts.map +1 -0
- package/dist/tools/sub-agent.js +140 -0
- package/dist/tools/sub-agent.js.map +1 -0
- package/dist/tools/system-info.d.ts +24 -0
- package/dist/tools/system-info.d.ts.map +1 -0
- package/dist/tools/system-info.js +220 -0
- package/dist/tools/system-info.js.map +1 -0
- package/dist/tools/todo.d.ts +16 -0
- package/dist/tools/todo.d.ts.map +1 -0
- package/dist/tools/todo.js +144 -0
- package/dist/tools/todo.js.map +1 -0
- package/dist/tools/types.d.ts +20 -0
- package/dist/tools/types.d.ts.map +1 -0
- package/dist/tools/types.js +3 -0
- package/dist/tools/types.js.map +1 -0
- package/dist/tools/view-diff.d.ts +11 -0
- package/dist/tools/view-diff.d.ts.map +1 -0
- package/dist/tools/view-diff.js +88 -0
- package/dist/tools/view-diff.js.map +1 -0
- package/dist/tools/view-repo-map.d.ts +18 -0
- package/dist/tools/view-repo-map.d.ts.map +1 -0
- package/dist/tools/view-repo-map.js +245 -0
- package/dist/tools/view-repo-map.js.map +1 -0
- package/dist/tools/web-fetch.d.ts +13 -0
- package/dist/tools/web-fetch.d.ts.map +1 -0
- package/dist/tools/web-fetch.js +106 -0
- package/dist/tools/web-fetch.js.map +1 -0
- package/dist/tools/web-search.d.ts +10 -0
- package/dist/tools/web-search.d.ts.map +1 -0
- package/dist/tools/web-search.js +106 -0
- package/dist/tools/web-search.js.map +1 -0
- package/dist/utils/gitignore.d.ts +10 -0
- package/dist/utils/gitignore.d.ts.map +1 -0
- package/dist/utils/gitignore.js +104 -0
- package/dist/utils/gitignore.js.map +1 -0
- package/dist/utils/lazy-apply.d.ts +45 -0
- package/dist/utils/lazy-apply.d.ts.map +1 -0
- package/dist/utils/lazy-apply.js +164 -0
- package/dist/utils/lazy-apply.js.map +1 -0
- package/dist/utils/memory.d.ts +36 -0
- package/dist/utils/memory.d.ts.map +1 -0
- package/dist/utils/memory.js +136 -0
- package/dist/utils/memory.js.map +1 -0
- package/dist/utils/path-matching.d.ts +24 -0
- package/dist/utils/path-matching.d.ts.map +1 -0
- package/dist/utils/path-matching.js +116 -0
- package/dist/utils/path-matching.js.map +1 -0
- package/dist/utils/path-safety.d.ts +13 -0
- package/dist/utils/path-safety.d.ts.map +1 -0
- package/dist/utils/path-safety.js +54 -0
- package/dist/utils/path-safety.js.map +1 -0
- package/dist/utils/project-config.d.ts +18 -0
- package/dist/utils/project-config.d.ts.map +1 -0
- package/dist/utils/project-config.js +76 -0
- package/dist/utils/project-config.js.map +1 -0
- package/dist/utils/search-match.d.ts +63 -0
- package/dist/utils/search-match.d.ts.map +1 -0
- package/dist/utils/search-match.js +426 -0
- package/dist/utils/search-match.js.map +1 -0
- package/dist/utils/shell-paths.d.ts +17 -0
- package/dist/utils/shell-paths.d.ts.map +1 -0
- package/dist/utils/shell-paths.js +107 -0
- package/dist/utils/shell-paths.js.map +1 -0
- package/dist/utils/streaming-diff.d.ts +45 -0
- package/dist/utils/streaming-diff.d.ts.map +1 -0
- package/dist/utils/streaming-diff.js +230 -0
- package/dist/utils/streaming-diff.js.map +1 -0
- package/dist/utils/todo.d.ts +47 -0
- package/dist/utils/todo.d.ts.map +1 -0
- package/dist/utils/todo.js +102 -0
- package/dist/utils/todo.js.map +1 -0
- package/package.json +23 -0
- package/src/agents/coordinator.ts +240 -0
- package/src/context-providers/clipboard.ts +48 -0
- package/src/context-providers/codebase.ts +274 -0
- package/src/context-providers/diff.ts +66 -0
- package/src/context-providers/docs.ts +160 -0
- package/src/context-providers/file-include.ts +54 -0
- package/src/context-providers/folder.ts +106 -0
- package/src/context-providers/git.ts +72 -0
- package/src/context-providers/index.ts +26 -0
- package/src/context-providers/open-files.ts +113 -0
- package/src/context-providers/problems.ts +100 -0
- package/src/context-providers/registry.ts +99 -0
- package/src/context-providers/terminal.ts +58 -0
- package/src/context-providers/tree.ts +161 -0
- package/src/context-providers/types.ts +84 -0
- package/src/context-providers/url.ts +138 -0
- package/src/effort/index.ts +177 -0
- package/src/hooks/index.ts +148 -0
- package/src/index.ts +114 -0
- package/src/indexing/README.md +267 -0
- package/src/indexing/chunker.ts +206 -0
- package/src/indexing/database.ts +299 -0
- package/src/indexing/index.ts +15 -0
- package/src/indexing/indexer.ts +383 -0
- package/src/indexing/recent-edits-cache.ts +150 -0
- package/src/indexing/types.ts +44 -0
- package/src/mcp/index.ts +33 -0
- package/src/mcp/manager.ts +385 -0
- package/src/oauth.ts +330 -0
- package/src/permissions/index.ts +1011 -0
- package/src/plan/index.ts +20 -0
- package/src/plan/manager.ts +233 -0
- package/src/rules/index.ts +28 -0
- package/src/rules/manager.ts +276 -0
- package/src/rules/types.ts +40 -0
- package/src/sandbox/filesystem.ts +135 -0
- package/src/sandbox/index.ts +9 -0
- package/src/sandbox/manager.ts +213 -0
- package/src/sandbox/network.ts +101 -0
- package/src/sandbox/types.ts +63 -0
- package/src/tools/ast-edit.ts +493 -0
- package/src/tools/code-verify.ts +143 -0
- package/src/tools/codebase-search.ts +117 -0
- package/src/tools/file-delete.ts +155 -0
- package/src/tools/file-edit.ts +115 -0
- package/src/tools/file-read.ts +195 -0
- package/src/tools/file-run.ts +158 -0
- package/src/tools/file-write.ts +104 -0
- package/src/tools/glob-search.ts +80 -0
- package/src/tools/grep-search.ts +120 -0
- package/src/tools/list-dir.ts +172 -0
- package/src/tools/multi-edit.ts +138 -0
- package/src/tools/notebook-edit.ts +342 -0
- package/src/tools/registry.ts +43 -0
- package/src/tools/shell-exec.ts +251 -0
- package/src/tools/sub-agent-manager.ts +183 -0
- package/src/tools/sub-agent-status.ts +67 -0
- package/src/tools/sub-agent-terminate.ts +62 -0
- package/src/tools/sub-agent.ts +162 -0
- package/src/tools/system-info.ts +248 -0
- package/src/tools/todo.ts +149 -0
- package/src/tools/types.ts +21 -0
- package/src/tools/view-diff.ts +99 -0
- package/src/tools/view-repo-map.ts +249 -0
- package/src/tools/web-fetch.ts +118 -0
- package/src/tools/web-search.ts +129 -0
- package/src/utils/gitignore.ts +73 -0
- package/src/utils/lazy-apply.ts +189 -0
- package/src/utils/memory.ts +124 -0
- package/src/utils/path-matching.ts +84 -0
- package/src/utils/path-safety.ts +19 -0
- package/src/utils/project-config.ts +41 -0
- package/src/utils/search-match.ts +495 -0
- package/src/utils/shell-paths.ts +79 -0
- package/src/utils/streaming-diff.ts +260 -0
- package/src/utils/todo.ts +115 -0
- package/tsconfig.json +18 -0
package/src/index.ts
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
// Tools
|
|
2
|
+
export { ToolRegistry } from "./tools/registry";
|
|
3
|
+
export { FileReadTool } from "./tools/file-read";
|
|
4
|
+
export { FileWriteTool } from "./tools/file-write";
|
|
5
|
+
export { FileEditTool } from "./tools/file-edit";
|
|
6
|
+
export { GlobSearchTool } from "./tools/glob-search";
|
|
7
|
+
export { GrepSearchTool } from "./tools/grep-search";
|
|
8
|
+
export { ShellExecTool } from "./tools/shell-exec";
|
|
9
|
+
export { FileRunTool } from "./tools/file-run";
|
|
10
|
+
export { CodeVerifyTool } from "./tools/code-verify";
|
|
11
|
+
export { WebFetchTool } from "./tools/web-fetch";
|
|
12
|
+
export { WebSearchTool } from "./tools/web-search";
|
|
13
|
+
export { SubAgentTool } from "./tools/sub-agent";
|
|
14
|
+
export type { SubAgentRunnerFactory } from "./tools/sub-agent";
|
|
15
|
+
export { SubAgentManager } from "./tools/sub-agent-manager";
|
|
16
|
+
export type { SubAgentStatus, SubAgentEntry } from "./tools/sub-agent-manager";
|
|
17
|
+
export { SubAgentStatusTool } from "./tools/sub-agent-status";
|
|
18
|
+
export { SubAgentTerminateTool } from "./tools/sub-agent-terminate";
|
|
19
|
+
export { TodoTool } from "./tools/todo";
|
|
20
|
+
export { SystemInfoTool } from "./tools/system-info";
|
|
21
|
+
export { MultiEditTool } from "./tools/multi-edit";
|
|
22
|
+
|
|
23
|
+
export { ListDirTool } from "./tools/list-dir";
|
|
24
|
+
export { ViewDiffTool } from "./tools/view-diff";
|
|
25
|
+
export { ViewRepoMapTool } from "./tools/view-repo-map";
|
|
26
|
+
export { CodebaseSearchTool } from "./tools/codebase-search";
|
|
27
|
+
export { ASTEditTool } from "./tools/ast-edit";
|
|
28
|
+
export { NotebookEditTool } from "./tools/notebook-edit";
|
|
29
|
+
export type { ToolDefinition, ToolResult, BaseTool } from "./tools/types";
|
|
30
|
+
|
|
31
|
+
// Search matching utilities
|
|
32
|
+
export { findSearchMatch, findAllSearchMatches, executeFindAndReplace, executeMultiFindAndReplace, isUnifiedDiff, applyUnifiedDiff } from "./utils/search-match";
|
|
33
|
+
|
|
34
|
+
// Streaming diff utilities
|
|
35
|
+
export { streamDeterministicDiff, streamUnifiedDiff, StreamingDiffAccumulator } from "./utils/streaming-diff";
|
|
36
|
+
export type { DiffChunk, DiffChunkCallback } from "./utils/streaming-diff";
|
|
37
|
+
|
|
38
|
+
// Lazy apply — placeholder expansion for LLM-generated edits
|
|
39
|
+
export { hasPlaceholders, expandPlaceholders, isPlaceholderLine } from "./utils/lazy-apply";
|
|
40
|
+
export type { LazyApplyResult } from "./utils/lazy-apply";
|
|
41
|
+
|
|
42
|
+
// Permissions
|
|
43
|
+
export { PermissionManager, PermissionMode } from "./permissions";
|
|
44
|
+
export type { PermissionRule, PermissionScope, PermissionPromptFn } from "./permissions";
|
|
45
|
+
|
|
46
|
+
// Sandbox
|
|
47
|
+
export { SandboxManager } from "./sandbox";
|
|
48
|
+
export { defaultSandboxConfig } from "./sandbox";
|
|
49
|
+
export type { SandboxConfig, SandboxMode, SandboxCheckResult, SandboxFilesystemConfig, SandboxNetworkConfig } from "./sandbox/types";
|
|
50
|
+
|
|
51
|
+
// Hooks
|
|
52
|
+
export { HookManager } from "./hooks";
|
|
53
|
+
export type { HookDefinition, HookResult } from "./hooks";
|
|
54
|
+
|
|
55
|
+
// Utilities
|
|
56
|
+
export { safePath } from "./utils/path-safety";
|
|
57
|
+
export { loadIgnorePatterns } from "./utils/gitignore";
|
|
58
|
+
export { loadProjectConfig, getProjectConfigPath } from "./utils/project-config";
|
|
59
|
+
export { MemoryStore } from "./utils/memory";
|
|
60
|
+
export type { MemoryEntry } from "./utils/memory";
|
|
61
|
+
export { TodoStore } from "./utils/todo";
|
|
62
|
+
export type { TodoItem, TodoStatus } from "./utils/todo";
|
|
63
|
+
|
|
64
|
+
// Context Providers — pluggable @ mention system
|
|
65
|
+
export { ContextProviderRegistry } from "./context-providers/registry";
|
|
66
|
+
export type { ContextProvider, ContextResult, ContextResolveOptions } from "./context-providers/types";
|
|
67
|
+
export { TerminalContextProvider } from "./context-providers/terminal";
|
|
68
|
+
export { OpenFilesContextProvider } from "./context-providers/open-files";
|
|
69
|
+
export { UrlContextProvider } from "./context-providers/url";
|
|
70
|
+
export { TreeContextProvider } from "./context-providers/tree";
|
|
71
|
+
export { ProblemsContextProvider } from "./context-providers/problems";
|
|
72
|
+
export { CodebaseContextProvider } from "./context-providers/codebase";
|
|
73
|
+
export { ClipboardContextProvider } from "./context-providers/clipboard";
|
|
74
|
+
export { FileIncludeContextProvider } from "./context-providers/file-include";
|
|
75
|
+
export { GitContextProvider } from "./context-providers/git";
|
|
76
|
+
export { DiffContextProvider } from "./context-providers/diff";
|
|
77
|
+
export { FolderContextProvider } from "./context-providers/folder";
|
|
78
|
+
export { DocsContextProvider } from "./context-providers/docs";
|
|
79
|
+
|
|
80
|
+
// Project Rules — hierarchical glob-scoped rules
|
|
81
|
+
export { RulesManager } from "./rules/manager";
|
|
82
|
+
export type { Rule, RuleSource } from "./rules/types";
|
|
83
|
+
|
|
84
|
+
// Plan Mode — read-only planning before execution
|
|
85
|
+
export { PlanManager } from "./plan/manager";
|
|
86
|
+
export type { Plan, PlanStep, PlanStatus } from "./plan/manager";
|
|
87
|
+
|
|
88
|
+
// MCP Server Support — Model Context Protocol
|
|
89
|
+
export { McpManager } from "./mcp/manager";
|
|
90
|
+
export type { McpServerConfig, McpTool } from "./mcp/manager";
|
|
91
|
+
|
|
92
|
+
// Effort Level Control — adjusts analysis depth
|
|
93
|
+
export { EffortManager } from "./effort";
|
|
94
|
+
export type { EffortLevel, EffortConfig } from "./effort";
|
|
95
|
+
|
|
96
|
+
// OAuth — shared credential storage, PKCE, token management
|
|
97
|
+
export {
|
|
98
|
+
saveOAuthTokens,
|
|
99
|
+
loadOAuthTokens,
|
|
100
|
+
clearOAuthTokens,
|
|
101
|
+
isOAuthExpired,
|
|
102
|
+
refreshAccessToken,
|
|
103
|
+
resolveOAuthToken,
|
|
104
|
+
generateOAuthUrl,
|
|
105
|
+
exchangeOAuthCode,
|
|
106
|
+
getOAuthStatus,
|
|
107
|
+
} from "./oauth";
|
|
108
|
+
export type { OAuthTokens } from "./oauth";
|
|
109
|
+
|
|
110
|
+
// Codebase Indexing — FTS5 + embeddings
|
|
111
|
+
export { CodebaseIndexer, IndexDatabase, chunkDocument, RecentEditsCache } from "./indexing";
|
|
112
|
+
export type { CachedEdit } from "./indexing";
|
|
113
|
+
export type { EmbeddingProvider } from "./indexing";
|
|
114
|
+
export type { SearchResult, IndexingProgress, IndexStats, ChunkWithMeta } from "./indexing";
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
# Codebase Indexing System
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
The indexing system provides fast, ranked code search across the entire codebase using **SQLite FTS5** (full-text search with BM25 ranking) and optional **vector embeddings** (cosine similarity). It powers both the `codebase_search` tool and the `@codebase` context provider.
|
|
6
|
+
|
|
7
|
+
## Architecture
|
|
8
|
+
|
|
9
|
+
```
|
|
10
|
+
┌──────────────────────────────────────────────────┐
|
|
11
|
+
│ CodebaseIndexer │
|
|
12
|
+
│ │
|
|
13
|
+
│ 1. Scan files (respects .gitignore) │
|
|
14
|
+
│ 2. SHA-256 cache key per file │
|
|
15
|
+
│ 3. Diff against catalog (incremental updates) │
|
|
16
|
+
│ 4. Chunk files (code-aware / basic / markdown) │
|
|
17
|
+
│ 5. Insert into SQLite │
|
|
18
|
+
│ 6. Build FTS5 index (trigram tokenizer) │
|
|
19
|
+
│ 7. Optional: compute embeddings │
|
|
20
|
+
│ │
|
|
21
|
+
│ ┌──────────┐ ┌───────────┐ ┌──────────────┐ │
|
|
22
|
+
│ │ Chunks │ │ FTS5 │ │ Embeddings │ │
|
|
23
|
+
│ │ (SQLite) │ │ (BM25) │ │ (Vectors) │ │
|
|
24
|
+
│ └──────────┘ └───────────┘ └──────────────┘ │
|
|
25
|
+
│ │ │ │ │
|
|
26
|
+
│ └──────────────┴───────────────┘ │
|
|
27
|
+
│ │ │
|
|
28
|
+
│ search(query) │
|
|
29
|
+
│ FTS(35%) + Embeddings(65%) │
|
|
30
|
+
│ → deduplicated results │
|
|
31
|
+
└──────────────────────────────────────────────────┘
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## How It Works
|
|
35
|
+
|
|
36
|
+
### 1. File Scanning
|
|
37
|
+
|
|
38
|
+
- Recursively scans the working directory
|
|
39
|
+
- Respects `.gitignore` patterns
|
|
40
|
+
- Skips: `node_modules`, `.git`, `dist`, `build`, `__pycache__`, `coverage`, etc.
|
|
41
|
+
- Skips files > 1MB
|
|
42
|
+
- Supports 40+ file extensions (code, config, docs)
|
|
43
|
+
|
|
44
|
+
### 2. Incremental Updates
|
|
45
|
+
|
|
46
|
+
Files are tracked in an `index_catalog` table with SHA-256 content hashes:
|
|
47
|
+
|
|
48
|
+
```
|
|
49
|
+
index_catalog:
|
|
50
|
+
path, cacheKey (SHA-256), lastUpdated, directory
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
On each index run:
|
|
54
|
+
- **New files** → chunk + index
|
|
55
|
+
- **Modified files** (hash changed) → delete old chunks, re-index
|
|
56
|
+
- **Deleted files** → remove from all tables
|
|
57
|
+
- **Unchanged files** → skip entirely
|
|
58
|
+
|
|
59
|
+
This makes re-indexing fast — only changed files are processed.
|
|
60
|
+
|
|
61
|
+
### 3. Chunking Strategies
|
|
62
|
+
|
|
63
|
+
Three strategies based on file type:
|
|
64
|
+
|
|
65
|
+
#### Code-Aware Chunking (`.ts`, `.js`, `.py`, `.go`, `.rs`, etc.)
|
|
66
|
+
Splits on function/class boundaries using regex patterns:
|
|
67
|
+
- Function declarations, arrow functions, class definitions
|
|
68
|
+
- Method declarations, interfaces, type aliases, enums
|
|
69
|
+
- Python `def`/`class`, Go `func`, Rust `fn`/`struct`/`impl`
|
|
70
|
+
|
|
71
|
+
Falls back to line-based splitting if a chunk exceeds size limits.
|
|
72
|
+
|
|
73
|
+
#### Markdown Chunking (`.md`, `.mdx`)
|
|
74
|
+
Splits on header boundaries (`# H1`, `## H2`, `### H3`), keeping each section as a chunk.
|
|
75
|
+
|
|
76
|
+
#### Basic Chunking (everything else)
|
|
77
|
+
Line-based splitting with ~17 lines per chunk (targeting ~384 tokens) and a 3-line overlap between consecutive chunks.
|
|
78
|
+
|
|
79
|
+
All strategies:
|
|
80
|
+
- Target ~384 tokens per chunk (~1400 chars)
|
|
81
|
+
- Merge tiny chunks (< 100 chars) with neighbors
|
|
82
|
+
- Track `startLine` and `endLine` for source mapping
|
|
83
|
+
|
|
84
|
+
### 4. SQLite FTS5 Full-Text Search
|
|
85
|
+
|
|
86
|
+
Uses SQLite's FTS5 extension with **trigram tokenizer** for substring matching:
|
|
87
|
+
|
|
88
|
+
```sql
|
|
89
|
+
CREATE VIRTUAL TABLE fts USING fts5(
|
|
90
|
+
path,
|
|
91
|
+
content,
|
|
92
|
+
tokenize = 'trigram'
|
|
93
|
+
);
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
**How trigram works:** Text is broken into overlapping 3-character sequences. "function" → "fun", "unc", "nct", "cti", "tio", "ion". This enables substring matching without word boundaries.
|
|
97
|
+
|
|
98
|
+
**BM25 ranking** with 10x path boost:
|
|
99
|
+
```sql
|
|
100
|
+
ORDER BY bm25(fts, 10.0)
|
|
101
|
+
```
|
|
102
|
+
Matches in file paths (e.g., searching "auth" matches `src/auth/middleware.ts`) are weighted 10x more heavily in the BM25 score than matches in content only.
|
|
103
|
+
|
|
104
|
+
### 5. Vector Embeddings (Optional)
|
|
105
|
+
|
|
106
|
+
When an `EmbeddingProvider` is configured:
|
|
107
|
+
|
|
108
|
+
1. Each chunk's text is sent to the embedding model (batched)
|
|
109
|
+
2. Vectors stored in SQLite as JSON arrays
|
|
110
|
+
3. Search uses **cosine similarity** computed in-process:
|
|
111
|
+
|
|
112
|
+
```typescript
|
|
113
|
+
cosine_similarity(a, b) = dot(a, b) / (|a| * |b|)   // a = query vector, b = chunk vector
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
No external vector database is needed — everything is stored in the same SQLite file.
|
|
117
|
+
|
|
118
|
+
### 6. Combined Search Pipeline
|
|
119
|
+
|
|
120
|
+
When `search(query)` is called:
|
|
121
|
+
|
|
122
|
+
```
|
|
123
|
+
query
|
|
124
|
+
├─ FTS5 search (35% of results)
|
|
125
|
+
│ └─ BM25 ranking with path boost
|
|
126
|
+
│
|
|
127
|
+
├─ Embedding search (65% of results)
|
|
128
|
+
│ └─ Cosine similarity ranking
|
|
129
|
+
│
|
|
130
|
+
└─ Deduplicate by (path + startLine)
|
|
131
|
+
└─ Return top-k results
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
If no embedding provider is configured, search falls back to FTS-only.
|
|
135
|
+
|
|
136
|
+
## Database Schema
|
|
137
|
+
|
|
138
|
+
```sql
|
|
139
|
+
-- File chunks with line ranges
|
|
140
|
+
chunks (
|
|
141
|
+
id INTEGER PRIMARY KEY,
|
|
142
|
+
path TEXT, -- relative file path
|
|
143
|
+
cacheKey TEXT, -- SHA-256 hash of file content
|
|
144
|
+
content TEXT, -- chunk text
|
|
145
|
+
startLine INTEGER,
|
|
146
|
+
endLine INTEGER,
|
|
147
|
+
idx INTEGER -- chunk index within file
|
|
148
|
+
)
|
|
149
|
+
|
|
150
|
+
-- FTS5 virtual table (trigram tokenizer, BM25 ranking)
|
|
151
|
+
fts (path, content)
|
|
152
|
+
|
|
153
|
+
-- Links FTS entries to chunks
|
|
154
|
+
fts_metadata (
|
|
155
|
+
id INTEGER PRIMARY KEY, -- matches fts rowid
|
|
156
|
+
path TEXT,
|
|
157
|
+
cacheKey TEXT,
|
|
158
|
+
chunkId INTEGER → chunks(id)
|
|
159
|
+
)
|
|
160
|
+
|
|
161
|
+
-- Vector embeddings
|
|
162
|
+
embeddings (
|
|
163
|
+
id INTEGER PRIMARY KEY,
|
|
164
|
+
chunkId INTEGER → chunks(id),
|
|
165
|
+
path TEXT,
|
|
166
|
+
cacheKey TEXT,
|
|
167
|
+
vector TEXT, -- JSON array of floats
|
|
168
|
+
model TEXT -- embedding model identifier
|
|
169
|
+
)
|
|
170
|
+
|
|
171
|
+
-- Tracks indexed files for incremental updates
|
|
172
|
+
index_catalog (
|
|
173
|
+
path TEXT,
|
|
174
|
+
cacheKey TEXT, -- SHA-256 of file content
|
|
175
|
+
lastUpdated INTEGER,
|
|
176
|
+
directory TEXT
|
|
177
|
+
)
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
Storage location: `~/.cdoing/index.sqlite`
|
|
181
|
+
|
|
182
|
+
## Usage
|
|
183
|
+
|
|
184
|
+
### As a Tool (`codebase_search`)
|
|
185
|
+
|
|
186
|
+
The LLM calls this tool to search the codebase:
|
|
187
|
+
|
|
188
|
+
```
|
|
189
|
+
codebase_search({ query: "authentication middleware" })
|
|
190
|
+
codebase_search({ query: "sendEmail function", directory: "src/services" })
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
The index is built lazily on first use and refreshed if stale (>1 hour).
|
|
194
|
+
|
|
195
|
+
### As a Context Provider (`@codebase`)
|
|
196
|
+
|
|
197
|
+
Users type `@codebase auth middleware` to attach relevant code to their message.
|
|
198
|
+
|
|
199
|
+
### Programmatic API
|
|
200
|
+
|
|
201
|
+
```typescript
|
|
202
|
+
import { CodebaseIndexer } from "@cdoing/core";
|
|
203
|
+
|
|
204
|
+
const indexer = new CodebaseIndexer("/path/to/project");
|
|
205
|
+
|
|
206
|
+
// Index (incremental)
|
|
207
|
+
await indexer.index((progress) => {
|
|
208
|
+
console.log(`${progress.phase}: ${progress.message}`);
|
|
209
|
+
});
|
|
210
|
+
|
|
211
|
+
// Search
|
|
212
|
+
const results = await indexer.search("authentication", 20);
|
|
213
|
+
for (const r of results) {
|
|
214
|
+
console.log(`${r.path}:${r.startLine} (${r.source}, score: ${r.score})`);
|
|
215
|
+
console.log(r.content);
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
// With embeddings
|
|
219
|
+
import { CodebaseIndexer, type EmbeddingProvider } from "@cdoing/core";
|
|
220
|
+
|
|
221
|
+
const embedder: EmbeddingProvider = {
|
|
222
|
+
modelId: "text-embedding-3-small",
|
|
223
|
+
embed: async (texts) => {
|
|
224
|
+
// Call OpenAI/Ollama/etc.
|
|
225
|
+
return vectors;
|
|
226
|
+
},
|
|
227
|
+
};
|
|
228
|
+
|
|
229
|
+
const indexer = new CodebaseIndexer("/path/to/project", embedder);
|
|
230
|
+
await indexer.index();
|
|
231
|
+
const results = await indexer.search("how does auth work?"); // Uses both FTS + embeddings
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
## Comparison with Continue.dev
|
|
235
|
+
|
|
236
|
+
| Feature | Cdoing | Continue |
|
|
237
|
+
|---|---|---|
|
|
238
|
+
| Storage | Single SQLite file | SQLite + LanceDB (separate) |
|
|
239
|
+
| FTS | FTS5 with trigram tokenizer | FTS5 with trigram tokenizer |
|
|
240
|
+
| BM25 path boost | 10x | 10x |
|
|
241
|
+
| Chunking | Regex boundary detection | Tree-sitter AST |
|
|
242
|
+
| Code structure | Heuristic (function/class patterns) | Full AST parsing (15+ languages) |
|
|
243
|
+
| Embeddings | SQLite JSON + in-process cosine sim | LanceDB native vector search |
|
|
244
|
+
| Incremental updates | SHA-256 content hashing | SHA-256 with cross-branch dedup |
|
|
245
|
+
| Cross-branch cache | Not yet | Content-addressed global cache |
|
|
246
|
+
| Search pipeline | FTS(35%) + Embeddings(65%) | FTS(25%) + Embeddings(50%) + Recent(25%) |
|
|
247
|
+
| Dependencies | better-sqlite3 only | better-sqlite3 + LanceDB native |
|
|
248
|
+
|
|
249
|
+
### Design Trade-offs
|
|
250
|
+
|
|
251
|
+
**Why SQLite for embeddings instead of LanceDB?**
|
|
252
|
+
- Zero additional native dependencies (LanceDB requires platform-specific binaries)
|
|
253
|
+
- Single file for all index data
|
|
254
|
+
- In-process cosine similarity is fast enough for codebases < 100k chunks
|
|
255
|
+
- Simpler deployment and no platform compatibility issues
|
|
256
|
+
|
|
257
|
+
**Why regex chunking instead of tree-sitter?**
|
|
258
|
+
- Tree-sitter requires per-language WASM binaries (~2MB each)
|
|
259
|
+
- Regex patterns cover 90% of function/class boundary detection
|
|
260
|
+
- Falls back gracefully to line-based chunking
|
|
261
|
+
- Much smaller package size
|
|
262
|
+
|
|
263
|
+
**Future improvements:**
|
|
264
|
+
- Add tree-sitter for more accurate code structure analysis
|
|
265
|
+
- Add recently-edited file cache as a retrieval source
|
|
266
|
+
- Add cross-branch content deduplication
|
|
267
|
+
- Add reranking model support for the retrieval pipeline
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Code Chunker — splits files into meaningful chunks for indexing.
|
|
3
|
+
*
|
|
4
|
+
* Two strategies:
|
|
5
|
+
* 1. Code-aware: splits on function/class boundaries (heuristic, no tree-sitter)
|
|
6
|
+
* 2. Basic: splits on line count with overlap
|
|
7
|
+
*
|
|
8
|
+
* Inspired by Continue's chunking but without tree-sitter dependency
|
|
9
|
+
* to keep the package lightweight.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import * as path from "path";
|
|
13
|
+
|
|
14
|
+
export interface Chunk {
  /** Text of the chunk as produced by one of the chunkers in this module. */
  content: string;

  /** 1-based number of the first source line covered by the chunk. */
  startLine: number;

  /** 1-based number of the last source line covered by the chunk (inclusive). */
  endLine: number;
}
|
|
19
|
+
|
|
20
|
+
/** Target chunk size in characters (~384 tokens at ~3.5 chars/token, rounded up) */
|
|
21
|
+
const MAX_CHUNK_CHARS = 1400;
|
|
22
|
+
|
|
23
|
+
/** Overlap between chunks (lines) */
|
|
24
|
+
const OVERLAP_LINES = 3;
|
|
25
|
+
|
|
26
|
+
/**
 * Lower-case extensions (with the leading dot) of files that are routed to
 * the code-aware chunker instead of the markdown/basic chunkers.
 */
const CODE_EXTENSIONS = new Set([
  // JavaScript / TypeScript
  ".ts",
  ".tsx",
  ".js",
  ".jsx",
  ".mjs",
  ".cjs",
  // Python, Ruby, Go, Rust, JVM
  ".py",
  ".rb",
  ".go",
  ".rs",
  ".java",
  ".kt",
  // C family
  ".c",
  ".cpp",
  ".h",
  ".hpp",
  ".cs",
  // Other languages
  ".swift",
  ".scala",
  ".lua",
  ".php",
  ".pl",
  // Shell
  ".sh",
  ".bash",
  ".zsh",
]);
|
|
34
|
+
|
|
35
|
+
/**
 * Patterns that indicate function/class boundaries.
 *
 * Each pattern is tested against a line whose leading whitespace has already
 * been stripped, so every pattern is anchored at the start of the line. None
 * of them use the `g`/`y` flags, so `test()` carries no state between calls.
 */
const BOUNDARY_PATTERNS = [
  // JS/TS function declarations, including `export default`, async and generators
  /^(?:export\s+(?:default\s+)?)?(?:async\s+)?function\*?\s/,
  // Arrow functions bound to a name: `const f = (…) =>` / `const f = async (…) =>`
  /^(?:export\s+)?(?:const|let|var)\s+\w+\s*=\s*(?:async\s+)?\(/,
  // Function expressions bound to a name
  /^(?:export\s+)?(?:const|let|var)\s+\w+\s*=\s*(?:async\s+)?function/,
  // Classes, including `export default class` and `abstract class`
  /^(?:export\s+(?:default\s+)?)?(?:abstract\s+)?class\s/,
  // Interfaces
  /^(?:export\s+)?interface\s/,
  // Type aliases, including generic ones such as `type Result<T> = …`
  /^(?:export\s+)?type\s+\w+\s*[<=]/,
  // Enums, including `declare enum` and `const enum`
  /^(?:export\s+)?(?:declare\s+)?(?:const\s+)?enum\s/,
  // Class methods introduced by a modifier keyword
  /^\s*(?:public|private|protected|static|async)\s+\w+\s*\(/,
  // Python functions, including coroutines (`async def`)
  /^(?:async\s+)?def\s+\w+/,
  // Python classes
  /^class\s+\w+/,
  // Go functions
  /^func\s+/,
  // Rust items, including `pub(crate)`, `const`/`async`/`unsafe fn`, and `impl<T>`
  /^(?:pub(?:\([^)]*\))?\s+)?(?:(?:const|async|unsafe)\s+)*(?:fn|struct|enum|impl|trait)[\s<]/,
  // Java/C# methods with an access modifier
  /^(?:public|private|protected)\s+(?:static\s+)?(?:\w+\s+)?\w+\s*\(/,
];
|
|
51
|
+
|
|
52
|
+
/**
 * Decide whether a file is eligible for chunking.
 *
 * A file is rejected when it is empty, when its content length exceeds
 * 1 MiB (1024 * 1024), or when its path has no extension.
 *
 * @param filePath - Path of the file; only its extension is inspected.
 * @param contentLength - Length of the file content.
 * @returns `true` when the file should be chunked.
 */
export function shouldChunk(filePath: string, contentLength: number): boolean {
  const maxLength = 1024 * 1024;
  const emptyOrOversized = contentLength === 0 || contentLength > maxLength;
  if (emptyOrOversized) {
    return false;
  }

  const extension = path.extname(filePath).toLowerCase();
  return extension ? true : false;
}
|
|
62
|
+
|
|
63
|
+
/**
 * Split a file's content into chunks, picking the strategy from the file
 * extension: code-aware for known source extensions, header-based for
 * Markdown (`.md` / `.mdx`), and plain line windows for everything else.
 *
 * @param filePath - Path of the file; only its extension is inspected.
 * @param content - Full text of the file.
 * @returns The chunks, or an empty array when the content is blank.
 */
export function chunkDocument(filePath: string, content: string): Chunk[] {
  if (content.trim().length === 0) {
    return [];
  }

  const extension = path.extname(filePath).toLowerCase();
  if (CODE_EXTENSIONS.has(extension)) {
    return codeChunker(content);
  }

  switch (extension) {
    case ".md":
    case ".mdx":
      // Markdown: split by headers
      return markdownChunker(content);
    default:
      return basicChunker(content);
  }
}
|
|
83
|
+
|
|
84
|
+
/**
 * Code-aware chunking: cut the file at lines that look like the start of a
 * function, class or similar declaration.
 *
 * Segments whose trimmed text is longer than twice MAX_CHUNK_CHARS are
 * re-split into line windows; the resulting list is then passed through
 * mergeSmallChunks.
 */
function codeChunker(content: string): Chunk[] {
  const sourceLines = content.split("\n");

  // Indices at which a segment begins. Line 0 always opens the first segment,
  // so it is never tested against the boundary patterns.
  const cutPoints: number[] = [0];
  sourceLines.forEach((line, index) => {
    if (index === 0) return;
    const candidate = line.trimStart();
    if (BOUNDARY_PATTERNS.some((pattern) => pattern.test(candidate))) {
      cutPoints.push(index);
    }
  });
  // Sentinel: the last segment runs to the end of the file.
  cutPoints.push(sourceLines.length);

  const pieces: Chunk[] = [];
  for (let k = 0; k + 1 < cutPoints.length; k++) {
    const from = cutPoints[k];
    const to = cutPoints[k + 1];
    const segment = sourceLines.slice(from, to);
    const text = segment.join("\n").trim();

    if (text.length === 0) continue;

    if (text.length <= MAX_CHUNK_CHARS * 2) {
      // `to` is an exclusive 0-based index, i.e. the inclusive 1-based last line.
      pieces.push({ content: text, startLine: from + 1, endLine: to });
      continue;
    }

    // Oversized segment: fall back to fixed line windows within it.
    pieces.push(...basicChunkerFromLines(segment, from));
  }

  // Merge tiny chunks (< 100 chars) with neighbors
  return mergeSmallChunks(pieces);
}
|
|
122
|
+
|
|
123
|
+
/**
 * Markdown chunking: split by headers.
 *
 * A new section starts at every level 1-3 ATX heading (`#`, `##`, `###`)
 * that begins in column 0. Lines inside fenced code blocks (``` or ~~~) are
 * never treated as headings, so a shell comment such as `# install deps`
 * inside a fence does not split its section. An unclosed fence extends to
 * the end of the document.
 *
 * @param content - Full Markdown text.
 * @returns One chunk per non-empty section, after mergeSmallChunks.
 */
function markdownChunker(content: string): Chunk[] {
  const lines = content.split("\n");
  const chunks: Chunk[] = [];
  let currentStart = 0;
  let currentLines: string[] = [];
  // Marker run (e.g. "```" or "~~~~") of the fence we are inside, or null.
  let openFence: string | null = null;

  lines.forEach((line, i) => {
    const fence = /^ {0,3}(`{3,}|~{3,})/.exec(line);
    if (fence) {
      const marker = fence[1];
      const rest = line.slice(fence[0].length);
      if (openFence === null) {
        // A backtick fence's info string may not contain backticks; such a
        // line is inline code, not the start of a fenced block.
        if (marker[0] === "~" || !rest.includes("`")) {
          openFence = marker;
        }
      } else if (
        marker[0] === openFence[0] &&
        marker.length >= openFence.length &&
        rest.trim() === ""
      ) {
        // Closing fence: same character, at least as long, nothing after it.
        openFence = null;
      }
    }

    const startsSection = openFence === null && /^#{1,3}\s/.test(line);
    if (startsSection && currentLines.length > 0) {
      const text = currentLines.join("\n").trim();
      if (text) {
        // `i` is the 0-based index of the heading, i.e. the 1-based number
        // of the line just before it.
        chunks.push({ content: text, startLine: currentStart + 1, endLine: i });
      }
      currentStart = i;
      currentLines = [line];
    } else {
      currentLines.push(line);
    }
  });

  // Last section
  const text = currentLines.join("\n").trim();
  if (text) {
    chunks.push({ content: text, startLine: currentStart + 1, endLine: lines.length });
  }

  return mergeSmallChunks(chunks);
}
|
|
153
|
+
|
|
154
|
+
/**
 * Basic chunking: break the content into overlapping fixed-size line
 * windows, numbering lines from the top of the content.
 */
function basicChunker(content: string): Chunk[] {
  const allLines = content.split("\n");
  return basicChunkerFromLines(allLines, 0);
}
|
|
160
|
+
|
|
161
|
+
/**
 * Slice `lines` into windows of a fixed number of lines, where consecutive
 * windows share OVERLAP_LINES lines.
 *
 * @param lines - Lines to split.
 * @param lineOffset - 0-based index of `lines[0]` within the original file;
 *   added to every reported line number.
 * @returns Chunks with 1-based, inclusive line ranges; windows that are blank
 *   after trimming are skipped.
 */
function basicChunkerFromLines(lines: string[], lineOffset: number): Chunk[] {
  // Window size derived from the character budget, assuming ~80 chars per line.
  const windowSize = Math.ceil(MAX_CHUNK_CHARS / 80);
  const stride = windowSize - OVERLAP_LINES;
  const result: Chunk[] = [];

  let from = 0;
  while (from < lines.length) {
    const to = Math.min(from + windowSize, lines.length);
    const text = lines.slice(from, to).join("\n").trim();

    if (text) {
      result.push({
        content: text,
        startLine: lineOffset + from + 1,
        endLine: lineOffset + to,
      });
    }

    // The window reached the end of the input; another step would only
    // repeat the overlap.
    if (to >= lines.length) break;
    from += stride;
  }

  return result;
}
|
|
183
|
+
|
|
184
|
+
/**
 * Merge chunks whose content is shorter than `minSize` characters into a
 * neighbor.
 *
 * A small chunk is merged forward into the chunk that follows it. A small
 * chunk at the very end has no follower, so it is merged backward into the
 * previous output chunk instead of being left as a tiny chunk of its own.
 * Merged contents are joined with a blank line and the line range spans
 * both chunks.
 *
 * @param chunks - Chunks in document order.
 * @param minSize - Minimum content length for a chunk to stand on its own.
 * @returns The merged list; the input array itself when it has 0 or 1 items.
 */
function mergeSmallChunks(chunks: Chunk[], minSize = 100): Chunk[] {
  if (chunks.length <= 1) return chunks;

  const join = (first: Chunk, second: Chunk): Chunk => ({
    content: first.content + "\n\n" + second.content,
    startLine: first.startLine,
    endLine: second.endLine,
  });

  const merged: Chunk[] = [];
  let current = chunks[0];

  for (let i = 1; i < chunks.length; i++) {
    if (current.content.length < minSize) {
      // Merge with next
      current = join(current, chunks[i]);
    } else {
      merged.push(current);
      current = chunks[i];
    }
  }

  // `current` is the trailing chunk. If it is still too small and something
  // precedes it, fold it into that predecessor.
  const previous = merged.pop();
  if (previous === undefined) {
    merged.push(current);
  } else if (current.content.length < minSize) {
    merged.push(join(previous, current));
  } else {
    merged.push(previous, current);
  }

  return merged;
}
|