claude-eidetic 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/dist/config.d.ts +87 -0
  2. package/dist/config.js +65 -0
  3. package/dist/core/indexer.d.ts +18 -0
  4. package/dist/core/indexer.js +169 -0
  5. package/dist/core/preview.d.ts +14 -0
  6. package/dist/core/preview.js +61 -0
  7. package/dist/core/searcher.d.ts +24 -0
  8. package/dist/core/searcher.js +101 -0
  9. package/dist/core/snapshot-io.d.ts +6 -0
  10. package/dist/core/snapshot-io.js +39 -0
  11. package/dist/core/sync.d.ts +35 -0
  12. package/dist/core/sync.js +188 -0
  13. package/dist/embedding/factory.d.ts +17 -0
  14. package/dist/embedding/factory.js +41 -0
  15. package/dist/embedding/openai.d.ts +45 -0
  16. package/dist/embedding/openai.js +243 -0
  17. package/dist/embedding/truncate.d.ts +6 -0
  18. package/dist/embedding/truncate.js +14 -0
  19. package/dist/embedding/types.d.ts +18 -0
  20. package/dist/embedding/types.js +2 -0
  21. package/dist/errors.d.ts +17 -0
  22. package/dist/errors.js +21 -0
  23. package/dist/format.d.ts +12 -0
  24. package/dist/format.js +97 -0
  25. package/dist/index.d.ts +3 -0
  26. package/dist/index.js +109 -0
  27. package/dist/infra/qdrant-bootstrap.d.ts +2 -0
  28. package/dist/infra/qdrant-bootstrap.js +94 -0
  29. package/dist/paths.d.ts +11 -0
  30. package/dist/paths.js +41 -0
  31. package/dist/splitter/ast.d.ts +13 -0
  32. package/dist/splitter/ast.js +169 -0
  33. package/dist/splitter/line.d.ts +14 -0
  34. package/dist/splitter/line.js +109 -0
  35. package/dist/splitter/types.d.ts +11 -0
  36. package/dist/splitter/types.js +2 -0
  37. package/dist/state/registry.d.ts +8 -0
  38. package/dist/state/registry.js +33 -0
  39. package/dist/state/snapshot.d.ts +26 -0
  40. package/dist/state/snapshot.js +101 -0
  41. package/dist/tool-schemas.d.ts +135 -0
  42. package/dist/tool-schemas.js +162 -0
  43. package/dist/tools.d.ts +40 -0
  44. package/dist/tools.js +169 -0
  45. package/dist/vectordb/milvus.d.ts +33 -0
  46. package/dist/vectordb/milvus.js +328 -0
  47. package/dist/vectordb/qdrant.d.ts +51 -0
  48. package/dist/vectordb/qdrant.js +241 -0
  49. package/dist/vectordb/types.d.ts +35 -0
  50. package/dist/vectordb/types.js +2 -0
  51. package/package.json +62 -0
package/dist/format.js ADDED
@@ -0,0 +1,97 @@
1
+ import { listProjects } from './state/registry.js';
2
/**
 * Wrap a string in a result object of the shape
 * `{ content: [{ type: 'text', text }] }`.
 *
 * @param {string} text - Text to place in the single content item.
 * @returns {{ content: Array<{ type: string, text: string }> }} The wrapped result.
 */
export function textResult(text) {
    const item = { type: 'text', text };
    return { content: [item] };
}
5
/**
 * Render the outcome of an indexing run as a Markdown summary: a heading line,
 * a metric/value table and, when any files failed to parse, a bounded list of them.
 *
 * @param {object} result - Indexing result. Reads `totalFiles`, `totalChunks`,
 *   `addedFiles`, `modifiedFiles`, `removedFiles`, `skippedFiles`,
 *   `parseFailures` (array), `estimatedTokens`, `estimatedCostUsd`, `durationMs`.
 * @param {string} normalizedPath - Path shown in the heading line.
 * @returns {string} Newline-joined Markdown text.
 */
export function formatIndexResult(result, normalizedPath) {
    // Cap on how many failed files are listed individually before summarising the rest.
    const MAX_LISTED_FAILURES = 10;
    const failures = result.parseFailures;
    // Use the same K/M scaling as formatPreview so large runs read "~2.5M" instead of "~2500K".
    const tokenStr = result.estimatedTokens >= 1_000_000
        ? `~${(result.estimatedTokens / 1_000_000).toFixed(1)}M`
        : `~${(result.estimatedTokens / 1000).toFixed(0)}K`;
    const lines = [
        `Indexing complete for ${normalizedPath}`,
        '',
        `| Metric | Value |`,
        `|--------|-------|`,
        `| Total files | ${result.totalFiles} |`,
        `| Total chunks | ${result.totalChunks} |`,
        `| Added files | ${result.addedFiles} |`,
        `| Modified files | ${result.modifiedFiles} |`,
        `| Removed files | ${result.removedFiles} |`,
        `| Skipped (unchanged) | ${result.skippedFiles} |`,
        `| Parse failures | ${failures.length} |`,
        `| Estimated tokens | ${tokenStr} |`,
        `| Estimated cost | $${result.estimatedCostUsd.toFixed(4)} |`,
        `| Duration | ${(result.durationMs / 1000).toFixed(1)}s |`,
    ];
    if (failures.length > 0) {
        lines.push('', '**Parse Failures:**');
        for (const file of failures.slice(0, MAX_LISTED_FAILURES)) {
            lines.push(`- ${file}`);
        }
        if (failures.length > MAX_LISTED_FAILURES) {
            lines.push(`- ... and ${failures.length - MAX_LISTED_FAILURES} more`);
        }
    }
    return lines.join('\n');
}
35
/**
 * Render a dry-run preview as text: per-extension file counts (largest first),
 * total file count, top directories, an estimated token/cost line and warnings.
 *
 * @param {object} preview - Preview data. Reads `byExtension` (extension -> count),
 *   `totalFiles`, `topDirectories` (array of `{ dir, count }`), `estimatedTokens`,
 *   `estimatedCostUsd` and `warnings` (array of strings).
 * @param {string} rootPath - Path shown in the heading line.
 * @returns {string} Newline-joined text.
 */
export function formatPreview(preview, rootPath) {
    const lines = [`Preview for ${rootPath}:`, ''];
    // Extension table, most common extension first.
    const byCountDesc = Object.entries(preview.byExtension)
        .sort(([, a], [, b]) => b - a);
    if (byCountDesc.length > 0) {
        lines.push('| Extension | Files |', '|-----------|-------|');
        for (const [ext, count] of byCountDesc) {
            lines.push(`| ${ext} | ${count.toLocaleString()} |`);
        }
        // A blank line terminates the table; without it a GFM renderer treats
        // the following "Total:" line as one more table row.
        lines.push('');
    }
    lines.push(`Total: ${preview.totalFiles.toLocaleString()} files`, '');
    // Top directories
    if (preview.topDirectories.length > 0) {
        lines.push('Top directories:');
        for (const { dir, count } of preview.topDirectories) {
            lines.push(` ${dir}/: ${count.toLocaleString()} files`);
        }
        lines.push('');
    }
    // Cost estimate: millions get one decimal, anything smaller is rounded to whole thousands.
    const tokenStr = preview.estimatedTokens >= 1_000_000
        ? `~${(preview.estimatedTokens / 1_000_000).toFixed(1)}M`
        : `~${(preview.estimatedTokens / 1000).toFixed(0)}K`;
    lines.push(`Estimated: ${tokenStr} tokens (~$${preview.estimatedCostUsd.toFixed(4)})`, '');
    // Warnings: always print the section so "no warnings" is explicit.
    lines.push('Warnings:');
    if (preview.warnings.length === 0) {
        lines.push('- None');
    }
    else {
        for (const warning of preview.warnings) {
            lines.push(`- ${warning}`);
        }
    }
    return lines.join('\n');
}
73
/**
 * Render the list of indexed codebases as Markdown, one `###` section per state.
 * When a state's path appears as a value in the project registry, the heading
 * also shows the registered project name.
 *
 * @param {Array<object>} states - Codebase states. Reads `path`, `status`,
 *   `totalFiles`, `totalChunks`, `lastIndexed`, `progress`, `progressMessage`, `error`.
 * @returns {string} Newline-joined Markdown text.
 */
export function formatListIndexed(states) {
    // Invert the registry (name -> path) into path -> name for heading lookups.
    const nameByPath = new Map();
    for (const [name, projectPath] of Object.entries(listProjects())) {
        nameByPath.set(projectPath, name);
    }
    const out = [`## Indexed Codebases (${states.length})\n`];
    for (const entry of states) {
        const project = nameByPath.get(entry.path);
        let heading = entry.path;
        if (project) {
            heading = `${entry.path} (project: \`${project}\`)`;
        }
        out.push(`### ${heading}`, `- **Status:** ${entry.status}`);
        // Falsy counters/timestamps (including 0) are omitted.
        if (entry.totalFiles) {
            out.push(`- **Files:** ${entry.totalFiles}`);
        }
        if (entry.totalChunks) {
            out.push(`- **Chunks:** ${entry.totalChunks}`);
        }
        if (entry.lastIndexed) {
            out.push(`- **Last indexed:** ${entry.lastIndexed}`);
        }
        const showProgress = entry.status === 'indexing' && entry.progress !== undefined;
        if (showProgress) {
            out.push(`- **Progress:** ${entry.progress}% — ${entry.progressMessage ?? ''}`);
        }
        if (entry.error) {
            out.push(`- **Error:** ${entry.error}`);
        }
        out.push('');
    }
    return out.join('\n');
}
97
+ //# sourceMappingURL=format.js.map
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env node
2
+ export {};
3
+ //# sourceMappingURL=index.d.ts.map
package/dist/index.js ADDED
@@ -0,0 +1,109 @@
1
+ #!/usr/bin/env node
2
// CRITICAL: only MCP protocol messages may be written to stdout, so every console
// method that Node sends to stdout (log, info, debug) is rerouted to stderr.
// console.warn is wrapped as well so all diagnostics share the same "[LEVEL]" prefix.
// NOTE: static `import` declarations are hoisted, so imported modules are evaluated
// before these assignments run; anything they log at load time is not covered.
const redirectToStderr = (label) => (...args) => {
    process.stderr.write(`[${label}] ${args.join(' ')}\n`);
};
console.log = redirectToStderr('LOG');
console.info = redirectToStderr('INFO');
console.debug = redirectToStderr('DEBUG');
console.warn = redirectToStderr('WARN');
10
+ import { Server } from '@modelcontextprotocol/sdk/server/index.js';
11
+ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
12
+ import { ListToolsRequestSchema, CallToolRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
13
+ import { loadConfig } from './config.js';
14
+ import { createEmbedding } from './embedding/factory.js';
15
+ import { QdrantVectorDB } from './vectordb/qdrant.js';
16
+ import { bootstrapQdrant } from './infra/qdrant-bootstrap.js';
17
+ import { StateManager, cleanupOrphanedSnapshots } from './state/snapshot.js';
18
+ import { ToolHandlers } from './tools.js';
19
+ import { TOOL_DEFINITIONS } from './tool-schemas.js';
20
+ const WORKFLOW_GUIDANCE = `# Eidetic Code Search Workflow
21
+
22
+ **Before searching:** Ensure the codebase is indexed.
23
+ - \`list_indexed\` → see what's already indexed
24
+ - \`index_codebase(path="...", dryRun=true)\` → preview before indexing
25
+ - \`index_codebase(path="...")\` → index (incremental, only re-embeds changed files)
26
+
27
+ **Searching efficiently:**
28
+ - \`search_code(query="...")\` → returns compact table by default (~20 tokens/result)
29
+ - Review the table, then use Read tool to fetch full code for interesting results
30
+ - Add \`compact=false\` only when you need all code snippets immediately
31
+ - Use \`extensionFilter\` to narrow by file type
32
+ - Use \`project\` param instead of \`path\` for convenience
33
+ - Start with specific queries, broaden if no results
34
+
35
+ **After first index:**
36
+ - Re-indexing is incremental (only changed files re-embedded)
37
+ - Use \`project\` param instead of \`path\` for convenience
38
+ - Use \`get_indexing_status\` to check progress during long indexes
39
+
40
+ **Cross-project search:**
41
+ - Index multiple projects, each with its own path
42
+ - Search across any indexed project regardless of current working directory`;
43
/**
 * Start the server: load configuration, initialise the embedding provider,
 * pick the vector database backend (Milvus when configured, otherwise Qdrant
 * via bootstrapQdrant), clean up orphaned snapshots, register the tool
 * handlers and connect the server over stdio.
 *
 * @returns {Promise<void>} Resolves once the server is connected to the transport.
 */
async function main() {
    const config = loadConfig();
    console.log(`Config loaded. Provider: ${config.vectordbProvider}, Model: ${config.embeddingModel}`);
    const embedding = createEmbedding(config);
    await embedding.initialize();
    let vectordb;
    if (config.vectordbProvider !== 'milvus') {
        const qdrantUrl = await bootstrapQdrant();
        vectordb = new QdrantVectorDB(qdrantUrl);
        console.log(`Using Qdrant at ${qdrantUrl}`);
    }
    else {
        // Loaded on demand so the Milvus module is only imported when it is selected.
        const { MilvusVectorDB } = await import('./vectordb/milvus.js');
        vectordb = new MilvusVectorDB();
        console.log(`Using Milvus at ${config.milvusAddress}`);
    }
    const orphanCount = await cleanupOrphanedSnapshots(vectordb);
    if (orphanCount > 0) {
        console.log(`Cleaned ${orphanCount} orphaned snapshot(s).`);
    }
    const state = new StateManager();
    const handlers = new ToolHandlers(embedding, vectordb, state);
    const server = new Server({ name: 'claude-eidetic', version: '0.1.0' }, { capabilities: { tools: {} } });
    server.setRequestHandler(ListToolsRequestSchema, async () => {
        return { tools: [...TOOL_DEFINITIONS] };
    });
    // Tool name -> handler. A Map is used so lookups never match inherited object keys.
    const routes = new Map([
        ['index_codebase', (toolArgs) => handlers.handleIndexCodebase(toolArgs)],
        ['search_code', (toolArgs) => handlers.handleSearchCode(toolArgs)],
        ['clear_index', (toolArgs) => handlers.handleClearIndex(toolArgs)],
        ['get_indexing_status', (toolArgs) => handlers.handleGetIndexingStatus(toolArgs)],
        ['list_indexed', () => handlers.handleListIndexed()],
        ['__IMPORTANT', () => ({ content: [{ type: 'text', text: WORKFLOW_GUIDANCE }] })],
    ]);
    server.setRequestHandler(CallToolRequestSchema, async (request) => {
        const { name, arguments: args } = request.params;
        const route = routes.get(name);
        if (route === undefined) {
            return {
                content: [{ type: 'text', text: `Unknown tool: ${name}` }],
                isError: true,
            };
        }
        return route(args ?? {});
    });
    const transport = new StdioServerTransport();
    await server.connect(transport);
    console.log('Claude Eidetic MCP server started on stdio.');
}
97
// Exit cleanly (status 0) when the process is asked to stop.
for (const signal of ['SIGINT', 'SIGTERM']) {
    process.on(signal, () => {
        console.error(`Received ${signal}, shutting down...`);
        process.exit(0);
    });
}
// Entry point: any error escaping main() is reported and the process exits with status 1.
main().catch((err) => {
    console.error('Fatal error:', err);
    process.exit(1);
});
109
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1,2 @@
1
+ export declare function bootstrapQdrant(): Promise<string>;
2
+ //# sourceMappingURL=qdrant-bootstrap.d.ts.map
@@ -0,0 +1,94 @@
1
+ import { execFileSync } from 'node:child_process';
2
+ import path from 'node:path';
3
+ import { BootstrapError } from '../errors.js';
4
+ import { getConfig } from '../config.js';
5
+ import { getDataDir } from '../paths.js';
6
+ const CONTAINER_NAME = 'eidetic-qdrant';
7
+ const HEALTH_TIMEOUT_MS = 30_000;
8
+ const HEALTH_POLL_MS = 500;
9
/**
 * Make sure a Qdrant instance is reachable at the configured URL.
 *
 * If the health check already passes, the URL is returned as is. Otherwise the
 * function tries to provide one through Docker: it reuses a running container
 * named CONTAINER_NAME, starts it if it exists but is not running, or creates
 * it from the `qdrant/qdrant` image, then polls the health endpoint for up to
 * HEALTH_TIMEOUT_MS.
 *
 * @returns {Promise<string>} The configured Qdrant URL once it is healthy.
 * @throws {BootstrapError} If Docker is unavailable, a docker command fails,
 *   or the instance does not become healthy in time.
 */
export async function bootstrapQdrant() {
    const config = getConfig();
    const url = config.qdrantUrl;
    if (await isQdrantHealthy(url)) {
        console.log(`Qdrant reachable at ${url}`);
        return url;
    }
    console.log(`Qdrant not reachable at ${url}. Attempting Docker auto-provision...`);
    if (!isDockerAvailable()) {
        throw new BootstrapError(`Qdrant not reachable at ${url} and Docker not found.\n` +
            `Either: (a) install Docker and retry, or (b) set QDRANT_URL to your Qdrant instance.`);
    }
    // Run a docker command and surface failures as BootstrapError, like every
    // other failure path here, instead of leaking a raw child_process error.
    const runDocker = (dockerArgs, action) => {
        try {
            execFileSync('docker', dockerArgs, { stdio: 'pipe' });
        }
        catch (err) {
            const detail = err instanceof Error ? err.message : String(err);
            throw new BootstrapError(`Failed to ${action} Qdrant container "${CONTAINER_NAME}": ${detail}`);
        }
    };
    const containerState = getContainerState();
    if (containerState === 'running') {
        console.log(`Container "${CONTAINER_NAME}" is running. Waiting for health...`);
    }
    else if (containerState === 'stopped') {
        console.log(`Container "${CONTAINER_NAME}" exists but stopped. Starting...`);
        runDocker(['start', CONTAINER_NAME], 'start');
    }
    else {
        // Forward slashes so the host path is accepted in the -v mapping on Windows too.
        const dataDir = path.join(getDataDir(), 'qdrant-data').replace(/\\/g, '/');
        console.log(`Creating new Qdrant container "${CONTAINER_NAME}"...`);
        // NOTE(review): the published ports are fixed at 6333/6334 while the health
        // check targets the configured URL — confirm they are expected to match.
        runDocker([
            'run', '-d',
            '--name', CONTAINER_NAME,
            '--restart', 'unless-stopped',
            '-p', '6333:6333',
            '-p', '6334:6334',
            '-v', `${dataDir}:/qdrant/storage`,
            'qdrant/qdrant',
        ], 'create');
    }
    const healthy = await waitForHealth(url, HEALTH_TIMEOUT_MS);
    if (!healthy) {
        throw new BootstrapError(`Qdrant container started but failed health check after ${HEALTH_TIMEOUT_MS / 1000}s. ` +
            `Check: docker logs ${CONTAINER_NAME}`);
    }
    console.log(`Qdrant auto-provisioned and healthy at ${url}`);
    return url;
}
50
/**
 * Probe the `/healthz` endpoint of a Qdrant instance.
 *
 * @param {string} url - Base URL of the instance; trailing slashes are ignored.
 * @returns {Promise<boolean>} `true` when the endpoint answers with an OK status
 *   within 3 seconds; `false` on any error, timeout or non-OK status.
 */
async function isQdrantHealthy(url) {
    // Strip trailing slashes so "http://host:6333/" does not become ".../​/healthz".
    const base = url.replace(/\/+$/, '');
    try {
        const resp = await fetch(`${base}/healthz`, { signal: AbortSignal.timeout(3000) });
        return resp.ok;
    }
    catch {
        // Unreachable, refused or timed out: report "not healthy" rather than throwing.
        return false;
    }
}
59
/**
 * Check whether Docker can be used by running `docker info` (10 second timeout).
 *
 * @returns {boolean} `true` if the command completes without throwing, otherwise `false`.
 */
function isDockerAvailable() {
    let available = true;
    try {
        execFileSync('docker', ['info'], { stdio: 'pipe', timeout: 10_000 });
    }
    catch {
        available = false;
    }
    return available;
}
68
/**
 * Ask Docker for the state of the container named CONTAINER_NAME.
 *
 * @returns {'none' | 'running' | 'stopped'} `'none'` when `docker ps` prints nothing
 *   or fails, `'running'` when it prints exactly "running", and `'stopped'` for
 *   any other reported state.
 */
function getContainerState() {
    const psArgs = [
        'ps', '-a',
        // Anchored pattern so only the exact container name matches.
        '--filter', `name=^/${CONTAINER_NAME}$`,
        '--format', '{{.State}}',
    ];
    let reported;
    try {
        reported = execFileSync('docker', psArgs, { encoding: 'utf-8', stdio: 'pipe' }).trim();
    }
    catch {
        return 'none';
    }
    if (reported === '') {
        return 'none';
    }
    return reported === 'running' ? 'running' : 'stopped';
}
85
/**
 * Poll isQdrantHealthy every HEALTH_POLL_MS until it succeeds or the time budget runs out.
 *
 * @param {string} url - Qdrant base URL to probe.
 * @param {number} timeoutMs - Maximum time to keep polling, in milliseconds.
 * @returns {Promise<boolean>} `true` as soon as a probe succeeds, `false` once the
 *   budget is exhausted.
 */
async function waitForHealth(url, timeoutMs) {
    const deadline = Date.now() + timeoutMs;
    const pause = () => new Promise((resolve) => setTimeout(resolve, HEALTH_POLL_MS));
    while (Date.now() < deadline) {
        const healthy = await isQdrantHealthy(url);
        if (healthy) {
            return true;
        }
        await pause();
    }
    return false;
}
94
+ //# sourceMappingURL=qdrant-bootstrap.js.map
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Normalize a path to forward slashes, resolve to absolute, remove trailing slash.
3
+ * This is the single source of truth for path handling — called at every boundary.
4
+ */
5
+ export declare function normalizePath(inputPath: string): string;
6
+ export declare function getDataDir(): string;
7
+ export declare function getSnapshotDir(): string;
8
+ export declare function getCacheDir(): string;
9
+ export declare function getRegistryPath(): string;
10
+ export declare function pathToCollectionName(absolutePath: string): string;
11
+ //# sourceMappingURL=paths.d.ts.map
package/dist/paths.js ADDED
@@ -0,0 +1,41 @@
1
+ import path from 'node:path';
2
+ import os from 'node:os';
3
+ import { getConfig } from './config.js';
4
+ /**
5
+ * Normalize a path to forward slashes, resolve to absolute, remove trailing slash.
6
+ * This is the single source of truth for path handling — called at every boundary.
7
+ */
8
+ export function normalizePath(inputPath) {
9
+ let resolved = inputPath;
10
+ if (resolved.startsWith('~')) {
11
+ resolved = path.join(os.homedir(), resolved.slice(1));
12
+ }
13
+ resolved = path.resolve(resolved);
14
+ resolved = resolved.replace(/\\/g, '/');
15
+ if (resolved.length > 1 && resolved.endsWith('/')) {
16
+ resolved = resolved.slice(0, -1);
17
+ }
18
+ return resolved;
19
+ }
20
/**
 * Return the configured data directory (`eideticDataDir`), passed through normalizePath.
 *
 * @returns {string} Normalized data directory path.
 */
export function getDataDir() {
    const { eideticDataDir } = getConfig();
    return normalizePath(eideticDataDir);
}
23
/**
 * Return the `snapshots` directory inside the data directory.
 *
 * @returns {string} Path of the form `<dataDir>/snapshots`.
 */
export function getSnapshotDir() {
    const dataDir = getDataDir();
    return dataDir + '/snapshots';
}
26
/**
 * Return the `cache` directory inside the data directory.
 *
 * @returns {string} Path of the form `<dataDir>/cache`.
 */
export function getCacheDir() {
    const dataDir = getDataDir();
    return dataDir + '/cache';
}
29
/**
 * Return the path of `registry.json` inside the data directory.
 *
 * @returns {string} Path of the form `<dataDir>/registry.json`.
 */
export function getRegistryPath() {
    const dataDir = getDataDir();
    return dataDir + '/registry.json';
}
32
/**
 * Derive a collection name from a path: normalize it, lower-case it, replace
 * every run of characters outside `a-z0-9` with a single underscore, trim a
 * leading/trailing underscore and prefix the result with `eidetic_`.
 *
 * @param {string} absolutePath - Path of the codebase.
 * @returns {string} Collection name of the form `eidetic_<slug>`.
 */
export function pathToCollectionName(absolutePath) {
    const lowered = normalizePath(absolutePath).toLowerCase();
    // Underscore is itself outside a-z0-9, so one pass collapses mixed runs too.
    const collapsed = lowered.replace(/[^a-z0-9]+/g, '_');
    const slug = collapsed.replace(/^_|_$/g, '');
    return 'eidetic_' + slug;
}
41
+ //# sourceMappingURL=paths.js.map
@@ -0,0 +1,13 @@
1
+ import type { Splitter, CodeChunk } from './types.js';
2
+ export declare class AstSplitter implements Splitter {
3
+ private parser;
4
+ private currentLang;
5
+ private static langCache;
6
+ private static resolveLanguage;
7
+ split(code: string, language: string, filePath: string): CodeChunk[];
8
+ static isSupported(language: string): boolean;
9
+ private extractChunks;
10
+ private refineChunks;
11
+ private splitLargeChunk;
12
+ }
13
+ //# sourceMappingURL=ast.d.ts.map
@@ -0,0 +1,169 @@
1
+ import { createRequire } from 'node:module';
2
// tree-sitter and its grammars are loaded through require(), so build a
// require function for this ES module.
const require = createRequire(import.meta.url);
const Parser = require('tree-sitter');
// One lazy loader per grammar package; a grammar is only required the first
// time its loader is invoked.
const loadJavascript = () => require('tree-sitter-javascript');
const loadTypescript = () => require('tree-sitter-typescript').typescript;
const loadTsx = () => require('tree-sitter-typescript').tsx;
const loadPython = () => require('tree-sitter-python');
const loadGo = () => require('tree-sitter-go');
const loadJava = () => require('tree-sitter-java');
const loadRust = () => require('tree-sitter-rust');
const loadCpp = () => require('tree-sitter-cpp');
const loadCsharp = () => require('tree-sitter-c-sharp');
// Language name or alias -> grammar loader.
const languageParsers = {
    javascript: loadJavascript,
    js: loadJavascript,
    typescript: loadTypescript,
    ts: loadTypescript,
    tsx: loadTsx,
    python: loadPython,
    py: loadPython,
    go: loadGo,
    java: loadJava,
    rust: loadRust,
    rs: loadRust,
    cpp: loadCpp,
    'c++': loadCpp,
    // C sources are parsed with the C++ grammar.
    c: loadCpp,
    csharp: loadCsharp,
    cs: loadCsharp,
};
// Node types shared by the JavaScript-family grammars.
const JS_FAMILY_TYPES = ['function_declaration', 'arrow_function', 'class_declaration', 'method_definition', 'export_statement'];
const TS_ONLY_TYPES = ['interface_declaration', 'type_alias_declaration'];
// AST node types that are emitted as chunks, keyed by canonical language name.
const SPLITTABLE_TYPES = {
    javascript: [...JS_FAMILY_TYPES],
    typescript: [...JS_FAMILY_TYPES, ...TS_ONLY_TYPES],
    tsx: [...JS_FAMILY_TYPES, ...TS_ONLY_TYPES],
    python: ['function_definition', 'class_definition', 'decorated_definition', 'async_function_definition'],
    java: ['method_declaration', 'class_declaration', 'interface_declaration', 'constructor_declaration'],
    cpp: ['function_definition', 'class_specifier', 'namespace_definition', 'declaration'],
    go: ['function_declaration', 'method_declaration', 'type_declaration', 'var_declaration', 'const_declaration'],
    rust: ['function_item', 'impl_item', 'struct_item', 'enum_item', 'trait_item', 'mod_item'],
    csharp: ['method_declaration', 'class_declaration', 'interface_declaration', 'struct_declaration', 'enum_declaration'],
};
// Alias -> canonical language name, used for SPLITTABLE_TYPES and cache lookups.
const LANG_CANONICAL = {
    js: 'javascript',
    ts: 'typescript',
    py: 'python',
    rs: 'rust',
    'c++': 'cpp',
    c: 'cpp',
    cs: 'csharp',
};
// Upper bound, in characters, for the content of a single chunk.
const MAX_CHUNK_CHARS = 2500;
42
/**
 * Splits source code into chunks along AST boundaries using tree-sitter.
 * `split` returns an empty array whenever it cannot produce chunks (unknown
 * language, grammar failed to load, parse error, no matching nodes) so the
 * caller can fall back to another splitter.
 */
export class AstSplitter {
    parser = new Parser();
    // Canonical name of the language the parser is currently configured for.
    currentLang = '';
    // Shared across all AstSplitter instances — one cache per process
    static langCache = new Map();
    /**
     * Map an alias to its canonical language name. Own-property checks keep
     * inherited keys such as "constructor" from being treated as aliases.
     */
    static #canonicalName(lang) {
        return Object.hasOwn(LANG_CANONICAL, lang) ? LANG_CANONICAL[lang] : lang;
    }
    /**
     * Load (and cache) the grammar module for a language name or alias.
     *
     * @param {string} lang - Lower-cased language name or alias.
     * @returns {object | null} The grammar module, or `null` if the language is
     *   unknown or its grammar failed to load.
     */
    static resolveLanguage(lang) {
        // Resolve alias to canonical name first — prevents duplicate cache entries
        const canonical = AstSplitter.#canonicalName(lang);
        const cached = AstSplitter.langCache.get(canonical);
        if (cached) {
            return cached;
        }
        const key = [canonical, lang].find((name) => Object.hasOwn(languageParsers, name));
        if (key === undefined) {
            return null;
        }
        try {
            const mod = languageParsers[key]();
            AstSplitter.langCache.set(canonical, mod);
            return mod;
        }
        catch (err) {
            console.warn(`Failed to load tree-sitter parser for "${lang}": ${err}`);
            return null;
        }
    }
    /**
     * Split `code` into chunks, one per splittable AST node.
     *
     * @param {string} code - Source text.
     * @param {string} language - Language name or alias (case-insensitive).
     * @param {string} filePath - Path recorded on every chunk.
     * @returns {Array<object>} Chunks with `content`, `startLine`, `endLine`
     *   (1-based), `language` and `filePath`; empty when nothing could be extracted.
     */
    split(code, language, filePath) {
        const lang = language.toLowerCase();
        const canonical = AstSplitter.#canonicalName(lang);
        const langModule = AstSplitter.resolveLanguage(lang);
        if (!langModule) {
            return []; // Caller should fall back to line splitter
        }
        try {
            // Skip setLanguage() if parser is already configured for this language
            if (canonical !== this.currentLang) {
                this.parser.setLanguage(langModule);
                this.currentLang = canonical;
            }
            const tree = this.parser.parse(code);
            if (!tree.rootNode) {
                return [];
            }
            const nodeTypes = Object.hasOwn(SPLITTABLE_TYPES, canonical) ? SPLITTABLE_TYPES[canonical] : [];
            const rawChunks = this.extractChunks(tree.rootNode, code, nodeTypes, language, filePath);
            // If no meaningful chunks found, return empty (caller will use line splitter)
            if (rawChunks.length === 0) {
                return [];
            }
            return this.refineChunks(rawChunks);
        }
        catch (err) {
            console.warn(`AST parse failed for "${filePath}" (${language}): ${err}`);
            return []; // Caller should fall back to line splitter
        }
    }
    /**
     * Report whether a grammar loader is registered for the language.
     *
     * @param {string} language - Language name or alias (case-insensitive).
     * @returns {boolean} `true` only for names that are own keys of the loader table.
     */
    static isSupported(language) {
        return Object.hasOwn(languageParsers, language.toLowerCase());
    }
    /**
     * Walk the tree depth-first and emit a chunk for every node whose type is
     * listed in `splittableTypes`.
     * NOTE(review): traversal continues into the children of a matched node, so
     * nested matches (e.g. a method inside a class) are emitted in addition to
     * their parent and chunks overlap — confirm this is intended.
     */
    extractChunks(node, code, splittableTypes, language, filePath) {
        const chunks = [];
        const traverse = (current) => {
            if (splittableTypes.includes(current.type)) {
                const text = code.slice(current.startIndex, current.endIndex);
                if (text.trim().length > 0) {
                    chunks.push({
                        content: text,
                        // tree-sitter rows are 0-based; chunk lines are 1-based.
                        startLine: current.startPosition.row + 1,
                        endLine: current.endPosition.row + 1,
                        language,
                        filePath,
                    });
                }
            }
            for (const child of current.children) {
                traverse(child);
            }
        };
        traverse(node);
        return chunks;
    }
    /** Pass small chunks through unchanged and break up those above MAX_CHUNK_CHARS. */
    refineChunks(chunks) {
        return chunks.flatMap((chunk) => (chunk.content.length <= MAX_CHUNK_CHARS ? [chunk] : this.splitLargeChunk(chunk)));
    }
    /**
     * Break an oversized chunk into pieces of at most MAX_CHUNK_CHARS characters.
     * Lines are packed greedily; a single line longer than the limit is cut by
     * characters (all of its pieces carry that line's number). Whitespace-only
     * pieces are dropped. This mirrors LineSplitter's handling of large chunks.
     */
    splitLargeChunk(chunk) {
        const lines = chunk.content.split('\n');
        const subChunks = [];
        let current = '';
        let startLine = chunk.startLine;
        let lineCount = 0;
        const flush = () => {
            if (current.trim().length > 0) {
                subChunks.push({
                    content: current,
                    startLine,
                    endLine: startLine + lineCount - 1,
                    language: chunk.language,
                    filePath: chunk.filePath,
                });
            }
            current = '';
            lineCount = 0;
        };
        for (let i = 0; i < lines.length; i++) {
            const lineNum = chunk.startLine + i;
            // Every line but the last gets its newline back.
            const addition = i < lines.length - 1 ? `${lines[i]}\n` : lines[i];
            if (current.length > 0 && current.length + addition.length > MAX_CHUNK_CHARS) {
                flush();
            }
            if (addition.length > MAX_CHUNK_CHARS) {
                // A single line exceeds the limit: hard-split it by characters.
                for (let offset = 0; offset < addition.length; offset += MAX_CHUNK_CHARS) {
                    const slice = addition.slice(offset, offset + MAX_CHUNK_CHARS);
                    if (slice.trim().length > 0) {
                        subChunks.push({
                            content: slice,
                            startLine: lineNum,
                            endLine: lineNum,
                            language: chunk.language,
                            filePath: chunk.filePath,
                        });
                    }
                }
                continue;
            }
            if (lineCount === 0) {
                startLine = lineNum;
            }
            current += addition;
            lineCount++;
        }
        flush();
        return subChunks;
    }
}
169
+ //# sourceMappingURL=ast.js.map
@@ -0,0 +1,14 @@
1
+ import type { Splitter, CodeChunk } from './types.js';
2
+ /**
3
+ * Simple line-based splitter. Used as fallback when tree-sitter
4
+ * doesn't support the language or fails to parse.
5
+ */
6
+ export declare class LineSplitter implements Splitter {
7
+ private chunkLines;
8
+ private overlapLines;
9
+ constructor(chunkLines?: number, overlapLines?: number);
10
+ split(code: string, language: string, filePath: string): CodeChunk[];
11
+ private refineChunks;
12
+ private splitLargeChunk;
13
+ }
14
+ //# sourceMappingURL=line.d.ts.map
@@ -0,0 +1,109 @@
1
// Default number of lines per chunk.
const DEFAULT_CHUNK_LINES = 60;
// Default number of lines shared between consecutive chunks.
const OVERLAP_LINES = 5;
// Upper bound, in characters, for the content of a single chunk.
const MAX_CHUNK_CHARS = 2500;
/**
 * Simple line-based splitter. Used as fallback when tree-sitter
 * doesn't support the language or fails to parse.
 */
export class LineSplitter {
    chunkLines;
    overlapLines;
    /**
     * @param {number} [chunkLines] - Lines per chunk; values below 1 are raised to 1.
     * @param {number} [overlapLines] - Lines repeated at the start of the next
     *   chunk; clamped to the range `0 .. chunkLines - 1` so the window always advances.
     */
    constructor(chunkLines = DEFAULT_CHUNK_LINES, overlapLines = OVERLAP_LINES) {
        this.chunkLines = Math.max(1, chunkLines);
        this.overlapLines = Math.max(0, Math.min(overlapLines, this.chunkLines - 1));
    }
    /**
     * Split `code` into overlapping windows of `chunkLines` lines.
     *
     * @param {string} code - Source text.
     * @param {string} language - Language label recorded on every chunk.
     * @param {string} filePath - Path recorded on every chunk.
     * @returns {Array<object>} Chunks with `content`, `startLine`, `endLine`
     *   (1-based, inclusive), `language` and `filePath`. Whitespace-only windows
     *   are skipped; windows above MAX_CHUNK_CHARS are split further.
     */
    split(code, language, filePath) {
        const lines = code.split('\n');
        const raw = [];
        let start = 0;
        while (start < lines.length) {
            const end = Math.min(start + this.chunkLines, lines.length);
            const content = lines.slice(start, end).join('\n');
            if (content.trim().length > 0) {
                raw.push({
                    content,
                    startLine: start + 1,
                    endLine: end,
                    language,
                    filePath,
                });
            }
            // Stop once the window has reached the last line. Stepping back by the
            // overlap here would only re-emit ever-shorter tails of this same chunk.
            if (end >= lines.length) {
                break;
            }
            start = Math.max(start + 1, end - this.overlapLines);
        }
        return this.refineChunks(raw);
    }
    /** Pass small chunks through unchanged and break up those above MAX_CHUNK_CHARS. */
    refineChunks(chunks) {
        return chunks.flatMap((chunk) => (chunk.content.length <= MAX_CHUNK_CHARS ? [chunk] : this.splitLargeChunk(chunk)));
    }
    /**
     * Break an oversized chunk into pieces of at most MAX_CHUNK_CHARS characters.
     * Lines are packed greedily; a single line longer than the limit is cut by
     * characters (all of its pieces carry that line's number). Whitespace-only
     * pieces are dropped.
     */
    splitLargeChunk(chunk) {
        const lines = chunk.content.split('\n');
        const subChunks = [];
        let current = '';
        let startLine = chunk.startLine;
        let lineCount = 0;
        const flush = () => {
            if (current.trim().length > 0) {
                subChunks.push({
                    content: current,
                    startLine,
                    endLine: startLine + lineCount - 1,
                    language: chunk.language,
                    filePath: chunk.filePath,
                });
            }
            current = '';
            lineCount = 0;
        };
        for (let i = 0; i < lines.length; i++) {
            const lineNum = chunk.startLine + i;
            // Every line but the last gets its newline back.
            const addition = i < lines.length - 1 ? `${lines[i]}\n` : lines[i];
            if (current.length > 0 && current.length + addition.length > MAX_CHUNK_CHARS) {
                flush();
            }
            if (addition.length > MAX_CHUNK_CHARS) {
                // If a single line exceeds the limit, hard-split it by characters
                for (let offset = 0; offset < addition.length; offset += MAX_CHUNK_CHARS) {
                    const slice = addition.slice(offset, offset + MAX_CHUNK_CHARS);
                    if (slice.trim().length > 0) {
                        subChunks.push({
                            content: slice,
                            startLine: lineNum,
                            endLine: lineNum,
                            language: chunk.language,
                            filePath: chunk.filePath,
                        });
                    }
                }
                continue;
            }
            if (lineCount === 0) {
                startLine = lineNum;
            }
            current += addition;
            lineCount++;
        }
        flush();
        return subChunks;
    }
}
109
+ //# sourceMappingURL=line.js.map