claude-eidetic 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/README.md +333 -0
  2. package/dist/config.d.ts +25 -0
  3. package/dist/config.js +29 -10
  4. package/dist/core/cleanup.d.ts +8 -0
  5. package/dist/core/cleanup.js +41 -0
  6. package/dist/core/doc-indexer.d.ts +13 -0
  7. package/dist/core/doc-indexer.js +76 -0
  8. package/dist/core/doc-searcher.d.ts +13 -0
  9. package/dist/core/doc-searcher.js +65 -0
  10. package/dist/core/file-category.d.ts +7 -0
  11. package/dist/core/file-category.js +75 -0
  12. package/dist/core/indexer.js +12 -4
  13. package/dist/core/preview.d.ts +1 -2
  14. package/dist/core/preview.js +2 -5
  15. package/dist/core/repo-map.d.ts +33 -0
  16. package/dist/core/repo-map.js +144 -0
  17. package/dist/core/searcher.d.ts +1 -13
  18. package/dist/core/searcher.js +20 -24
  19. package/dist/core/snapshot-io.js +2 -2
  20. package/dist/core/sync.d.ts +5 -25
  21. package/dist/core/sync.js +90 -65
  22. package/dist/core/targeted-indexer.d.ts +19 -0
  23. package/dist/core/targeted-indexer.js +127 -0
  24. package/dist/embedding/factory.d.ts +0 -13
  25. package/dist/embedding/factory.js +0 -17
  26. package/dist/embedding/openai.d.ts +2 -14
  27. package/dist/embedding/openai.js +7 -20
  28. package/dist/errors.d.ts +2 -0
  29. package/dist/errors.js +2 -0
  30. package/dist/format.d.ts +12 -0
  31. package/dist/format.js +160 -31
  32. package/dist/hooks/post-tool-use.d.ts +13 -0
  33. package/dist/hooks/post-tool-use.js +113 -0
  34. package/dist/hooks/stop-hook.d.ts +11 -0
  35. package/dist/hooks/stop-hook.js +121 -0
  36. package/dist/hooks/targeted-runner.d.ts +11 -0
  37. package/dist/hooks/targeted-runner.js +66 -0
  38. package/dist/index.js +68 -9
  39. package/dist/infra/qdrant-bootstrap.js +14 -12
  40. package/dist/memory/history.d.ts +19 -0
  41. package/dist/memory/history.js +40 -0
  42. package/dist/memory/llm.d.ts +2 -0
  43. package/dist/memory/llm.js +56 -0
  44. package/dist/memory/prompts.d.ts +5 -0
  45. package/dist/memory/prompts.js +36 -0
  46. package/dist/memory/reconciler.d.ts +12 -0
  47. package/dist/memory/reconciler.js +36 -0
  48. package/dist/memory/store.d.ts +20 -0
  49. package/dist/memory/store.js +206 -0
  50. package/dist/memory/types.d.ts +28 -0
  51. package/dist/memory/types.js +2 -0
  52. package/dist/paths.d.ts +3 -4
  53. package/dist/paths.js +14 -4
  54. package/dist/precompact/hook.d.ts +9 -0
  55. package/dist/precompact/hook.js +170 -0
  56. package/dist/precompact/index-runner.d.ts +9 -0
  57. package/dist/precompact/index-runner.js +52 -0
  58. package/dist/precompact/note-writer.d.ts +15 -0
  59. package/dist/precompact/note-writer.js +109 -0
  60. package/dist/precompact/session-indexer.d.ts +13 -0
  61. package/dist/precompact/session-indexer.js +31 -0
  62. package/dist/precompact/tier0-inject.d.ts +16 -0
  63. package/dist/precompact/tier0-inject.js +88 -0
  64. package/dist/precompact/tier0-writer.d.ts +16 -0
  65. package/dist/precompact/tier0-writer.js +74 -0
  66. package/dist/precompact/transcript-parser.d.ts +10 -0
  67. package/dist/precompact/transcript-parser.js +148 -0
  68. package/dist/precompact/types.d.ts +93 -0
  69. package/dist/precompact/types.js +5 -0
  70. package/dist/precompact/utils.d.ts +29 -0
  71. package/dist/precompact/utils.js +95 -0
  72. package/dist/setup-message.d.ts +2 -2
  73. package/dist/setup-message.js +39 -20
  74. package/dist/splitter/ast.js +84 -22
  75. package/dist/splitter/line.d.ts +0 -4
  76. package/dist/splitter/line.js +1 -7
  77. package/dist/splitter/symbol-extract.d.ts +16 -0
  78. package/dist/splitter/symbol-extract.js +61 -0
  79. package/dist/splitter/types.d.ts +5 -0
  80. package/dist/splitter/types.js +1 -1
  81. package/dist/state/doc-metadata.d.ts +18 -0
  82. package/dist/state/doc-metadata.js +59 -0
  83. package/dist/state/registry.d.ts +1 -3
  84. package/dist/state/snapshot.d.ts +0 -1
  85. package/dist/state/snapshot.js +3 -19
  86. package/dist/tool-schemas.d.ts +251 -1
  87. package/dist/tool-schemas.js +307 -0
  88. package/dist/tools.d.ts +69 -0
  89. package/dist/tools.js +286 -17
  90. package/dist/vectordb/milvus.d.ts +7 -5
  91. package/dist/vectordb/milvus.js +116 -19
  92. package/dist/vectordb/qdrant.d.ts +8 -10
  93. package/dist/vectordb/qdrant.js +105 -33
  94. package/dist/vectordb/types.d.ts +20 -0
  95. package/messages.yaml +50 -0
  96. package/package.json +31 -6
@@ -0,0 +1,148 @@
1
+ /**
2
+ * Parse Claude Code transcript JSONL to extract session data.
3
+ * Extracts deterministic data from tool calls - no LLM needed.
4
+ */
5
+ import fs from 'node:fs';
6
+ import readline from 'node:readline';
7
+ import { truncateUnicode } from './utils.js';
8
+ const MAX_BASH_COMMANDS = 20;
9
+ const MAX_USER_MESSAGES = 5;
10
+ const BASH_COMMAND_MAX_LENGTH = 120;
11
+ const USER_MESSAGE_MAX_LENGTH = 200;
/**
 * Parse a Claude Code transcript JSONL file and extract session data.
 *
 * The file is streamed line by line. Blank lines, lines that are not valid
 * JSON, and JSON values that are not objects (e.g. `null`, `42`) are skipped
 * instead of aborting the whole parse.
 *
 * @param {string} transcriptPath Path of the JSONL transcript to read.
 * @param {string} sessionId Copied into the result unchanged.
 * @param {string} projectName Copied into the result unchanged.
 * @param {string} projectPath Copied into the result unchanged.
 * @param {string} [trigger='auto'] Copied into the result unchanged.
 * @returns {Promise<object>} The extracted session. `startTime` / `endTime`
 *   are the first / last `timestamp` seen, or `'unknown'` when no entry had
 *   one. `filesModified` and `mcpToolsCalled` are de-duplicated and sorted.
 */
export async function parseTranscript(transcriptPath, sessionId, projectName, projectPath, trigger = 'auto') {
    const userMessages = [];
    // Accumulators filled in by processToolCall for every tool_use block.
    const toolState = {
        filesModified: new Set(),
        bashCommands: [],
        mcpToolsCalled: new Set(),
        tasksCreated: [],
        tasksUpdated: [],
        taskIdToSubject: new Map(),
    };
    let branch = null;
    let startTime = null;
    let endTime = null;
    const fileStream = fs.createReadStream(transcriptPath);
    const rl = readline.createInterface({
        input: fileStream,
        crlfDelay: Infinity,
    });
    try {
        for await (const line of rl) {
            if (!line.trim())
                continue;
            let parsed;
            try {
                parsed = JSON.parse(line);
            }
            catch {
                // Skip malformed JSON lines
                continue;
            }
            // JSON.parse also accepts bare values; only objects carry transcript fields.
            if (parsed === null || typeof parsed !== 'object')
                continue;
            // First timestamp seen is the start, the most recent one is the end.
            if (parsed.timestamp) {
                if (!startTime)
                    startTime = parsed.timestamp;
                endTime = parsed.timestamp;
            }
            // Git branch comes from the first entry that has one.
            if (!branch && parsed.gitBranch) {
                branch = parsed.gitBranch;
            }
            // Collect user messages until the cap is reached.
            if (parsed.type === 'user' && userMessages.length < MAX_USER_MESSAGES) {
                const text = extractUserText(parsed);
                if (text) {
                    userMessages.push(truncateUnicode(text, USER_MESSAGE_MAX_LENGTH));
                }
            }
            // Tool calls live in the content blocks of assistant messages.
            if (parsed.type === 'assistant' && Array.isArray(parsed.message?.content)) {
                for (const content of parsed.message.content) {
                    if (content?.type !== 'tool_use')
                        continue;
                    processToolCall(content, toolState);
                }
            }
        }
    }
    finally {
        // Release the reader and the file handle even when iteration throws.
        rl.close();
        fileStream.destroy();
    }
    return {
        sessionId,
        projectName,
        projectPath,
        branch,
        startTime: startTime ?? 'unknown',
        endTime: endTime ?? 'unknown',
        filesModified: Array.from(toolState.filesModified).sort(),
        bashCommands: toolState.bashCommands,
        mcpToolsCalled: Array.from(toolState.mcpToolsCalled).sort(),
        tasksCreated: toolState.tasksCreated,
        tasksUpdated: toolState.tasksUpdated,
        userMessages,
        trigger,
    };
}
/**
 * Record the effects of one `tool_use` content block in `state`.
 *
 * - `Write` / `Edit`: adds `input.file_path` to `state.filesModified`.
 * - `Bash`: appends the truncated `input.command` to `state.bashCommands`
 *   until MAX_BASH_COMMANDS entries have been collected.
 * - `TaskCreate`: appends `input.subject` to `state.tasksCreated` and, when a
 *   string `input.taskId` is present, remembers the subject under that id.
 * - `TaskUpdate`: appends `"<subject> → <status>"` to `state.tasksUpdated`,
 *   taking the subject from the input or from a previously remembered id.
 * - Any tool whose name starts with `mcp__` is added to `state.mcpToolsCalled`.
 *
 * A block without a string `name` or an object `input` is tolerated: a
 * missing name matches no tool and a missing input has no fields.
 *
 * @param {object} content The `tool_use` block (`name`, `input`).
 * @param {object} state Mutable accumulators; modified in place.
 * @returns {void}
 */
function processToolCall(content, state) {
    const name = typeof content?.name === 'string' ? content.name : '';
    const rawInput = content?.input;
    const input = rawInput !== null && typeof rawInput === 'object' ? rawInput : {};
    switch (name) {
        case 'Write':
        case 'Edit': {
            if (typeof input.file_path === 'string') {
                state.filesModified.add(input.file_path);
            }
            break;
        }
        case 'Bash': {
            // Enforce the limit while collecting rather than trimming afterwards.
            if (state.bashCommands.length < MAX_BASH_COMMANDS && typeof input.command === 'string') {
                state.bashCommands.push(truncateUnicode(input.command, BASH_COMMAND_MAX_LENGTH));
            }
            break;
        }
        case 'TaskCreate': {
            const { subject, taskId } = input;
            if (typeof subject === 'string') {
                state.tasksCreated.push(subject);
                // Remember the subject so a later TaskUpdate can look it up by id.
                if (typeof taskId === 'string') {
                    state.taskIdToSubject.set(taskId, subject);
                }
            }
            break;
        }
        case 'TaskUpdate': {
            const { taskId, status } = input;
            // Prefer the subject given in the update, then the tracked one.
            let subject = input.subject;
            if (typeof subject !== 'string' && typeof taskId === 'string') {
                subject = state.taskIdToSubject.get(taskId);
            }
            if (typeof subject === 'string' && typeof status === 'string') {
                state.tasksUpdated.push(`${subject} → ${status}`);
            }
            break;
        }
        default:
            break;
    }
    // MCP tools are tracked by name only.
    if (name.startsWith('mcp__')) {
        state.mcpToolsCalled.add(name);
    }
}
/**
 * Return the text of a transcript entry, or `null` when it carries none.
 *
 * `message.content` is accepted in two shapes:
 * - a plain string, which is returned as is;
 * - an array of content blocks, from which the `text` of the first block with
 *   `type === 'text'` is returned.
 * Any other shape, and arrays without a text block, yield `null`. Entries in
 * the array that are not objects are ignored.
 *
 * @param {object} line One parsed transcript line.
 * @returns {string | null} The extracted text, or `null`.
 */
function extractUserText(line) {
    const content = line.message?.content;
    // A message body may be given as a bare string instead of a block list.
    if (typeof content === 'string')
        return content;
    if (!Array.isArray(content))
        return null;
    for (const block of content) {
        if (block?.type === 'text' && typeof block.text === 'string') {
            return block.text;
        }
    }
    return null;
}
148
+ //# sourceMappingURL=transcript-parser.js.map
@@ -0,0 +1,93 @@
1
+ /**
2
+ * Types for PreCompact hook - automatic session persistence before context compaction.
3
+ */
/**
 * JSON payload that the Claude Code PreCompact hook delivers on stdin.
 */
export interface PreCompactInput {
    /** Identifier of the session being compacted. */
    session_id: string;
    /** Location of the session's transcript JSONL file. */
    transcript_path: string;
    /** Working directory reported by the hook. */
    cwd: string;
    /** How the compaction was started. */
    trigger: 'auto' | 'manual';
    /** Discriminator: always the literal `'PreCompact'`. */
    hook_event_name: 'PreCompact';
}
/**
 * JSON payload that the Claude Code SessionEnd hook delivers on stdin.
 */
export interface SessionEndInput {
    /** Identifier of the session that ended. */
    session_id: string;
    /** Location of the session's transcript JSONL file. */
    transcript_path: string;
    /** Working directory reported by the hook. */
    cwd: string;
    /** Discriminator: always the literal `'SessionEnd'`. */
    hook_event_name: 'SessionEnd';
    /** Optional free-form reason supplied with the event. */
    reason?: string;
}
/**
 * Everything pulled out of one transcript JSONL file.
 * All fields are derived deterministically from the transcript entries and
 * their tool calls.
 */
export interface ExtractedSession {
    sessionId: string;
    projectName: string;
    projectPath: string;
    /** Git branch recorded in the transcript, or `null` when none was seen. */
    branch: string | null;
    /** Timestamp of the first entry that has one (the parser uses `'unknown'` when none does). */
    startTime: string;
    /** Timestamp of the last entry that has one (the parser uses `'unknown'` when none does). */
    endTime: string;
    /** Paths passed to file-writing tool calls. */
    filesModified: string[];
    /** Shell commands that were run. */
    bashCommands: string[];
    /** Names of the MCP tools that were invoked. */
    mcpToolsCalled: string[];
    /** Subjects of the tasks that were created. */
    tasksCreated: string[];
    /** One entry per task status change. */
    tasksUpdated: string[];
    /** Text of user messages. */
    userMessages: string[];
    /** What caused the extraction to run. */
    trigger: 'auto' | 'manual' | 'session_end';
}
/**
 * Condensed per-session entry used for Tier-0 fast lookup.
 * Entries are kept in `.session-index.json` so they can be injected at
 * SessionStart without further processing.
 */
export interface Tier0Record {
    sessionId: string;
    /** Date of the session (presumably YYYY-MM-DD; confirm against the writer). */
    date: string;
    /** Git branch of the session, or `null` when unknown. */
    branch: string | null;
    filesModified: string[];
    tasksCreated: string[];
    /** What caused the record to be written. */
    trigger: 'auto' | 'manual' | 'session_end';
    /** Reference to the session's note file. */
    noteFile: string;
}
/**
 * Per-project index of sessions, read for fast SessionStart context injection.
 * Location on disk: ~/.eidetic/notes/<project>/.session-index.json
 */
export interface SessionIndex {
    /** Project the index belongs to. */
    project: string;
    /** Tier-0 records of the project's sessions. */
    sessions: Tier0Record[];
    /** When the index was last written. */
    lastUpdated: string;
}
/**
 * One parsed line of a Claude Code transcript JSONL file.
 */
export interface TranscriptLine {
    /** Kind of entry. */
    type: 'user' | 'assistant' | 'system';
    /** Time of the entry, when recorded. */
    timestamp?: string;
    /** Git branch at the time of the entry, when recorded. */
    gitBranch?: string;
    /** Message payload, when the entry carries one. */
    message?: {
        role?: string;
        content?: TranscriptContent[];
    };
}
/**
 * One content block of a transcript message, discriminated by `type`.
 */
export type TranscriptContent =
    | {
        /** Plain text. */
        type: 'text';
        text: string;
    }
    | {
        /** A tool invocation with its arguments. */
        type: 'tool_use';
        id: string;
        name: string;
        input: Record<string, unknown>;
    }
    | {
        /** The outcome of an earlier `tool_use`, linked by `tool_use_id`. */
        type: 'tool_result';
        tool_use_id: string;
        content: unknown;
    };
93
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Types for PreCompact hook - automatic session persistence before context compaction.
3
+ */
4
+ export {};
5
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1,29 @@
1
+ /**
2
+ * Shared utilities for precompact module.
3
+ */
4
+ /**
5
+ * Extract YYYY-MM-DD date from ISO timestamp or return today's date.
6
+ */
7
+ export declare function extractDate(timestamp: string): string;
8
+ /**
9
+ * Get the notes directory for a project.
10
+ * Uses paths.ts normalization for consistency.
11
+ */
12
+ export declare function getNotesDir(projectName: string): string;
13
+ /**
14
+ * Truncate a string to maxLength with proper Unicode handling.
15
+ * Avoids splitting surrogate pairs (emoji, CJK characters).
16
+ * Adds ellipsis if truncated.
17
+ */
18
+ export declare function truncateUnicode(str: string, maxLength: number): string;
19
+ /**
20
+ * Write file atomically using write-to-temp-then-rename pattern.
21
+ * Prevents corruption from concurrent writes or process termination.
22
+ */
23
+ export declare function writeFileAtomic(filePath: string, content: string): void;
24
+ /**
25
+ * Generate a stable project identifier from path.
26
+ * Handles project name collisions by including path hash.
27
+ */
28
+ export declare function getProjectId(projectPath: string): string;
29
+ //# sourceMappingURL=utils.d.ts.map
@@ -0,0 +1,95 @@
1
+ /**
2
+ * Shared utilities for precompact module.
3
+ */
4
+ import path from 'node:path';
5
+ import fs from 'node:fs';
6
+ import { getConfig } from '../config.js';
7
+ import { normalizePath } from '../paths.js';
8
+ /**
9
+ * Extract YYYY-MM-DD date from ISO timestamp or return today's date.
10
+ */
11
+ export function extractDate(timestamp) {
12
+ if (timestamp === 'unknown' || !timestamp) {
13
+ return new Date().toISOString().slice(0, 10);
14
+ }
15
+ // Handle ISO format: 2026-02-19T10:00:00Z
16
+ const match = /^(\d{4}-\d{2}-\d{2})/.exec(timestamp);
17
+ if (match) {
18
+ return match[1];
19
+ }
20
+ return new Date().toISOString().slice(0, 10);
21
+ }
/**
 * Build the notes directory path for a project:
 * `<normalized eideticDataDir>/notes/<projectName>`.
 *
 * @param {string} projectName Name used as the last path segment.
 * @returns {string} The joined directory path.
 */
export function getNotesDir(projectName) {
    // The configured data dir is run through normalizePath before joining.
    const { eideticDataDir } = getConfig();
    return path.join(normalizePath(eideticDataDir), 'notes', projectName);
}
/**
 * Truncate a string to at most `maxLength` code points.
 *
 * Counting is done on code points, so a surrogate pair (for example an emoji
 * outside the Basic Multilingual Plane) is never cut in half. When the string
 * is shortened, the last kept position is replaced by an ellipsis so the
 * result is exactly `maxLength` code points long. Sequences built from several
 * code points (combining marks, ZWJ emoji) may still be split between code
 * points.
 *
 * @param {string} str Text to shorten.
 * @param {number} maxLength Maximum number of code points in the result;
 *   values of zero or below yield an empty string.
 * @returns {string} `str` itself when it already fits, otherwise the
 *   shortened text ending in '…'.
 */
export function truncateUnicode(str, maxLength) {
    // Nothing fits in a non-positive budget; without this guard the slice
    // below would receive a negative end index and return too much text.
    if (maxLength <= 0)
        return '';
    // UTF-16 length is an upper bound on the code point count: cheap fast path.
    if (str.length <= maxLength)
        return str;
    // Array.from iterates by code point, keeping surrogate pairs together.
    const codePoints = Array.from(str);
    if (codePoints.length <= maxLength)
        return str;
    // Leave room for the ellipsis.
    return codePoints.slice(0, maxLength - 1).join('') + '…';
}
/**
 * Write `content` to `filePath` by writing a temporary file first and then
 * renaming it over the destination.
 *
 * The destination directory is created (recursively) when missing. On any
 * failure the temporary file is removed on a best-effort basis and the
 * original error is rethrown.
 *
 * @param {string} filePath Destination path.
 * @param {string} content Text to write, encoded as UTF-8.
 * @returns {void}
 */
export function writeFileAtomic(filePath, content) {
    const targetDir = path.dirname(filePath);
    fs.mkdirSync(targetDir, { recursive: true });
    // The scratch file sits next to the destination so the rename stays
    // within one directory.
    const scratchPath = path.join(targetDir, `.tmp-${process.pid}-${Date.now()}`);
    try {
        fs.writeFileSync(scratchPath, content, 'utf-8');
        fs.renameSync(scratchPath, filePath);
    }
    catch (failure) {
        try {
            fs.unlinkSync(scratchPath);
        }
        catch {
            // Best effort only: the scratch file may never have been created.
        }
        throw failure;
    }
}
/**
 * Derive a project identifier of the form `<basename>-<hash>` from a path.
 *
 * The hash is the first six characters of `simpleHash` applied to the
 * normalized path, so projects that share a directory name but live in
 * different locations get different identifiers.
 *
 * @param {string} projectPath Path of the project.
 * @returns {string} The identifier.
 */
export function getProjectId(projectPath) {
    const fullPath = normalizePath(projectPath);
    const shortHash = simpleHash(fullPath).slice(0, 6);
    return [path.basename(fullPath), shortHash].join('-');
}
/**
 * Non-cryptographic string hash used to tell paths apart.
 *
 * Folds every UTF-16 code unit into a 32-bit accumulator (multiply by 31, add
 * the code unit, wrap to a signed 32-bit integer) and returns the absolute
 * value in base 36.
 *
 * @param {string} str Text to hash.
 * @returns {string} Base-36 digits of the hash.
 */
function simpleHash(str) {
    let acc = 0;
    for (let pos = 0; pos < str.length; pos += 1) {
        // Math.imul wraps the product to 32 bits; `| 0` wraps the sum again.
        acc = (Math.imul(acc, 31) + str.charCodeAt(pos)) | 0;
    }
    return Math.abs(acc).toString(36);
}
95
+ //# sourceMappingURL=utils.js.map
@@ -1,3 +1,3 @@
1
- /** Centralized setup/error messages easy to update links and instructions. */
2
- export declare function getSetupErrorMessage(errorDetail: string): string;
1
+ export type SetupContext = 'missing' | 'invalid' | 'unknown';
2
+ export declare function getSetupErrorMessage(errorDetail: string, context?: SetupContext): string;
3
3
  //# sourceMappingURL=setup-message.d.ts.map
@@ -1,23 +1,42 @@
1
- /** Centralized setup/error messages easy to update links and instructions. */
2
- const OPENAI_KEY_URL = 'https://platform.openai.com/api-keys';
3
- export function getSetupErrorMessage(errorDetail) {
4
- return (`Eidetic setup required: ${errorDetail}\n\n` +
1
+ import { readFileSync } from 'node:fs';
2
+ import { fileURLToPath } from 'node:url';
3
+ import { dirname, join } from 'node:path';
4
+ import { parse as parseYaml } from 'yaml';
5
+ const __filename = fileURLToPath(import.meta.url);
6
+ const __dirname = dirname(__filename);
7
+ const yamlPath = join(__dirname, '..', 'messages.yaml');
8
+ let _cached = null;
/**
 * Return the parsed contents of messages.yaml, reading and parsing the file
 * only while no truthy result has been cached in `_cached`.
 */
function loadMessages() {
    if (!_cached) {
        _cached = parseYaml(readFileSync(yamlPath, 'utf-8'));
    }
    return _cached;
}
/**
 * Guess the setup context from the environment.
 *
 * Returns 'missing' when OPENAI_API_KEY is unset or empty and
 * EMBEDDING_PROVIDER is not 'ollama'; returns 'invalid' in every other case.
 */
function detectContext() {
    if (process.env.EMBEDDING_PROVIDER === 'ollama') {
        return 'invalid';
    }
    return process.env.OPENAI_API_KEY ? 'invalid' : 'missing';
}
22
+ export function getSetupErrorMessage(errorDetail, context) {
23
+ const ctx = context ?? detectContext();
24
+ const msgs = loadMessages();
25
+ const block = msgs.setup[ctx];
26
+ const header = block.header.replace('{error}', errorDetail);
27
+ const diagnosis = block.diagnosis.trim() ? `**Diagnosis:** ${block.diagnosis.trim()}\n\n` : '';
28
+ return (`${header}\n\n` +
29
+ diagnosis +
5
30
  '## How to fix\n\n' +
6
- `1. **Get an API key**: ${OPENAI_KEY_URL}\n` +
7
- '2. **Add it to your config** (pick one):\n\n' +
8
- '### Quick (CLI)\n\n' +
9
- '```\nclaude mcp update claude-eidetic -e OPENAI_API_KEY=sk-your-key-here\n```\n\n' +
10
- '### Manual\n\n' +
11
- 'Edit `~/.claude/plugins/claude-eidetic/config.json`:\n' +
12
- '```json\n' +
13
- '{\n' +
14
- ' "env": {\n' +
15
- ' "OPENAI_API_KEY": "sk-your-key-here"\n' +
16
- ' }\n' +
17
- '}\n' +
18
- '```\n\n' +
19
- '### Using Ollama instead (free, local)\n\n' +
20
- 'Set `EMBEDDING_PROVIDER=ollama` in the same env block (requires Ollama running locally with `nomic-embed-text`).\n\n' +
21
- '3. **Restart Claude Code** for changes to take effect.');
31
+ `1. ${block.step1}\n` +
32
+ '2. **Set or update your config** (pick one):\n\n' +
33
+ msgs.setup.config_instructions +
34
+ `3. ${msgs.setup.footer}`);
35
+ }
36
+ // Called by plugin/hooks/session-start.sh
37
+ if (process.argv[1] === __filename) {
38
+ const context = process.argv[2] ?? 'missing';
39
+ const detail = process.argv[3] ?? 'OPENAI_API_KEY is not set.';
40
+ console.log(JSON.stringify({ additionalContext: getSetupErrorMessage(detail, context) }));
22
41
  }
23
42
  //# sourceMappingURL=setup-message.js.map
@@ -1,4 +1,6 @@
1
1
  import { createRequire } from 'node:module';
2
+ import { MAX_CHUNK_CHARS } from './types.js';
3
+ import { extractSymbolInfo, isContainerType } from './symbol-extract.js';
2
4
  // tree-sitter and language parsers are native CommonJS modules
3
5
  const require = createRequire(import.meta.url);
4
6
  const Parser = require('tree-sitter');
@@ -23,29 +25,67 @@ const languageParsers = {
23
25
  };
24
26
  // AST node types that represent logical code units per language
25
27
  const SPLITTABLE_TYPES = {
26
- javascript: ['function_declaration', 'arrow_function', 'class_declaration', 'method_definition', 'export_statement'],
27
- typescript: ['function_declaration', 'arrow_function', 'class_declaration', 'method_definition', 'export_statement', 'interface_declaration', 'type_alias_declaration'],
28
- tsx: ['function_declaration', 'arrow_function', 'class_declaration', 'method_definition', 'export_statement', 'interface_declaration', 'type_alias_declaration'],
29
- python: ['function_definition', 'class_definition', 'decorated_definition', 'async_function_definition'],
30
- java: ['method_declaration', 'class_declaration', 'interface_declaration', 'constructor_declaration'],
28
+ javascript: ['function_declaration', 'arrow_function', 'class_declaration', 'method_definition'],
29
+ typescript: [
30
+ 'function_declaration',
31
+ 'arrow_function',
32
+ 'class_declaration',
33
+ 'method_definition',
34
+ 'interface_declaration',
35
+ 'type_alias_declaration',
36
+ ],
37
+ tsx: [
38
+ 'function_declaration',
39
+ 'arrow_function',
40
+ 'class_declaration',
41
+ 'method_definition',
42
+ 'interface_declaration',
43
+ 'type_alias_declaration',
44
+ ],
45
+ python: [
46
+ 'function_definition',
47
+ 'class_definition',
48
+ 'decorated_definition',
49
+ 'async_function_definition',
50
+ ],
51
+ java: [
52
+ 'method_declaration',
53
+ 'class_declaration',
54
+ 'interface_declaration',
55
+ 'constructor_declaration',
56
+ ],
31
57
  cpp: ['function_definition', 'class_specifier', 'namespace_definition', 'declaration'],
32
- go: ['function_declaration', 'method_declaration', 'type_declaration', 'var_declaration', 'const_declaration'],
58
+ go: [
59
+ 'function_declaration',
60
+ 'method_declaration',
61
+ 'type_declaration',
62
+ 'var_declaration',
63
+ 'const_declaration',
64
+ ],
33
65
  rust: ['function_item', 'impl_item', 'struct_item', 'enum_item', 'trait_item', 'mod_item'],
34
- csharp: ['method_declaration', 'class_declaration', 'interface_declaration', 'struct_declaration', 'enum_declaration'],
66
+ csharp: [
67
+ 'method_declaration',
68
+ 'class_declaration',
69
+ 'interface_declaration',
70
+ 'struct_declaration',
71
+ 'enum_declaration',
72
+ ],
35
73
  };
36
- // Map aliases to canonical language names for node type lookup
37
74
  const LANG_CANONICAL = {
38
- js: 'javascript', ts: 'typescript', py: 'python',
39
- rs: 'rust', 'c++': 'cpp', c: 'cpp', cs: 'csharp',
75
+ js: 'javascript',
76
+ ts: 'typescript',
77
+ py: 'python',
78
+ rs: 'rust',
79
+ 'c++': 'cpp',
80
+ c: 'cpp',
81
+ cs: 'csharp',
40
82
  };
41
- const MAX_CHUNK_CHARS = 2500;
42
83
  export class AstSplitter {
43
84
  parser = new Parser();
44
85
  currentLang = '';
45
86
  // Shared across all AstSplitter instances — one cache per process
46
87
  static langCache = new Map();
47
88
  static resolveLanguage(lang) {
48
- // Resolve alias to canonical name first — prevents duplicate cache entries
49
89
  const canonical = LANG_CANONICAL[lang] ?? lang;
50
90
  const cached = AstSplitter.langCache.get(canonical);
51
91
  if (cached)
@@ -59,7 +99,7 @@ export class AstSplitter {
59
99
  return mod;
60
100
  }
61
101
  catch (err) {
62
- console.warn(`Failed to load tree-sitter parser for "${lang}": ${err}`);
102
+ console.warn(`Failed to load tree-sitter parser for "${lang}":`, err);
63
103
  return null;
64
104
  }
65
105
  }
@@ -68,10 +108,9 @@ export class AstSplitter {
68
108
  const canonical = LANG_CANONICAL[lang] ?? lang;
69
109
  const langModule = AstSplitter.resolveLanguage(lang);
70
110
  if (!langModule) {
71
- return []; // Caller should fall back to line splitter
111
+ return [];
72
112
  }
73
113
  try {
74
- // Skip setLanguage() if parser is already configured for this language
75
114
  if (canonical !== this.currentLang) {
76
115
  this.parser.setLanguage(langModule);
77
116
  this.currentLang = canonical;
@@ -81,14 +120,13 @@ export class AstSplitter {
81
120
  return [];
82
121
  const nodeTypes = SPLITTABLE_TYPES[canonical] ?? [];
83
122
  const rawChunks = this.extractChunks(tree.rootNode, code, nodeTypes, language, filePath);
84
- // If no meaningful chunks found, return empty (caller will use line splitter)
85
123
  if (rawChunks.length === 0)
86
124
  return [];
87
125
  return this.refineChunks(rawChunks);
88
126
  }
89
127
  catch (err) {
90
- console.warn(`AST parse failed for "${filePath}" (${language}): ${err}`);
91
- return []; // Caller should fall back to line splitter
128
+ console.warn(`AST parse failed for "${filePath}" (${language}):`, err);
129
+ return [];
92
130
  }
93
131
  }
94
132
  static isSupported(language) {
@@ -96,21 +134,37 @@ export class AstSplitter {
96
134
  }
97
135
  extractChunks(node, code, splittableTypes, language, filePath) {
98
136
  const chunks = [];
99
- const traverse = (current) => {
137
+ const traverse = (current, parentName) => {
100
138
  if (splittableTypes.includes(current.type)) {
101
139
  const text = code.slice(current.startIndex, current.endIndex);
102
140
  if (text.trim().length > 0) {
103
- chunks.push({
141
+ const symbolInfo = extractSymbolInfo(current, code, parentName);
142
+ const chunk = {
104
143
  content: text,
105
144
  startLine: current.startPosition.row + 1,
106
145
  endLine: current.endPosition.row + 1,
107
146
  language,
108
147
  filePath,
109
- });
148
+ };
149
+ if (symbolInfo) {
150
+ chunk.symbolName = symbolInfo.name;
151
+ chunk.symbolKind = symbolInfo.kind;
152
+ chunk.symbolSignature = symbolInfo.signature;
153
+ if (parentName)
154
+ chunk.parentSymbol = parentName;
155
+ }
156
+ chunks.push(chunk);
157
+ // If this is a container, pass its name as parentName to children
158
+ if (isContainerType(current.type) && symbolInfo?.name) {
159
+ for (const child of current.children) {
160
+ traverse(child, symbolInfo.name);
161
+ }
162
+ return;
163
+ }
110
164
  }
111
165
  }
112
166
  for (const child of current.children) {
113
- traverse(child);
167
+ traverse(child, parentName);
114
168
  }
115
169
  };
116
170
  traverse(node);
@@ -144,6 +198,10 @@ export class AstSplitter {
144
198
  endLine: startLine + lineCount - 1,
145
199
  language: chunk.language,
146
200
  filePath: chunk.filePath,
201
+ symbolName: chunk.symbolName,
202
+ symbolKind: chunk.symbolKind,
203
+ symbolSignature: chunk.symbolSignature,
204
+ parentSymbol: chunk.parentSymbol,
147
205
  });
148
206
  current = addition;
149
207
  startLine = chunk.startLine + i;
@@ -161,6 +219,10 @@ export class AstSplitter {
161
219
  endLine: startLine + lineCount - 1,
162
220
  language: chunk.language,
163
221
  filePath: chunk.filePath,
222
+ symbolName: chunk.symbolName,
223
+ symbolKind: chunk.symbolKind,
224
+ symbolSignature: chunk.symbolSignature,
225
+ parentSymbol: chunk.parentSymbol,
164
226
  });
165
227
  }
166
228
  return subChunks;
@@ -1,8 +1,4 @@
1
1
  import type { Splitter, CodeChunk } from './types.js';
2
- /**
3
- * Simple line-based splitter. Used as fallback when tree-sitter
4
- * doesn't support the language or fails to parse.
5
- */
6
2
  export declare class LineSplitter implements Splitter {
7
3
  private chunkLines;
8
4
  private overlapLines;
@@ -1,10 +1,6 @@
1
+ import { MAX_CHUNK_CHARS } from './types.js';
1
2
  const DEFAULT_CHUNK_LINES = 60;
2
3
  const OVERLAP_LINES = 5;
3
- const MAX_CHUNK_CHARS = 2500;
4
- /**
5
- * Simple line-based splitter. Used as fallback when tree-sitter
6
- * doesn't support the language or fails to parse.
7
- */
8
4
  export class LineSplitter {
9
5
  chunkLines;
10
6
  overlapLines;
@@ -72,9 +68,7 @@ export class LineSplitter {
72
68
  startLine = chunk.startLine + i;
73
69
  lineCount = 0;
74
70
  }
75
- // If a single line exceeds the limit, hard-split it by characters
76
71
  if (addition.length > MAX_CHUNK_CHARS) {
77
- // Flush anything accumulated before this line
78
72
  if (current.length > 0) {
79
73
  flush();
80
74
  current = '';
@@ -0,0 +1,16 @@
1
+ export interface SymbolInfo {
2
+ name: string;
3
+ kind: string;
4
+ signature: string;
5
+ }
6
+ interface AstNode {
7
+ type: string;
8
+ startIndex: number;
9
+ endIndex: number;
10
+ children: AstNode[];
11
+ text?: string;
12
+ }
13
+ export declare function extractSymbolInfo(node: AstNode, code: string, parentName?: string): SymbolInfo | undefined;
14
+ export declare function isContainerType(nodeType: string): boolean;
15
+ export {};
16
+ //# sourceMappingURL=symbol-extract.d.ts.map