claude-eidetic 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/dist/config.d.ts +87 -0
  2. package/dist/config.js +65 -0
  3. package/dist/core/indexer.d.ts +18 -0
  4. package/dist/core/indexer.js +169 -0
  5. package/dist/core/preview.d.ts +14 -0
  6. package/dist/core/preview.js +61 -0
  7. package/dist/core/searcher.d.ts +24 -0
  8. package/dist/core/searcher.js +101 -0
  9. package/dist/core/snapshot-io.d.ts +6 -0
  10. package/dist/core/snapshot-io.js +39 -0
  11. package/dist/core/sync.d.ts +35 -0
  12. package/dist/core/sync.js +188 -0
  13. package/dist/embedding/factory.d.ts +17 -0
  14. package/dist/embedding/factory.js +41 -0
  15. package/dist/embedding/openai.d.ts +45 -0
  16. package/dist/embedding/openai.js +243 -0
  17. package/dist/embedding/truncate.d.ts +6 -0
  18. package/dist/embedding/truncate.js +14 -0
  19. package/dist/embedding/types.d.ts +18 -0
  20. package/dist/embedding/types.js +2 -0
  21. package/dist/errors.d.ts +17 -0
  22. package/dist/errors.js +21 -0
  23. package/dist/format.d.ts +12 -0
  24. package/dist/format.js +97 -0
  25. package/dist/index.d.ts +3 -0
  26. package/dist/index.js +109 -0
  27. package/dist/infra/qdrant-bootstrap.d.ts +2 -0
  28. package/dist/infra/qdrant-bootstrap.js +94 -0
  29. package/dist/paths.d.ts +11 -0
  30. package/dist/paths.js +41 -0
  31. package/dist/splitter/ast.d.ts +13 -0
  32. package/dist/splitter/ast.js +169 -0
  33. package/dist/splitter/line.d.ts +14 -0
  34. package/dist/splitter/line.js +109 -0
  35. package/dist/splitter/types.d.ts +11 -0
  36. package/dist/splitter/types.js +2 -0
  37. package/dist/state/registry.d.ts +8 -0
  38. package/dist/state/registry.js +33 -0
  39. package/dist/state/snapshot.d.ts +26 -0
  40. package/dist/state/snapshot.js +101 -0
  41. package/dist/tool-schemas.d.ts +135 -0
  42. package/dist/tool-schemas.js +162 -0
  43. package/dist/tools.d.ts +40 -0
  44. package/dist/tools.js +169 -0
  45. package/dist/vectordb/milvus.d.ts +33 -0
  46. package/dist/vectordb/milvus.js +328 -0
  47. package/dist/vectordb/qdrant.d.ts +51 -0
  48. package/dist/vectordb/qdrant.js +241 -0
  49. package/dist/vectordb/types.d.ts +35 -0
  50. package/dist/vectordb/types.js +2 -0
  51. package/package.json +62 -0
@@ -0,0 +1,188 @@
1
+ import fs from 'node:fs';
2
+ import path from 'node:path';
3
+ import { createHash } from 'node:crypto';
4
+ import { glob } from 'glob';
5
/**
 * File extensions considered indexable by default. Callers lowercase the
 * extension before membership checks, so mixed-case entries (e.g. '.R')
 * are matched case-insensitively.
 */
const DEFAULT_EXTENSIONS = new Set([
    // JavaScript / TypeScript family
    '.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs',
    // Mainstream languages
    '.py', '.pyi',
    '.go', '.java', '.rs',
    '.cpp', '.cc', '.cxx', '.c', '.h', '.hpp',
    '.cs', '.scala', '.rb', '.php', '.swift',
    '.kt', '.kts', '.lua',
    '.sh', '.bash', '.zsh', '.sql',
    '.r', '.R',
    '.m', '.mm',   // Objective-C
    '.dart',
    '.ex', '.exs', // Elixir
    '.erl', '.hrl', // Erlang
    '.hs',          // Haskell
    '.ml', '.mli',  // OCaml
    // Component frameworks
    '.vue', '.svelte', '.astro',
    // Config & markup
    '.yaml', '.yml', '.toml', '.json',
    '.md', '.mdx',
    '.html', '.css', '.scss', '.less',
]);
/**
 * Glob patterns always excluded from scanning: dependency trees, build
 * output, caches, minified bundles, and lockfiles.
 */
const DEFAULT_IGNORE = [
    '**/node_modules/**',
    '**/.git/**',
    '**/dist/**',
    '**/build/**',
    '**/.next/**',
    '**/target/**',
    '**/__pycache__/**',
    '**/.venv/**',
    '**/venv/**',
    '**/vendor/**',
    '**/.cache/**',
    '**/coverage/**',
    '**/*.min.js',
    '**/*.min.css',
    '**/package-lock.json',
    '**/pnpm-lock.yaml',
    '**/yarn.lock',
];
54
/**
 * Scan a directory and return relative paths of indexable files.
 * Respects .gitignore if present.
 *
 * @param rootPath Absolute path of the directory to scan.
 * @param customExtensions Extra extensions (with leading dot) to index.
 * @param customIgnore Extra glob patterns to exclude.
 * @returns Sorted, root-relative paths whose extension is indexable.
 */
export async function scanFiles(rootPath, customExtensions = [], customIgnore = []) {
    const allowed = new Set([...DEFAULT_EXTENSIONS, ...customExtensions]);
    // Merge built-in excludes with .gitignore (if present) and caller excludes.
    const ignore = [...DEFAULT_IGNORE, ...readGitignore(rootPath), ...customIgnore];
    const candidates = await glob('**/*', {
        cwd: rootPath,
        nodir: true,
        dot: false,      // hidden files are never indexed
        ignore,
        absolute: false, // keep paths relative to rootPath
    });
    const matches = candidates.filter(rel => allowed.has(path.extname(rel).toLowerCase()));
    matches.sort();
    return matches;
}
74
/**
 * Compute a truncated SHA-256 hash of a file's contents.
 * 16 hex chars (64 bits) is sufficient for change detection — a collision
 * would only cause a redundant re-index, never data loss.
 */
function hashFileContent(fullPath) {
    const digest = createHash('sha256')
        .update(fs.readFileSync(fullPath))
        .digest('hex');
    return digest.slice(0, 16);
}
83
/**
 * Build a content-hash snapshot for a list of files.
 *
 * Unreadable entries (deleted between scan and hash, permission errors, …)
 * are skipped with a warning rather than aborting the whole snapshot.
 *
 * @param rootPath Root directory the relative paths are resolved against.
 * @param relativePaths Root-relative file paths to fingerprint.
 * @returns Mapping of relative path -> { contentHash }.
 */
export function buildSnapshot(rootPath, relativePaths) {
    const snapshot = {};
    relativePaths.forEach(rel => {
        try {
            snapshot[rel] = { contentHash: hashFileContent(path.join(rootPath, rel)) };
        }
        catch (err) {
            console.warn(`Skipping "${rel}": ${err}`);
        }
    });
    return snapshot;
}
100
/**
 * Compare two snapshots and report added, modified, and removed files.
 *
 * Content hash is the authoritative change signal — immune to git operations,
 * IDE formatters, NFS clock skew, and other mtime-only pitfalls.
 *
 * @param previous Snapshot from the last sync (path -> { contentHash }).
 * @param current Freshly built snapshot.
 * @returns Relative paths grouped by change type.
 */
export function diffSnapshots(previous, current) {
    const added = [];
    const modified = [];
    for (const [rel, entry] of Object.entries(current)) {
        const before = previous[rel];
        if (!before) {
            added.push(rel);
        }
        else if (before.contentHash !== entry.contentHash) {
            modified.push(rel);
        }
    }
    // Anything present before but absent now has been removed.
    const removed = Object.keys(previous).filter(rel => !(rel in current));
    return { added, modified, removed };
}
125
/**
 * Parse .gitignore content into glob patterns usable with glob's `ignore`
 * option. Pure function — no filesystem access.
 *
 * Comments and negated patterns (leading `!`) are dropped. Rooted patterns
 * (leading `/`) stay anchored at the scan root; bare names match at any depth.
 *
 * Directory-only patterns (trailing `/`) expand to BOTH `dir` and `dir/**`.
 * Scanning runs glob with `nodir: true`, so only *file* paths are tested
 * against the ignore list, and a pattern lacking the `/**` suffix would
 * never match the files inside the directory — making the ignore a no-op.
 */
export function parseGitignorePatterns(content) {
    return content
        .split('\n')
        .map(line => line.trim())
        .filter(line => line && !line.startsWith('#') && !line.startsWith('!'))
        .flatMap(raw => {
            // Directory-only patterns: trailing /
            const isDirPattern = raw.endsWith('/');
            let pattern = isDirPattern ? raw.slice(0, -1) : raw;
            if (pattern.startsWith('/')) {
                // Rooted pattern: strip the anchor, keep it relative to root.
                pattern = pattern.slice(1);
            }
            else if (!pattern.includes('/') && pattern.length > 0) {
                // Unrooted pattern without / matches at any depth.
                pattern = `**/${pattern}`;
            }
            // Drop degenerate lines (e.g. a bare "/") instead of emitting junk.
            if (pattern.length === 0)
                return [];
            // Bug fix: also ignore the directory's *contents*, not just the
            // directory entry itself (which nodir:true never yields).
            return isDirPattern ? [pattern, `${pattern}/**`] : [pattern];
        });
}
151
/**
 * Load and parse the .gitignore at rootPath.
 * Returns an empty pattern list when the file is missing or unreadable.
 */
function readGitignore(rootPath) {
    try {
        const raw = fs.readFileSync(path.join(rootPath, '.gitignore'), 'utf-8');
        return parseGitignorePatterns(raw);
    }
    catch {
        // No .gitignore (or unreadable) — nothing extra to ignore.
        return [];
    }
}
161
// Extension -> language table for the splitter, keyed by LOWERCASED extension.
// Hoisted to module scope so it is built once, not on every call.
// The original table also carried a '.R' key, but lookups lowercase the
// extension first, so that key was unreachable dead code and is omitted.
// NOTE(review): '.m', '.mm', '.erl', '.hrl', and '.mli' are scanned by
// DEFAULT_EXTENSIONS but fall through to 'unknown' here — confirm the
// splitter's 'unknown' fallback handles them as intended.
const EXTENSION_LANGUAGE_MAP = {
    '.ts': 'typescript', '.tsx': 'tsx',
    '.js': 'javascript', '.jsx': 'javascript', '.mjs': 'javascript', '.cjs': 'javascript',
    '.py': 'python', '.pyi': 'python',
    '.go': 'go',
    '.java': 'java',
    '.rs': 'rust',
    '.cpp': 'cpp', '.cc': 'cpp', '.cxx': 'cpp', '.c': 'c', '.h': 'cpp', '.hpp': 'cpp',
    '.cs': 'csharp',
    '.scala': 'scala',
    '.rb': 'ruby', '.php': 'php', '.swift': 'swift',
    '.kt': 'kotlin', '.kts': 'kotlin',
    '.lua': 'lua', '.sh': 'bash', '.bash': 'bash', '.zsh': 'bash',
    '.sql': 'sql', '.r': 'r',
    '.dart': 'dart', '.ex': 'elixir', '.exs': 'elixir',
    '.hs': 'haskell', '.ml': 'ocaml',
    '.vue': 'vue', '.svelte': 'svelte', '.astro': 'astro',
    '.yaml': 'yaml', '.yml': 'yaml', '.toml': 'toml', '.json': 'json',
    '.md': 'markdown', '.mdx': 'markdown',
    '.html': 'html', '.css': 'css', '.scss': 'scss', '.less': 'less',
};
/**
 * Map a file extension to a language name for the splitter.
 * Case-insensitive; returns 'unknown' for unmapped extensions.
 */
export function extensionToLanguage(ext) {
    return EXTENSION_LANGUAGE_MAP[ext.toLowerCase()] ?? 'unknown';
}
188
+ //# sourceMappingURL=sync.js.map
@@ -0,0 +1,17 @@
1
+ import type { Config } from '../config.js';
2
+ import type { Embedding } from './types.js';
3
+ /**
4
+ * Create an Embedding instance based on the configured provider.
5
+ *
6
+ * - 'openai' Uses the OpenAI API directly (requires OPENAI_API_KEY).
7
+ * - 'ollama' Uses Ollama's OpenAI-compatible /v1/embeddings endpoint.
8
+ * No API key required; defaults to model "nomic-embed-text".
9
+ * - 'local' Uses any OpenAI-compatible server at OPENAI_BASE_URL.
10
+ * Useful for LM Studio, vLLM, LocalAI, etc.
11
+ *
12
+ * The key insight is that Ollama and most local servers expose an
13
+ * OpenAI-compatible embeddings API, so we reuse OpenAIEmbedding
14
+ * with different connection parameters rather than creating separate classes.
15
+ */
16
+ export declare function createEmbedding(config: Config): Embedding;
17
+ //# sourceMappingURL=factory.d.ts.map
@@ -0,0 +1,41 @@
1
+ import { OpenAIEmbedding } from './openai.js';
2
/**
 * Create an Embedding instance based on the configured provider.
 *
 * - 'openai' Uses the OpenAI API directly (requires OPENAI_API_KEY).
 * - 'ollama' Uses Ollama's OpenAI-compatible /v1/embeddings endpoint.
 *            No API key required; defaults to model "nomic-embed-text".
 * - 'local'  Uses any OpenAI-compatible server at OPENAI_BASE_URL
 *            (LM Studio, vLLM, LocalAI, etc.).
 *
 * Ollama and most local servers expose an OpenAI-compatible embeddings API,
 * so a single OpenAIEmbedding class is reused with different connection
 * parameters instead of maintaining per-provider classes.
 */
export function createEmbedding(config) {
    const provider = config.embeddingProvider;
    if (provider === 'openai') {
        return new OpenAIEmbedding();
    }
    if (provider === 'ollama') {
        // Ollama ignores the API key, but the OpenAI SDK insists on a
        // non-empty string, so fall back to a placeholder.
        return new OpenAIEmbedding({
            apiKey: config.openaiApiKey || 'ollama',
            baseUrl: config.ollamaBaseUrl,
            model: config.embeddingModel,
        });
    }
    if (provider === 'local') {
        // Generic OpenAI-compatible endpoint; many local servers skip auth.
        return new OpenAIEmbedding({
            apiKey: config.openaiApiKey || 'local',
            baseUrl: config.openaiBaseUrl,
            model: config.embeddingModel,
        });
    }
    // Exhaustiveness guard: unreachable when config is well-typed.
    throw new Error(`Unknown embedding provider: ${provider}`);
}
41
+ //# sourceMappingURL=factory.js.map
@@ -0,0 +1,45 @@
1
+ import { type Embedding, type EmbeddingVector } from './types.js';
2
+ export declare function contentHash(text: string): string;
3
+ export interface OpenAIEmbeddingOptions {
4
+ apiKey?: string;
5
+ baseUrl?: string;
6
+ model?: string;
7
+ }
8
+ export declare class OpenAIEmbedding implements Embedding {
9
+ private client;
10
+ private model;
11
+ private _dimension;
12
+ private initialized;
13
+ private memoryCache;
14
+ private cacheDir;
15
+ constructor(options?: OpenAIEmbeddingOptions);
16
+ get dimension(): number;
17
+ /**
18
+ * Validate the API key and detect embedding dimension by embedding a test string.
19
+ * Must be called once before any other operations.
20
+ */
21
+ initialize(): Promise<void>;
22
+ private ensureInitialized;
23
+ embed(text: string): Promise<EmbeddingVector>;
24
+ embedBatch(texts: string[]): Promise<EmbeddingVector[]>;
25
+ /**
26
+ * Estimate the token cost for embedding a set of texts.
27
+ * Rough heuristic: ~4 chars per token for code.
28
+ * Cost rates are model-specific; local models (Ollama, etc.) are free.
29
+ */
30
+ estimateTokens(texts: string[]): {
31
+ totalChars: number;
32
+ estimatedTokens: number;
33
+ estimatedCostUsd: number;
34
+ };
35
+ /**
36
+ * Set a value in the memory cache, evicting the oldest entry if at capacity.
37
+ */
38
+ private setMemoryCache;
39
+ private callWithRetry;
40
+ private callApi;
41
+ private getDiskCachePath;
42
+ private readDiskCache;
43
+ private writeDiskCache;
44
+ }
45
+ //# sourceMappingURL=openai.d.ts.map
@@ -0,0 +1,243 @@
1
+ import { createHash } from 'node:crypto';
2
+ import fsp from 'node:fs/promises';
3
+ import path from 'node:path';
4
+ import OpenAI from 'openai';
5
+ import { EmbeddingError } from '../errors.js';
6
+ import { getConfig } from '../config.js';
7
+ import { getCacheDir } from '../paths.js';
8
+ import { truncateToSafeLength } from './truncate.js';
9
// Exponential backoff schedule for retryable API failures (ms).
const RETRY_DELAYS = [1000, 4000, 16000];
// HTTP statuses worth retrying: rate limit plus transient server errors.
const RETRYABLE_STATUS = new Set([429, 500, 502, 503]);
// Bound on the in-process embedding cache (entry count, FIFO eviction).
const MAX_MEMORY_CACHE_SIZE = 10_000;
// Never honor a Retry-After header longer than this (ms).
const MAX_RETRY_AFTER_MS = 60_000;
13
/**
 * Stable 64-bit (16 hex chars) fingerprint of a text, used as a cache key.
 * Truncation is safe here: a collision merely reuses a cached embedding.
 */
export function contentHash(text) {
    const digest = createHash('sha256').update(text).digest('hex');
    return digest.slice(0, 16);
}
16
/**
 * Embedding provider backed by any OpenAI-compatible /v1/embeddings API
 * (OpenAI itself, Ollama, LM Studio, vLLM, …).
 *
 * Layered caching: a bounded in-memory Map (FIFO eviction) in front of a
 * sharded on-disk JSON cache keyed by content hash. API calls are batched
 * and retried with exponential backoff.
 */
export class OpenAIEmbedding {
    client;
    model;
    // Detected on initialize(); 0 until then.
    _dimension = 0;
    initialized = false;
    // contentHash -> vector. Map preserves insertion order, enabling FIFO eviction.
    memoryCache = new Map();
    cacheDir;
    /**
     * @param options Optional apiKey / baseUrl / model overrides; anything
     *                omitted falls back to the global config.
     */
    constructor(options) {
        const config = getConfig();
        const apiKey = options?.apiKey ?? config.openaiApiKey;
        const baseUrl = options?.baseUrl ?? config.openaiBaseUrl;
        this.client = new OpenAI({
            apiKey,
            // Only pass baseURL when configured; otherwise the SDK default applies.
            ...(baseUrl && { baseURL: baseUrl }),
        });
        this.model = options?.model ?? config.embeddingModel;
        this.cacheDir = path.join(getCacheDir(), 'embeddings');
    }
    get dimension() {
        return this._dimension;
    }
    /**
     * Validate the API key and detect embedding dimension by embedding a test string.
     * Must be called once before any other operations.
     * @throws {EmbeddingError} when the probe request fails.
     */
    async initialize() {
        try {
            const result = await this.callApi(['dimension probe']);
            this._dimension = result[0].length;
            this.initialized = true;
            // Log to stderr, consistent with the console.warn diagnostics below.
            // (NOTE(review): stdout may be reserved for a stdio protocol such as
            // an MCP transport, so informational logs must not go there.)
            console.error(`Embedding model "${this.model}" validated. Dimension: ${this._dimension}`);
        }
        catch (err) {
            throw new EmbeddingError(`Failed to initialize embedding provider. Check your API key, base URL, and model name. ` +
                `Model: "${this.model}"`, err);
        }
    }
    // Guard: embed/embedBatch depend on _dimension and a validated client.
    ensureInitialized() {
        if (!this.initialized) {
            throw new EmbeddingError('Embedding provider not initialized. Call initialize() before embed/embedBatch.');
        }
    }
    /** Embed a single text (convenience wrapper over embedBatch). */
    async embed(text) {
        this.ensureInitialized();
        const results = await this.embedBatch([text]);
        return results[0];
    }
    /**
     * Embed many texts with three layers of reuse:
     *   1. empty/whitespace-only inputs get zero vectors (no API call),
     *   2. memory then disk caches are consulted per content hash,
     *   3. only the remaining texts hit the API, in config-sized batches.
     * The returned array always matches the input order.
     */
    async embedBatch(texts) {
        this.ensureInitialized();
        if (texts.length === 0)
            return [];
        // Track which indices have empty/whitespace-only text so we return zero vectors for them
        const emptyIndices = new Set();
        for (let i = 0; i < texts.length; i++) {
            if (texts[i].trim().length === 0) {
                emptyIndices.add(i);
            }
        }
        const results = new Array(texts.length).fill(null);
        // Fill empty-text slots with zero vectors immediately
        for (const i of emptyIndices) {
            results[i] = new Array(this._dimension).fill(0);
        }
        // Check caches for non-empty texts
        for (let i = 0; i < texts.length; i++) {
            if (emptyIndices.has(i))
                continue;
            const hash = contentHash(texts[i]);
            const memHit = this.memoryCache.get(hash);
            if (memHit) {
                results[i] = memHit;
                continue;
            }
            const diskHit = await this.readDiskCache(hash);
            if (diskHit) {
                // Promote disk hits into the memory cache for next time.
                this.setMemoryCache(hash, diskHit);
                results[i] = diskHit;
            }
        }
        // Collect the texts that still need an API call, remembering their slots.
        const uncachedIndices = [];
        const uncachedTexts = [];
        for (let i = 0; i < texts.length; i++) {
            if (results[i] === null) {
                uncachedIndices.push(i);
                uncachedTexts.push(texts[i]);
            }
        }
        if (uncachedTexts.length === 0) {
            return results;
        }
        const batchSize = getConfig().embeddingBatchSize;
        const freshEmbeddings = [];
        for (let offset = 0; offset < uncachedTexts.length; offset += batchSize) {
            const batch = uncachedTexts.slice(offset, offset + batchSize);
            const batchResult = await this.callWithRetry(batch);
            freshEmbeddings.push(...batchResult);
        }
        for (let i = 0; i < uncachedIndices.length; i++) {
            const idx = uncachedIndices[i];
            const hash = contentHash(texts[idx]);
            const vec = freshEmbeddings[i];
            this.setMemoryCache(hash, vec);
            // Fire-and-forget: the disk write must never block indexing.
            this.writeDiskCache(hash, vec);
            results[idx] = vec;
        }
        // Defensive: every slot must be filled before returning.
        if (results.some(r => r === null)) {
            throw new EmbeddingError('Missing embeddings: some texts did not receive vectors after cache lookup and API call.');
        }
        return results;
    }
    /**
     * Estimate the token cost for embedding a set of texts.
     * Rough heuristic: ~4 chars per token for code.
     * Cost rates are model-specific; local models (Ollama, etc.) are free.
     */
    estimateTokens(texts) {
        const totalChars = texts.reduce((sum, t) => sum + t.length, 0);
        const estimatedTokens = Math.ceil(totalChars / 4);
        // Per-million-token pricing for known OpenAI models; unknown models
        // (e.g. local) fall through to a zero rate.
        const COST_PER_MILLION = {
            'text-embedding-3-small': 0.02,
            'text-embedding-3-large': 0.13,
            'text-embedding-ada-002': 0.10,
        };
        const rate = COST_PER_MILLION[this.model] ?? 0;
        const estimatedCostUsd = (estimatedTokens / 1_000_000) * rate;
        return { totalChars, estimatedTokens, estimatedCostUsd };
    }
    /**
     * Set a value in the memory cache, evicting the oldest entry (FIFO —
     * first key in Map insertion order) when at capacity.
     */
    setMemoryCache(hash, vec) {
        if (this.memoryCache.size >= MAX_MEMORY_CACHE_SIZE && !this.memoryCache.has(hash)) {
            const oldest = this.memoryCache.keys().next().value;
            if (oldest !== undefined) {
                this.memoryCache.delete(oldest);
            }
        }
        this.memoryCache.set(hash, vec);
    }
    /**
     * Call the embeddings API with retry/backoff.
     * - Retries 429/5xx up to RETRY_DELAYS.length times with exponential backoff.
     * - On 429, honors Retry-After (capped at MAX_RETRY_AFTER_MS) and halves
     *   the sub-batch size to ease off the rate limiter.
     * - Resumes after the last successful sub-batch instead of re-embedding
     *   (and re-billing) texts that already succeeded on earlier attempts.
     */
    async callWithRetry(texts) {
        let currentBatchSize = texts.length;
        const completed = []; // vectors for sub-batches that already succeeded
        let offset = 0;       // index of the first text not yet embedded
        for (let attempt = 0; attempt <= RETRY_DELAYS.length; attempt++) {
            try {
                while (offset < texts.length) {
                    const batch = texts.slice(offset, offset + currentBatchSize);
                    const result = await this.callApi(batch);
                    completed.push(...result);
                    offset += batch.length; // advance only after success
                }
                return completed;
            }
            catch (err) {
                const status = err.status;
                const isRetryable = status !== undefined && RETRYABLE_STATUS.has(status);
                if (!isRetryable || attempt >= RETRY_DELAYS.length) {
                    throw new EmbeddingError(`Embedding API call failed after ${attempt + 1} attempt(s). Status: ${status ?? 'unknown'}`, err);
                }
                let delay = RETRY_DELAYS[attempt];
                if (status === 429) {
                    const retryAfter = err.headers?.['retry-after'];
                    if (retryAfter) {
                        // NOTE(review): assumes Retry-After carries seconds; an
                        // HTTP-date value fails parseInt and keeps the backoff delay.
                        const parsed = parseInt(retryAfter, 10);
                        if (!isNaN(parsed))
                            delay = Math.min(parsed * 1000, MAX_RETRY_AFTER_MS);
                    }
                    // Halve batch size on rate limit to avoid repeated throttling
                    currentBatchSize = Math.max(1, Math.floor(currentBatchSize / 2));
                    console.warn(`Rate limited. Retrying in ${delay}ms with batch size ${currentBatchSize}.`);
                }
                else {
                    console.warn(`Embedding API error (status ${status}). Retrying in ${delay}ms...`);
                }
                await sleep(delay);
            }
        }
        // Unreachable but satisfies TypeScript compiler
        throw new EmbeddingError('Unexpected: exhausted retries');
    }
    /** Single raw API call; truncates inputs and restores index order. */
    async callApi(texts) {
        const response = await this.client.embeddings.create({
            model: this.model,
            input: texts.map(truncateToSafeLength),
        });
        // Sort by index to guarantee order (API may return out-of-order)
        const sorted = response.data.sort((a, b) => a.index - b.index);
        return sorted.map(d => d.embedding);
    }
    // Shard cache files by the first two hash chars to keep directories small;
    // the model name is sanitized into a path-safe directory component.
    getDiskCachePath(hash) {
        const shard = hash.slice(0, 2);
        return path.join(this.cacheDir, this.model.replace(/[^a-zA-Z0-9-]/g, '_'), shard, `${hash}.json`);
    }
    /**
     * Read a cached vector from disk. Returns null on a miss; a corrupted or
     * unreadable cache file is deleted (best-effort) and treated as a miss.
     */
    async readDiskCache(hash) {
        const filepath = this.getDiskCachePath(hash);
        try {
            const data = await fsp.readFile(filepath, 'utf-8');
            return JSON.parse(data);
        }
        catch (err) {
            const code = err.code;
            if (code === 'ENOENT') {
                // File doesn't exist -- normal cache miss
                return null;
            }
            // Parse error or other I/O problem: warn and remove corrupted file
            console.warn(`Corrupted embedding cache file ${filepath}, deleting.`);
            void fsp.unlink(filepath).catch(() => { });
            return null;
        }
    }
    /** Fire-and-forget async write; cache failures never block indexing. */
    writeDiskCache(hash, vector) {
        const filepath = this.getDiskCachePath(hash);
        void fsp.mkdir(path.dirname(filepath), { recursive: true })
            .then(() => fsp.writeFile(filepath, JSON.stringify(vector)))
            .catch(() => {
            // Non-fatal: cache write failure doesn't block indexing
        });
    }
}
240
/** Promise-based delay helper used for retry backoff. */
function sleep(ms) {
    return new Promise(resolve => {
        setTimeout(resolve, ms);
    });
}
243
+ //# sourceMappingURL=openai.js.map
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Truncate text to stay within the embedding model's token limit.
3
+ * Cuts at the last newline boundary to avoid splitting mid-line.
4
+ */
5
+ export declare function truncateToSafeLength(text: string): string;
6
+ //# sourceMappingURL=truncate.d.ts.map
@@ -0,0 +1,14 @@
1
// Safety margin below the 8,191-token embedding limit (~4 chars/token for code).
const MAX_EMBED_CHARS = 6000;
/**
 * Truncate text to stay within the embedding model's token limit.
 * Cuts at the last newline boundary to avoid splitting mid-line.
 */
export function truncateToSafeLength(text) {
    if (text.length <= MAX_EMBED_CHARS) {
        return text;
    }
    const head = text.slice(0, MAX_EMBED_CHARS);
    const cut = head.lastIndexOf('\n');
    // Only back up to a newline strictly past position 0; otherwise keep the hard cut.
    return cut > 0 ? head.slice(0, cut) : head;
}
14
+ //# sourceMappingURL=truncate.js.map
@@ -0,0 +1,18 @@
1
+ export type EmbeddingVector = number[];
2
+ export interface TokenEstimate {
3
+ totalChars: number;
4
+ estimatedTokens: number;
5
+ estimatedCostUsd: number;
6
+ }
7
+ export interface Embedding {
8
+ /**
9
+ * Validate provider connectivity and detect embedding dimension.
10
+ * Must be called once before any embed/embedBatch operations.
11
+ */
12
+ initialize(): Promise<void>;
13
+ embed(text: string): Promise<EmbeddingVector>;
14
+ embedBatch(texts: string[]): Promise<EmbeddingVector[]>;
15
+ estimateTokens(texts: string[]): TokenEstimate;
16
+ readonly dimension: number;
17
+ }
18
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1,17 @@
1
+ export declare class EideticError extends Error {
2
+ readonly cause?: unknown | undefined;
3
+ constructor(message: string, cause?: unknown | undefined);
4
+ }
5
+ export declare class ConfigError extends EideticError {
6
+ }
7
+ export declare class EmbeddingError extends EideticError {
8
+ }
9
+ export declare class VectorDBError extends EideticError {
10
+ }
11
+ export declare class IndexingError extends EideticError {
12
+ }
13
+ export declare class SearchError extends EideticError {
14
+ }
15
+ export declare class BootstrapError extends EideticError {
16
+ }
17
+ //# sourceMappingURL=errors.d.ts.map
package/dist/errors.js ADDED
@@ -0,0 +1,21 @@
1
/**
 * Base class for all package errors. Carries an optional `cause` — the
 * underlying error or value that triggered this one — and sets `name` to
 * the concrete subclass name for readable logs and stack traces.
 */
export class EideticError extends Error {
    cause;
    constructor(message, cause) {
        super(message);
        this.cause = cause;
        this.name = this.constructor.name;
    }
}
/** Invalid or missing configuration. */
export class ConfigError extends EideticError {
}
/** Embedding provider / API failure. */
export class EmbeddingError extends EideticError {
}
/** Vector database failure. */
export class VectorDBError extends EideticError {
}
/** Failure while indexing a codebase. */
export class IndexingError extends EideticError {
}
/** Failure while searching. */
export class SearchError extends EideticError {
}
/** Failure while bootstrapping infrastructure. */
export class BootstrapError extends EideticError {
}
21
+ //# sourceMappingURL=errors.js.map
@@ -0,0 +1,12 @@
1
+ import type { PreviewResult, IndexResult } from './core/indexer.js';
2
+ import type { CodebaseState } from './state/snapshot.js';
3
+ export declare function textResult(text: string): {
4
+ content: {
5
+ type: "text";
6
+ text: string;
7
+ }[];
8
+ };
9
+ export declare function formatIndexResult(result: IndexResult, normalizedPath: string): string;
10
+ export declare function formatPreview(preview: PreviewResult, rootPath: string): string;
11
+ export declare function formatListIndexed(states: CodebaseState[]): string;
12
+ //# sourceMappingURL=format.d.ts.map