claude-eidetic 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/dist/config.d.ts +87 -0
  2. package/dist/config.js +65 -0
  3. package/dist/core/indexer.d.ts +18 -0
  4. package/dist/core/indexer.js +169 -0
  5. package/dist/core/preview.d.ts +14 -0
  6. package/dist/core/preview.js +61 -0
  7. package/dist/core/searcher.d.ts +24 -0
  8. package/dist/core/searcher.js +101 -0
  9. package/dist/core/snapshot-io.d.ts +6 -0
  10. package/dist/core/snapshot-io.js +39 -0
  11. package/dist/core/sync.d.ts +35 -0
  12. package/dist/core/sync.js +188 -0
  13. package/dist/embedding/factory.d.ts +17 -0
  14. package/dist/embedding/factory.js +41 -0
  15. package/dist/embedding/openai.d.ts +45 -0
  16. package/dist/embedding/openai.js +243 -0
  17. package/dist/embedding/truncate.d.ts +6 -0
  18. package/dist/embedding/truncate.js +14 -0
  19. package/dist/embedding/types.d.ts +18 -0
  20. package/dist/embedding/types.js +2 -0
  21. package/dist/errors.d.ts +17 -0
  22. package/dist/errors.js +21 -0
  23. package/dist/format.d.ts +12 -0
  24. package/dist/format.js +97 -0
  25. package/dist/index.d.ts +3 -0
  26. package/dist/index.js +109 -0
  27. package/dist/infra/qdrant-bootstrap.d.ts +2 -0
  28. package/dist/infra/qdrant-bootstrap.js +94 -0
  29. package/dist/paths.d.ts +11 -0
  30. package/dist/paths.js +41 -0
  31. package/dist/splitter/ast.d.ts +13 -0
  32. package/dist/splitter/ast.js +169 -0
  33. package/dist/splitter/line.d.ts +14 -0
  34. package/dist/splitter/line.js +109 -0
  35. package/dist/splitter/types.d.ts +11 -0
  36. package/dist/splitter/types.js +2 -0
  37. package/dist/state/registry.d.ts +8 -0
  38. package/dist/state/registry.js +33 -0
  39. package/dist/state/snapshot.d.ts +26 -0
  40. package/dist/state/snapshot.js +101 -0
  41. package/dist/tool-schemas.d.ts +135 -0
  42. package/dist/tool-schemas.js +162 -0
  43. package/dist/tools.d.ts +40 -0
  44. package/dist/tools.js +169 -0
  45. package/dist/vectordb/milvus.d.ts +33 -0
  46. package/dist/vectordb/milvus.js +328 -0
  47. package/dist/vectordb/qdrant.d.ts +51 -0
  48. package/dist/vectordb/qdrant.js +241 -0
  49. package/dist/vectordb/types.d.ts +35 -0
  50. package/dist/vectordb/types.js +2 -0
  51. package/package.json +62 -0
@@ -0,0 +1,87 @@
1
+ import { z } from 'zod';
2
+ declare const configSchema: z.ZodEffects<z.ZodObject<{
3
+ embeddingProvider: z.ZodDefault<z.ZodEnum<["openai", "ollama", "local"]>>;
4
+ openaiApiKey: z.ZodDefault<z.ZodString>;
5
+ openaiBaseUrl: z.ZodOptional<z.ZodString>;
6
+ ollamaBaseUrl: z.ZodDefault<z.ZodString>;
7
+ embeddingModel: z.ZodOptional<z.ZodString>;
8
+ embeddingBatchSize: z.ZodDefault<z.ZodNumber>;
9
+ indexingConcurrency: z.ZodDefault<z.ZodNumber>;
10
+ qdrantUrl: z.ZodDefault<z.ZodString>;
11
+ qdrantApiKey: z.ZodOptional<z.ZodString>;
12
+ vectordbProvider: z.ZodDefault<z.ZodEnum<["qdrant", "milvus"]>>;
13
+ milvusAddress: z.ZodDefault<z.ZodString>;
14
+ milvusToken: z.ZodOptional<z.ZodString>;
15
+ eideticDataDir: z.ZodDefault<z.ZodString>;
16
+ customExtensions: z.ZodEffects<z.ZodDefault<z.ZodArray<z.ZodString, "many">>, string[], unknown>;
17
+ customIgnorePatterns: z.ZodEffects<z.ZodDefault<z.ZodArray<z.ZodString, "many">>, string[], unknown>;
18
+ }, "strip", z.ZodTypeAny, {
19
+ embeddingProvider: "openai" | "ollama" | "local";
20
+ openaiApiKey: string;
21
+ ollamaBaseUrl: string;
22
+ embeddingBatchSize: number;
23
+ indexingConcurrency: number;
24
+ qdrantUrl: string;
25
+ vectordbProvider: "qdrant" | "milvus";
26
+ milvusAddress: string;
27
+ eideticDataDir: string;
28
+ customExtensions: string[];
29
+ customIgnorePatterns: string[];
30
+ openaiBaseUrl?: string | undefined;
31
+ embeddingModel?: string | undefined;
32
+ qdrantApiKey?: string | undefined;
33
+ milvusToken?: string | undefined;
34
+ }, {
35
+ embeddingProvider?: "openai" | "ollama" | "local" | undefined;
36
+ openaiApiKey?: string | undefined;
37
+ openaiBaseUrl?: string | undefined;
38
+ ollamaBaseUrl?: string | undefined;
39
+ embeddingModel?: string | undefined;
40
+ embeddingBatchSize?: number | undefined;
41
+ indexingConcurrency?: number | undefined;
42
+ qdrantUrl?: string | undefined;
43
+ qdrantApiKey?: string | undefined;
44
+ vectordbProvider?: "qdrant" | "milvus" | undefined;
45
+ milvusAddress?: string | undefined;
46
+ milvusToken?: string | undefined;
47
+ eideticDataDir?: string | undefined;
48
+ customExtensions?: unknown;
49
+ customIgnorePatterns?: unknown;
50
+ }>, {
51
+ embeddingModel: string;
52
+ embeddingProvider: "openai" | "ollama" | "local";
53
+ openaiApiKey: string;
54
+ ollamaBaseUrl: string;
55
+ embeddingBatchSize: number;
56
+ indexingConcurrency: number;
57
+ qdrantUrl: string;
58
+ vectordbProvider: "qdrant" | "milvus";
59
+ milvusAddress: string;
60
+ eideticDataDir: string;
61
+ customExtensions: string[];
62
+ customIgnorePatterns: string[];
63
+ openaiBaseUrl?: string | undefined;
64
+ qdrantApiKey?: string | undefined;
65
+ milvusToken?: string | undefined;
66
+ }, {
67
+ embeddingProvider?: "openai" | "ollama" | "local" | undefined;
68
+ openaiApiKey?: string | undefined;
69
+ openaiBaseUrl?: string | undefined;
70
+ ollamaBaseUrl?: string | undefined;
71
+ embeddingModel?: string | undefined;
72
+ embeddingBatchSize?: number | undefined;
73
+ indexingConcurrency?: number | undefined;
74
+ qdrantUrl?: string | undefined;
75
+ qdrantApiKey?: string | undefined;
76
+ vectordbProvider?: "qdrant" | "milvus" | undefined;
77
+ milvusAddress?: string | undefined;
78
+ milvusToken?: string | undefined;
79
+ eideticDataDir?: string | undefined;
80
+ customExtensions?: unknown;
81
+ customIgnorePatterns?: unknown;
82
+ }>;
83
+ export type Config = z.infer<typeof configSchema>;
84
+ export declare function loadConfig(): Config;
85
+ export declare function getConfig(): Config;
86
+ export {};
87
+ //# sourceMappingURL=config.d.ts.map
package/dist/config.js ADDED
@@ -0,0 +1,65 @@
1
+ import os from 'node:os';
2
+ import path from 'node:path';
3
+ import { z } from 'zod';
4
+ import { ConfigError } from './errors.js';
5
// Env-supplied list values arrive as JSON-encoded strings (e.g. '["Makefile"]').
// zod does NOT catch exceptions thrown inside preprocess, so a malformed value
// used to escape safeParse as a raw SyntaxError instead of becoming a
// ConfigError. Returning the original value on parse failure lets zod emit a
// normal "expected array, received string" issue, which loadConfig folds into
// a readable ConfigError.
const parseJsonList = (val) => {
    if (typeof val !== 'string')
        return val;
    try {
        return JSON.parse(val);
    }
    catch {
        return val;
    }
};
// Schema for all environment-driven configuration. Every field has either a
// default or is optional, so an empty environment only fails the provider-
// specific checks performed later in loadConfig.
const configSchema = z.object({
    embeddingProvider: z.enum(['openai', 'ollama', 'local']).default('openai'),
    openaiApiKey: z.string().default(''),
    openaiBaseUrl: z.string().optional(),
    ollamaBaseUrl: z.string().default('http://localhost:11434/v1'),
    embeddingModel: z.string().optional(),
    embeddingBatchSize: z.coerce.number().int().min(1).max(2048).default(100),
    indexingConcurrency: z.coerce.number().int().min(1).max(32).default(8),
    qdrantUrl: z.string().default('http://localhost:6333'),
    qdrantApiKey: z.string().optional(),
    vectordbProvider: z.enum(['qdrant', 'milvus']).default('qdrant'),
    milvusAddress: z.string().default('localhost:19530'),
    milvusToken: z.string().optional(),
    eideticDataDir: z.string().default(path.join(os.homedir(), '.eidetic')),
    customExtensions: z.preprocess(parseJsonList, z.array(z.string()).default([])),
    customIgnorePatterns: z.preprocess(parseJsonList, z.array(z.string()).default([])),
}).transform((cfg) => ({
    ...cfg,
    // Default embedding model depends on provider
    embeddingModel: cfg.embeddingModel
        ?? (cfg.embeddingProvider === 'ollama' ? 'nomic-embed-text' : 'text-embedding-3-small'),
}));
27
// Memoized config; populated by loadConfig, read by getConfig.
let cachedConfig = null;
/**
 * Load and validate configuration from environment variables.
 *
 * Empty-string env values for URL/key/model fields are treated as unset via
 * `|| undefined` so that schema defaults apply.
 *
 * @returns The validated Config (also cached for getConfig).
 * @throws ConfigError when schema validation fails, or when the default
 *         "openai" provider is selected without an API key.
 */
export function loadConfig() {
    const env = process.env;
    const parsed = configSchema.safeParse({
        embeddingProvider: env.EMBEDDING_PROVIDER,
        openaiApiKey: env.OPENAI_API_KEY ?? '',
        openaiBaseUrl: env.OPENAI_BASE_URL || undefined,
        ollamaBaseUrl: env.OLLAMA_BASE_URL,
        embeddingModel: env.EMBEDDING_MODEL || undefined,
        embeddingBatchSize: env.EMBEDDING_BATCH_SIZE,
        indexingConcurrency: env.INDEXING_CONCURRENCY,
        qdrantUrl: env.QDRANT_URL,
        qdrantApiKey: env.QDRANT_API_KEY || undefined,
        vectordbProvider: env.VECTORDB_PROVIDER,
        milvusAddress: env.MILVUS_ADDRESS,
        milvusToken: env.MILVUS_TOKEN || undefined,
        eideticDataDir: env.EIDETIC_DATA_DIR,
        customExtensions: env.CUSTOM_EXTENSIONS,
        customIgnorePatterns: env.CUSTOM_IGNORE_PATTERNS,
    });
    if (!parsed.success) {
        const issues = parsed.error.issues
            .map(i => ` ${i.path.join('.')}: ${i.message}`)
            .join('\n');
        throw new ConfigError(`Invalid configuration:\n${issues}`);
    }
    const config = parsed.data;
    // The OpenAI provider cannot work without a key; fail fast with guidance.
    if (config.embeddingProvider === 'openai' && !config.openaiApiKey) {
        throw new ConfigError('OPENAI_API_KEY is required when EMBEDDING_PROVIDER is "openai" (the default). ' +
            'Set EMBEDDING_PROVIDER=ollama or EMBEDDING_PROVIDER=local to use a local embedding server.');
    }
    cachedConfig = config;
    return cachedConfig;
}
59
/**
 * Return the cached configuration, loading it from the environment on first
 * use. Subsequent calls return the same object.
 */
export function getConfig() {
    return cachedConfig ?? loadConfig();
}
65
+ //# sourceMappingURL=config.js.map
@@ -0,0 +1,18 @@
1
+ import type { Embedding } from '../embedding/types.js';
2
+ import type { VectorDB } from '../vectordb/types.js';
3
+ export { previewCodebase, type PreviewResult } from './preview.js';
4
+ export { saveSnapshot, deleteSnapshot, snapshotExists } from './snapshot-io.js';
5
+ export interface IndexResult {
6
+ totalFiles: number;
7
+ totalChunks: number;
8
+ addedFiles: number;
9
+ modifiedFiles: number;
10
+ removedFiles: number;
11
+ skippedFiles: number;
12
+ estimatedTokens: number;
13
+ estimatedCostUsd: number;
14
+ durationMs: number;
15
+ parseFailures: string[];
16
+ }
17
+ export declare function indexCodebase(rootPath: string, embedding: Embedding, vectordb: VectorDB, force?: boolean, onProgress?: (pct: number, msg: string) => void, customExtensions?: string[], customIgnorePatterns?: string[]): Promise<IndexResult>;
18
+ //# sourceMappingURL=indexer.d.ts.map
@@ -0,0 +1,169 @@
1
+ import fs from 'node:fs';
2
+ import path from 'node:path';
3
+ import { randomUUID } from 'node:crypto';
4
+ import { AstSplitter } from '../splitter/ast.js';
5
+ import { LineSplitter } from '../splitter/line.js';
6
+ import { scanFiles, buildSnapshot, diffSnapshots, extensionToLanguage } from './sync.js';
7
+ import { getConfig } from '../config.js';
8
+ import { normalizePath, pathToCollectionName } from '../paths.js';
9
+ import { IndexingError } from '../errors.js';
10
+ import { loadSnapshot, saveSnapshot } from './snapshot-io.js';
11
+ export { previewCodebase } from './preview.js';
12
+ export { saveSnapshot, deleteSnapshot, snapshotExists } from './snapshot-io.js';
13
/**
 * Index (or incrementally re-index) a codebase into the vector database.
 *
 * Scans files, diffs against the previously saved snapshot to find
 * added/modified/removed files, splits the changed files into chunks (AST
 * splitter first, falling back to line-based splitting), embeds the chunks in
 * batches, inserts them into the per-codebase collection, then persists a new
 * snapshot.
 *
 * @param rootPath   Codebase root; normalized before use.
 * @param embedding  Embedding provider (supplies dimension, embedBatch, and
 *                   token/cost estimation).
 * @param vectordb   Vector database client.
 * @param force      When true, drops and rebuilds the whole collection.
 * @param onProgress Optional callback invoked with (pct 0-100, message).
 * @param customExtensions     Extra file extensions passed to the scanner.
 * @param customIgnorePatterns Extra ignore patterns passed to the scanner.
 * @returns IndexResult summarizing this run.
 * @throws IndexingError when no indexable files are found, or when the
 *         embedding provider returns a different number of vectors than
 *         texts sent.
 */
export async function indexCodebase(rootPath, embedding, vectordb, force = false, onProgress, customExtensions, customIgnorePatterns) {
    const start = Date.now();
    const normalizedPath = normalizePath(rootPath);
    const collectionName = pathToCollectionName(normalizedPath);
    const config = getConfig();
    onProgress?.(0, 'Scanning files...');
    const filePaths = await scanFiles(normalizedPath, customExtensions, customIgnorePatterns);
    if (filePaths.length === 0) {
        throw new IndexingError(`No indexable files found in ${normalizedPath}`);
    }
    const currentSnapshot = buildSnapshot(normalizedPath, filePaths);
    let filesToProcess;
    let removedFiles = [];
    let addedCount = 0;
    let modifiedCount = 0;
    if (force) {
        onProgress?.(5, 'Dropping existing index...');
        await vectordb.dropCollection(collectionName);
        await vectordb.createCollection(collectionName, embedding.dimension);
        filesToProcess = filePaths;
        addedCount = filePaths.length;
    }
    else {
        const previousSnapshot = loadSnapshot(normalizedPath);
        if (!previousSnapshot || !(await vectordb.hasCollection(collectionName))) {
            // First time indexing (or snapshot/collection was lost): do everything.
            await vectordb.createCollection(collectionName, embedding.dimension);
            filesToProcess = filePaths;
            addedCount = filePaths.length;
        }
        else {
            const diff = diffSnapshots(previousSnapshot, currentSnapshot);
            addedCount = diff.added.length;
            modifiedCount = diff.modified.length;
            removedFiles = diff.removed;
            // Purge stale chunks for files that were removed or changed.
            for (const rel of [...diff.removed, ...diff.modified]) {
                await vectordb.deleteByPath(collectionName, rel);
            }
            filesToProcess = [...diff.added, ...diff.modified];
        }
    }
    // Every exit point returns this shape; overrides fill in the fields that
    // differ. durationMs is computed at call time, matching each return site.
    const buildResult = (overrides = {}) => ({
        totalFiles: filePaths.length,
        totalChunks: 0,
        addedFiles: addedCount,
        modifiedFiles: modifiedCount,
        removedFiles: removedFiles.length,
        skippedFiles: filePaths.length - filesToProcess.length,
        estimatedTokens: 0,
        estimatedCostUsd: 0,
        durationMs: Date.now() - start,
        parseFailures: [],
        ...overrides,
    });
    if (filesToProcess.length === 0) {
        // Nothing changed; still record the snapshot (it may have shrunk).
        saveSnapshot(normalizedPath, currentSnapshot);
        return buildResult();
    }
    onProgress?.(10, `Splitting ${filesToProcess.length} files...`);
    const astSplitter = new AstSplitter();
    const lineSplitter = new LineSplitter();
    const allChunks = [];
    const parseFailures = [];
    const concurrency = config.indexingConcurrency;
    for (let i = 0; i < filesToProcess.length; i += concurrency) {
        const batch = filesToProcess.slice(i, i + concurrency);
        const batchResults = await Promise.all(batch.map(async (relPath) => {
            const fullPath = path.join(normalizedPath, relPath);
            try {
                // Async read so the Promise.all batch actually overlaps I/O;
                // the previous readFileSync serialized the whole batch on the
                // event loop, defeating the concurrency setting for reads.
                const code = await fs.promises.readFile(fullPath, 'utf-8');
                if (code.trim().length === 0)
                    return { chunks: [], failed: false };
                const language = extensionToLanguage(path.extname(relPath));
                // Prefer AST-aware chunks; fall back to fixed line windows.
                let chunks = astSplitter.split(code, language, relPath);
                if (chunks.length === 0) {
                    chunks = lineSplitter.split(code, language, relPath);
                }
                if (chunks.length === 0)
                    return { chunks: [], failed: true };
                return { chunks, failed: false };
            }
            catch (err) {
                console.warn(`Failed to process "${relPath}": ${err}`);
                return { chunks: [], failed: true };
            }
        }));
        for (let j = 0; j < batchResults.length; j++) {
            const { chunks, failed } = batchResults[j];
            allChunks.push(...chunks);
            if (failed)
                parseFailures.push(batch[j]);
        }
    }
    if (parseFailures.length > 0) {
        console.warn(`Warning: ${parseFailures.length} file(s) produced no chunks: ${parseFailures.slice(0, 10).join(', ')}` +
            (parseFailures.length > 10 ? ` (and ${parseFailures.length - 10} more)` : ''));
    }
    if (allChunks.length === 0) {
        saveSnapshot(normalizedPath, currentSnapshot);
        return buildResult({ parseFailures });
    }
    const chunkTexts = allChunks.map(c => c.content);
    const estimation = embedding.estimateTokens(chunkTexts);
    console.log(`Indexing ${filesToProcess.length} files -> ${allChunks.length} chunks -> ` +
        `~${(estimation.estimatedTokens / 1000).toFixed(0)}K tokens (~$${estimation.estimatedCostUsd.toFixed(4)})`);
    const batchSize = config.embeddingBatchSize;
    let processedChunks = 0;
    for (let i = 0; i < allChunks.length; i += batchSize) {
        const batch = allChunks.slice(i, i + batchSize);
        const texts = batch.map(c => c.content);
        // Embedding occupies the 10-95% band of the progress bar.
        const pct = 10 + Math.round((i / allChunks.length) * 85);
        onProgress?.(pct, `Embedding batch ${Math.floor(i / batchSize) + 1}/${Math.ceil(allChunks.length / batchSize)}...`);
        const vectors = await embedding.embedBatch(texts);
        if (vectors.length !== texts.length) {
            // This is a count mismatch, not a dimension mismatch — the old
            // message misdescribed the failure.
            throw new IndexingError(`Embedding count mismatch: sent ${texts.length} texts, got ${vectors.length} vectors`);
        }
        const documents = batch.map((chunk, j) => ({
            id: randomUUID(),
            content: chunk.content,
            vector: vectors[j],
            relativePath: chunk.filePath,
            startLine: chunk.startLine,
            endLine: chunk.endLine,
            fileExtension: path.extname(chunk.filePath),
            language: chunk.language,
        }));
        await vectordb.insert(collectionName, documents);
        processedChunks += batch.length;
    }
    onProgress?.(98, 'Saving snapshot...');
    saveSnapshot(normalizedPath, currentSnapshot);
    onProgress?.(100, 'Done');
    return buildResult({
        totalChunks: processedChunks,
        estimatedTokens: estimation.estimatedTokens,
        estimatedCostUsd: estimation.estimatedCostUsd,
        parseFailures,
    });
}
169
+ //# sourceMappingURL=indexer.js.map
@@ -0,0 +1,14 @@
1
+ import type { Embedding } from '../embedding/types.js';
2
+ export interface PreviewResult {
3
+ totalFiles: number;
4
+ byExtension: Record<string, number>;
5
+ topDirectories: {
6
+ dir: string;
7
+ count: number;
8
+ }[];
9
+ estimatedTokens: number;
10
+ estimatedCostUsd: number;
11
+ warnings: string[];
12
+ }
13
+ export declare function previewCodebase(rootPath: string, embedding: Embedding, customExtensions?: string[], customIgnorePatterns?: string[]): Promise<PreviewResult>;
14
+ //# sourceMappingURL=preview.d.ts.map
@@ -0,0 +1,61 @@
1
+ import fs from 'node:fs';
2
+ import path from 'node:path';
3
+ import { scanFiles } from './sync.js';
4
+ import { normalizePath } from '../paths.js';
5
/**
 * Dry-run analysis of a codebase before indexing: file counts by extension,
 * the busiest top-level directories, a rough token/cost estimate from file
 * sizes, and advisory warnings.
 *
 * NOTE(review): the `embedding` parameter is currently unused here — the cost
 * estimate hardcodes a per-token rate; confirm whether it should delegate to
 * embedding.estimateTokens instead.
 */
export async function previewCodebase(rootPath, embedding, customExtensions, customIgnorePatterns) {
    const root = normalizePath(rootPath);
    const files = await scanFiles(root, customExtensions, customIgnorePatterns);
    const byExtension = {};
    const dirCounts = {};
    let totalBytes = 0;
    for (const rel of files) {
        const ext = path.extname(rel).toLowerCase() || '(no ext)';
        byExtension[ext] = (byExtension[ext] ?? 0) + 1;
        const nested = rel.includes('/') || rel.includes('\\');
        const dir = nested ? rel.split(/[/\\]/)[0] : '(root)';
        dirCounts[dir] = (dirCounts[dir] ?? 0) + 1;
        try {
            totalBytes += fs.statSync(path.join(root, rel)).size;
        }
        catch {
            // File may have disappeared between scan and stat
        }
    }
    const topDirectories = Object.entries(dirCounts)
        .map(([dir, count]) => ({ dir, count }))
        .sort((a, b) => b.count - a.count)
        .slice(0, 10);
    // Conservative estimate: ~3-4 chars per token for code.
    // May underestimate for dense code; will be refined during actual indexing.
    const estimatedTokens = Math.ceil(totalBytes / 3);
    const estimatedCostUsd = (estimatedTokens / 1_000_000) * 0.02;
    const warnings = [];
    if (files.length === 0) {
        warnings.push('No indexable files found. Check file extension filters and ignore patterns.');
    }
    if (files.length > 5000) {
        warnings.push(`Found ${files.length.toLocaleString()} files. Most codebases have 100-5,000 source files. Consider adding ignore patterns.`);
    }
    if (topDirectories.length > 0 && files.length > 0) {
        const topDir = topDirectories[0];
        const pct = Math.round((topDir.count / files.length) * 100);
        // A single dominant directory usually means vendored deps or build output.
        if (pct > 50 && topDir.dir !== '(root)') {
            warnings.push(`Directory '${topDir.dir}/' contains ${pct}% of files -- consider ignoring if it contains build artifacts or dependencies.`);
        }
    }
    return {
        totalFiles: files.length,
        byExtension,
        topDirectories,
        estimatedTokens,
        estimatedCostUsd,
        warnings,
    };
}
61
+ //# sourceMappingURL=preview.js.map
@@ -0,0 +1,24 @@
1
+ import type { Embedding } from '../embedding/types.js';
2
+ import type { VectorDB, SearchResult } from '../vectordb/types.js';
3
+ export interface SearchOptions {
4
+ limit?: number;
5
+ extensionFilter?: string[];
6
+ }
7
+ export declare function searchCode(rootPath: string, query: string, embedding: Embedding, vectordb: VectorDB, options?: SearchOptions): Promise<SearchResult[]>;
8
+ /**
9
+ * Deduplicate overlapping chunks from the same file.
10
+ * Results are already sorted by score (best first). For each file, keep only
11
+ * chunks whose line ranges do not overlap with an already-accepted chunk.
12
+ */
13
+ export declare function deduplicateResults(results: SearchResult[], limit: number): SearchResult[];
14
+ /**
15
+ * Format search results as a compact markdown table for token-efficient output.
16
+ * Returns file paths, line ranges, scores, and estimated token costs.
17
+ * Consumers use the Read tool to fetch full code for interesting results.
18
+ */
19
+ export declare function formatCompactResults(results: SearchResult[], query: string, rootPath: string): string;
20
+ /**
21
+ * Format search results as markdown for MCP tool output.
22
+ */
23
+ export declare function formatSearchResults(results: SearchResult[], query: string, rootPath: string): string;
24
+ //# sourceMappingURL=searcher.d.ts.map
@@ -0,0 +1,101 @@
1
+ import { normalizePath, pathToCollectionName } from '../paths.js';
2
+ import { SearchError } from '../errors.js';
3
+ const DEFAULT_LIMIT = 10;
4
+ const MAX_LIMIT = 50;
5
/**
 * Run a hybrid (dense + full-text) search over an indexed codebase.
 *
 * Overfetches up to 3x the requested limit (capped at MAX_LIMIT) so that
 * deduplication of overlapping chunks can still return enough results.
 *
 * @throws SearchError when the codebase has not been indexed yet.
 */
export async function searchCode(rootPath, query, embedding, vectordb, options = {}) {
    const root = normalizePath(rootPath);
    const collection = pathToCollectionName(root);
    // Fail early with guidance if there is no collection to search.
    if (!(await vectordb.hasCollection(collection))) {
        throw new SearchError(`Codebase at "${root}" is not indexed. ` +
            `Use the index_codebase tool to index it first.`);
    }
    // Clamp the caller's limit into [1, MAX_LIMIT].
    const limit = Math.min(Math.max(1, options.limit ?? DEFAULT_LIMIT), MAX_LIMIT);
    const queryVector = await embedding.embed(query);
    const hits = await vectordb.search(collection, {
        queryVector,
        queryText: query,
        limit: Math.min(limit * 3, MAX_LIMIT),
        extensionFilter: options.extensionFilter,
    });
    // Collapse overlapping chunks from the same file, keeping highest-scored.
    return deduplicateResults(hits, limit);
}
29
/**
 * Deduplicate overlapping chunks from the same file.
 *
 * Input is assumed sorted best-score-first. A chunk is kept only if its line
 * range does not overlap any previously kept chunk from the same file; at
 * most `limit` chunks are returned.
 */
export function deduplicateResults(results, limit) {
    const kept = [];
    // relativePath -> accepted [startLine, endLine] ranges for that file
    const rangesByFile = new Map();
    for (const hit of results) {
        if (kept.length >= limit)
            break;
        let ranges = rangesByFile.get(hit.relativePath);
        if (ranges === undefined) {
            ranges = [];
            rangesByFile.set(hit.relativePath, ranges);
        }
        const overlaps = ranges.some(([lo, hi]) => hit.startLine <= hi && hit.endLine >= lo);
        if (overlaps)
            continue;
        kept.push(hit);
        ranges.push([hit.startLine, hit.endLine]);
    }
    return kept;
}
54
+ }
55
/**
 * Format search results as a compact markdown table for token-efficient
 * output: file path, line range, score, and a rough token estimate per hit.
 * Consumers are expected to use the Read tool to fetch full code for the
 * interesting rows.
 */
export function formatCompactResults(results, query, rootPath) {
    if (results.length === 0) {
        return `No results found for "${query}" in ${rootPath}.`;
    }
    const out = [
        `Found ${results.length} result(s) for "${query}" in ${rootPath}:\n`,
        '| # | File | Lines | Score | ~Tokens |',
        '|---|------|-------|-------|---------|',
    ];
    results.forEach((r, idx) => {
        // ~4 chars per token, rounded up.
        const approxTokens = Math.ceil(r.content.length / 4);
        out.push(`| ${idx + 1} | \`${r.relativePath}\` | ${r.startLine}-${r.endLine} | ${r.score.toFixed(2)} | ~${approxTokens} |`);
    });
    out.push('', 'Use the Read tool to view full code for specific results.');
    return out.join('\n');
}
78
/**
 * Format search results as full markdown (headers + fenced code) for MCP
 * tool output. The fence language tag is sanitized to avoid breaking the
 * markdown when `language` contains unexpected characters.
 */
export function formatSearchResults(results, query, rootPath) {
    if (results.length === 0) {
        return `No results found for "${query}" in ${rootPath}.`;
    }
    const out = [`Found ${results.length} result(s) for "${query}" in ${rootPath}:\n`];
    results.forEach((r, idx) => {
        const fenceLang = r.language.replace(/[^a-zA-Z0-9_+-]/g, '');
        out.push(
            `### Result ${idx + 1} of ${results.length}`,
            `**File:** \`${r.relativePath}\` (lines ${r.startLine}-${r.endLine})`,
            `**Language:** ${r.language} | **Score:** ${r.score.toFixed(4)}`,
            '```' + fenceLang,
            r.content,
            '```',
            '',
        );
    });
    return out.join('\n');
}
101
+ //# sourceMappingURL=searcher.js.map
@@ -0,0 +1,6 @@
1
+ import type { FileSnapshot } from './sync.js';
2
+ export declare function loadSnapshot(rootPath: string): FileSnapshot | null;
3
+ export declare function saveSnapshot(rootPath: string, snapshot: FileSnapshot): void;
4
+ export declare function deleteSnapshot(rootPath: string): void;
5
+ export declare function snapshotExists(rootPath: string): boolean;
6
+ //# sourceMappingURL=snapshot-io.d.ts.map
@@ -0,0 +1,39 @@
1
+ import fs from 'node:fs';
2
+ import path from 'node:path';
3
+ import { pathToCollectionName, getSnapshotDir } from '../paths.js';
4
// Snapshot files live in the snapshot dir, named after the codebase's
// collection name.
function getSnapshotPath(rootPath) {
    const collection = pathToCollectionName(rootPath);
    return path.join(getSnapshotDir(), `${collection}.json`);
}
8
/**
 * Load the saved snapshot for a codebase.
 *
 * Returns null when no snapshot exists (ENOENT) and, with a warning, when
 * the file exists but cannot be read or parsed — a corrupt snapshot simply
 * triggers a full re-index rather than an error.
 */
export function loadSnapshot(rootPath) {
    const filePath = getSnapshotPath(rootPath);
    try {
        return JSON.parse(fs.readFileSync(filePath, 'utf-8'));
    }
    catch (err) {
        if (err.code === 'ENOENT')
            return null;
        console.warn(`Corrupted snapshot at ${filePath}, ignoring: ${err}`);
        return null;
    }
}
21
/**
 * Persist a snapshot for a codebase, creating the snapshot directory if
 * needed.
 *
 * Writes to a temp file and renames it into place so a crash mid-write
 * cannot leave a truncated JSON file (the previous direct writeFileSync
 * could); rename on the same filesystem is atomic on POSIX.
 */
export function saveSnapshot(rootPath, snapshot) {
    const filePath = getSnapshotPath(rootPath);
    fs.mkdirSync(path.dirname(filePath), { recursive: true });
    const tmpPath = `${filePath}.${process.pid}.tmp`;
    fs.writeFileSync(tmpPath, JSON.stringify(snapshot));
    fs.renameSync(tmpPath, filePath);
}
27
/**
 * Delete the snapshot for a codebase, if it exists.
 *
 * A missing file (ENOENT) is the expected no-op case. Other failures
 * (e.g. permissions) were previously swallowed silently; they are now
 * logged so a stale snapshot that refuses to die is at least visible.
 * Still never throws, so callers' cleanup flows are unaffected.
 */
export function deleteSnapshot(rootPath) {
    const filePath = getSnapshotPath(rootPath);
    try {
        fs.unlinkSync(filePath);
    }
    catch (err) {
        if (err.code !== 'ENOENT')
            console.warn(`Failed to delete snapshot at ${filePath}: ${err}`);
    }
}
36
/**
 * True when a snapshot file exists for this codebase.
 */
export function snapshotExists(rootPath) {
    const filePath = getSnapshotPath(rootPath);
    return fs.existsSync(filePath);
}
39
+ //# sourceMappingURL=snapshot-io.js.map
@@ -0,0 +1,35 @@
1
+ export interface FileSnapshot {
2
+ [relativePath: string]: {
3
+ contentHash: string;
4
+ };
5
+ }
6
+ export interface SyncResult {
7
+ added: string[];
8
+ modified: string[];
9
+ removed: string[];
10
+ }
11
+ /**
12
+ * Scan a directory and return relative paths of indexable files.
13
+ * Respects .gitignore if present.
14
+ */
15
+ export declare function scanFiles(rootPath: string, customExtensions?: string[], customIgnore?: string[]): Promise<string[]>;
16
+ /**
17
+ * Build a size+contentHash snapshot for a list of files.
18
+ */
19
+ export declare function buildSnapshot(rootPath: string, relativePaths: string[]): FileSnapshot;
20
+ /**
21
+ * Compare current snapshot to a previous one. Returns added, modified, and removed files.
22
+ * Uses content hash as the authoritative change signal — immune to git ops, IDE formatters,
23
+ * NFS clock skew, and other mtime-only pitfalls.
24
+ */
25
+ export declare function diffSnapshots(previous: FileSnapshot, current: FileSnapshot): SyncResult;
26
+ /**
27
+ * Parse .gitignore content into glob patterns.
28
+ * Pure function — no filesystem access.
29
+ */
30
+ export declare function parseGitignorePatterns(content: string): string[];
31
+ /**
32
+ * Map file extension to language name for the splitter.
33
+ */
34
+ export declare function extensionToLanguage(ext: string): string;
35
+ //# sourceMappingURL=sync.d.ts.map