@tai-io/codesearch 2026.313.1614

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. package/dist/build-info.d.ts +3 -0
  2. package/dist/build-info.js +4 -0
  3. package/dist/config.d.ts +62 -0
  4. package/dist/config.js +52 -0
  5. package/dist/core/cleanup.d.ts +8 -0
  6. package/dist/core/cleanup.js +41 -0
  7. package/dist/core/doc-indexer.d.ts +13 -0
  8. package/dist/core/doc-indexer.js +76 -0
  9. package/dist/core/doc-searcher.d.ts +13 -0
  10. package/dist/core/doc-searcher.js +65 -0
  11. package/dist/core/file-category.d.ts +7 -0
  12. package/dist/core/file-category.js +75 -0
  13. package/dist/core/indexer.d.ts +18 -0
  14. package/dist/core/indexer.js +177 -0
  15. package/dist/core/preview.d.ts +13 -0
  16. package/dist/core/preview.js +58 -0
  17. package/dist/core/repo-map.d.ts +33 -0
  18. package/dist/core/repo-map.js +144 -0
  19. package/dist/core/searcher.d.ts +12 -0
  20. package/dist/core/searcher.js +97 -0
  21. package/dist/core/sync.d.ts +15 -0
  22. package/dist/core/sync.js +212 -0
  23. package/dist/core/targeted-indexer.d.ts +19 -0
  24. package/dist/core/targeted-indexer.js +127 -0
  25. package/dist/embedding/factory.d.ts +4 -0
  26. package/dist/embedding/factory.js +24 -0
  27. package/dist/embedding/openai.d.ts +33 -0
  28. package/dist/embedding/openai.js +234 -0
  29. package/dist/embedding/truncate.d.ts +6 -0
  30. package/dist/embedding/truncate.js +14 -0
  31. package/dist/embedding/types.d.ts +18 -0
  32. package/dist/embedding/types.js +2 -0
  33. package/dist/errors.d.ts +17 -0
  34. package/dist/errors.js +21 -0
  35. package/dist/format.d.ts +18 -0
  36. package/dist/format.js +151 -0
  37. package/dist/hooks/cli-router.d.ts +7 -0
  38. package/dist/hooks/cli-router.js +47 -0
  39. package/dist/hooks/hook-output.d.ts +56 -0
  40. package/dist/hooks/hook-output.js +21 -0
  41. package/dist/hooks/post-tool-use.d.ts +13 -0
  42. package/dist/hooks/post-tool-use.js +123 -0
  43. package/dist/hooks/stop-hook.d.ts +11 -0
  44. package/dist/hooks/stop-hook.js +137 -0
  45. package/dist/hooks/targeted-runner.d.ts +11 -0
  46. package/dist/hooks/targeted-runner.js +58 -0
  47. package/dist/index.d.ts +3 -0
  48. package/dist/index.js +138 -0
  49. package/dist/paths.d.ts +11 -0
  50. package/dist/paths.js +54 -0
  51. package/dist/setup-message.d.ts +4 -0
  52. package/dist/setup-message.js +48 -0
  53. package/dist/splitter/ast.d.ts +13 -0
  54. package/dist/splitter/ast.js +231 -0
  55. package/dist/splitter/line.d.ts +10 -0
  56. package/dist/splitter/line.js +103 -0
  57. package/dist/splitter/symbol-extract.d.ts +16 -0
  58. package/dist/splitter/symbol-extract.js +61 -0
  59. package/dist/splitter/types.d.ts +16 -0
  60. package/dist/splitter/types.js +2 -0
  61. package/dist/state/doc-metadata.d.ts +18 -0
  62. package/dist/state/doc-metadata.js +59 -0
  63. package/dist/state/registry.d.ts +7 -0
  64. package/dist/state/registry.js +46 -0
  65. package/dist/state/snapshot.d.ts +26 -0
  66. package/dist/state/snapshot.js +100 -0
  67. package/dist/tool-schemas.d.ts +215 -0
  68. package/dist/tool-schemas.js +269 -0
  69. package/dist/tools.d.ts +58 -0
  70. package/dist/tools.js +245 -0
  71. package/dist/vectordb/rrf.d.ts +32 -0
  72. package/dist/vectordb/rrf.js +88 -0
  73. package/dist/vectordb/sqlite.d.ts +34 -0
  74. package/dist/vectordb/sqlite.js +624 -0
  75. package/dist/vectordb/types.d.ts +63 -0
  76. package/dist/vectordb/types.js +2 -0
  77. package/messages.yaml +69 -0
  78. package/package.json +79 -0
@@ -0,0 +1,3 @@
1
+ export declare const BUILD_VERSION = "2026.313.1614";
2
+ export declare const BUILD_TIMESTAMP = "2026-03-13T16:14:48.402Z";
3
+ //# sourceMappingURL=build-info.d.ts.map
@@ -0,0 +1,4 @@
1
+ // Auto-generated by scripts/generate-build-info.ts — do not edit
2
+ export const BUILD_VERSION = '2026.313.1614';
3
+ export const BUILD_TIMESTAMP = '2026-03-13T16:14:48.402Z';
4
+ //# sourceMappingURL=build-info.js.map
@@ -0,0 +1,62 @@
1
+ import { z } from 'zod';
2
+ declare const configSchema: z.ZodEffects<z.ZodObject<{
3
+ embeddingProvider: z.ZodDefault<z.ZodEnum<["openai", "ollama", "local"]>>;
4
+ openaiApiKey: z.ZodDefault<z.ZodString>;
5
+ openaiBaseUrl: z.ZodOptional<z.ZodString>;
6
+ ollamaBaseUrl: z.ZodDefault<z.ZodString>;
7
+ embeddingModel: z.ZodOptional<z.ZodString>;
8
+ embeddingBatchSize: z.ZodDefault<z.ZodNumber>;
9
+ indexingConcurrency: z.ZodDefault<z.ZodNumber>;
10
+ eideticDataDir: z.ZodDefault<z.ZodString>;
11
+ customExtensions: z.ZodEffects<z.ZodDefault<z.ZodArray<z.ZodString, "many">>, string[], unknown>;
12
+ customIgnorePatterns: z.ZodEffects<z.ZodDefault<z.ZodArray<z.ZodString, "many">>, string[], unknown>;
13
+ }, "strip", z.ZodTypeAny, {
14
+ embeddingProvider: "openai" | "ollama" | "local";
15
+ openaiApiKey: string;
16
+ ollamaBaseUrl: string;
17
+ embeddingBatchSize: number;
18
+ indexingConcurrency: number;
19
+ eideticDataDir: string;
20
+ customExtensions: string[];
21
+ customIgnorePatterns: string[];
22
+ openaiBaseUrl?: string | undefined;
23
+ embeddingModel?: string | undefined;
24
+ }, {
25
+ embeddingProvider?: "openai" | "ollama" | "local" | undefined;
26
+ openaiApiKey?: string | undefined;
27
+ openaiBaseUrl?: string | undefined;
28
+ ollamaBaseUrl?: string | undefined;
29
+ embeddingModel?: string | undefined;
30
+ embeddingBatchSize?: number | undefined;
31
+ indexingConcurrency?: number | undefined;
32
+ eideticDataDir?: string | undefined;
33
+ customExtensions?: unknown;
34
+ customIgnorePatterns?: unknown;
35
+ }>, {
36
+ embeddingModel: string;
37
+ embeddingProvider: "openai" | "ollama" | "local";
38
+ openaiApiKey: string;
39
+ ollamaBaseUrl: string;
40
+ embeddingBatchSize: number;
41
+ indexingConcurrency: number;
42
+ eideticDataDir: string;
43
+ customExtensions: string[];
44
+ customIgnorePatterns: string[];
45
+ openaiBaseUrl?: string | undefined;
46
+ }, {
47
+ embeddingProvider?: "openai" | "ollama" | "local" | undefined;
48
+ openaiApiKey?: string | undefined;
49
+ openaiBaseUrl?: string | undefined;
50
+ ollamaBaseUrl?: string | undefined;
51
+ embeddingModel?: string | undefined;
52
+ embeddingBatchSize?: number | undefined;
53
+ indexingConcurrency?: number | undefined;
54
+ eideticDataDir?: string | undefined;
55
+ customExtensions?: unknown;
56
+ customIgnorePatterns?: unknown;
57
+ }>;
58
+ export type Config = z.infer<typeof configSchema>;
59
+ export declare function loadConfig(): Config;
60
+ export declare function getConfig(): Config;
61
+ export {};
62
+ //# sourceMappingURL=config.d.ts.map
package/dist/config.js ADDED
@@ -0,0 +1,52 @@
1
+ import os from 'node:os';
2
+ import path from 'node:path';
3
+ import { z } from 'zod';
4
+ import { ConfigError } from './errors.js';
5
+ const configSchema = z
6
+ .object({
7
+ embeddingProvider: z.enum(['openai', 'ollama', 'local']).default('openai'),
8
+ openaiApiKey: z.string().default(''),
9
+ openaiBaseUrl: z.string().optional(),
10
+ ollamaBaseUrl: z.string().default('http://localhost:11434/v1'),
11
+ embeddingModel: z.string().optional(),
12
+ embeddingBatchSize: z.coerce.number().int().min(1).max(2048).default(100),
13
+ indexingConcurrency: z.coerce.number().int().min(1).max(32).default(8),
14
+ eideticDataDir: z.string().default(path.join(os.homedir(), '.eidetic')),
15
+ customExtensions: z.preprocess((val) => (typeof val === 'string' ? JSON.parse(val) : val), z.array(z.string()).default([])),
16
+ customIgnorePatterns: z.preprocess((val) => (typeof val === 'string' ? JSON.parse(val) : val), z.array(z.string()).default([])),
17
+ })
18
+ .transform((cfg) => ({
19
+ ...cfg,
20
+ embeddingModel: cfg.embeddingModel ??
21
+ (cfg.embeddingProvider === 'ollama' ? 'nomic-embed-text' : 'text-embedding-3-small'),
22
+ }));
23
+ let cachedConfig = null;
24
+ export function loadConfig() {
25
+ const raw = {
26
+ embeddingProvider: process.env.EMBEDDING_PROVIDER,
27
+ openaiApiKey: (process.env.OPENAI_API_KEY ?? '').trim().replace(/^["']|["']$/g, ''),
28
+ openaiBaseUrl: process.env.OPENAI_BASE_URL?.trim() ?? undefined,
29
+ ollamaBaseUrl: process.env.OLLAMA_BASE_URL,
30
+ embeddingModel: process.env.EMBEDDING_MODEL?.trim() ?? undefined,
31
+ embeddingBatchSize: process.env.EMBEDDING_BATCH_SIZE,
32
+ indexingConcurrency: process.env.INDEXING_CONCURRENCY,
33
+ eideticDataDir: process.env.EIDETIC_DATA_DIR,
34
+ customExtensions: process.env.CUSTOM_EXTENSIONS,
35
+ customIgnorePatterns: process.env.CUSTOM_IGNORE_PATTERNS,
36
+ };
37
+ const result = configSchema.safeParse(raw);
38
+ if (!result.success) {
39
+ const issues = result.error.issues.map((i) => ` ${i.path.join('.')}: ${i.message}`).join('\n');
40
+ throw new ConfigError(`Invalid configuration:\n${issues}`);
41
+ }
42
+ const config = result.data;
43
+ cachedConfig = config;
44
+ return cachedConfig;
45
+ }
46
+ export function getConfig() {
47
+ if (!cachedConfig) {
48
+ return loadConfig();
49
+ }
50
+ return cachedConfig;
51
+ }
52
+ //# sourceMappingURL=config.js.map
@@ -0,0 +1,8 @@
1
+ import type { VectorDB } from '../vectordb/types.js';
2
+ export interface CleanupResult {
3
+ removedFiles: string[];
4
+ totalRemoved: number;
5
+ durationMs: number;
6
+ }
7
+ export declare function cleanupVectors(rootPath: string, vectordb: VectorDB, onProgress?: (pct: number, msg: string) => void, customExtensions?: string[], customIgnorePatterns?: string[]): Promise<CleanupResult>;
8
+ //# sourceMappingURL=cleanup.d.ts.map
@@ -0,0 +1,41 @@
1
+ import { scanFiles, buildSnapshot, diffSnapshots } from './sync.js';
2
+ import { loadSnapshot, saveSnapshot } from '../vectordb/sqlite.js';
3
+ import { pathToCollectionName } from '../paths.js';
4
+ import { IndexingError } from '../errors.js';
5
+ export async function cleanupVectors(rootPath, vectordb, onProgress, customExtensions, customIgnorePatterns) {
6
+ const startTime = Date.now();
7
+ const previousSnapshot = loadSnapshot(rootPath);
8
+ if (!previousSnapshot) {
9
+ throw new IndexingError(`No snapshot found for ${rootPath}. Index the codebase first before running cleanup.`);
10
+ }
11
+ onProgress?.(10, 'Scanning files on disk...');
12
+ const filePaths = await scanFiles(rootPath, customExtensions ?? [], customIgnorePatterns ?? []);
13
+ onProgress?.(40, 'Building current snapshot...');
14
+ const currentSnapshot = buildSnapshot(rootPath, filePaths);
15
+ onProgress?.(60, 'Diffing snapshots...');
16
+ const { removed } = diffSnapshots(previousSnapshot, currentSnapshot);
17
+ if (removed.length === 0) {
18
+ onProgress?.(100, 'No removed files found.');
19
+ return { removedFiles: [], totalRemoved: 0, durationMs: Date.now() - startTime };
20
+ }
21
+ const collectionName = pathToCollectionName(rootPath);
22
+ let deletedCount = 0;
23
+ for (const rel of removed) {
24
+ onProgress?.(60 + Math.round((deletedCount / removed.length) * 35), `Deleting vectors for ${rel}...`);
25
+ await vectordb.deleteByPath(collectionName, rel);
26
+ deletedCount++;
27
+ }
28
+ // Save updated snapshot (removes the deleted file entries)
29
+ const updatedSnapshot = { ...previousSnapshot };
30
+ for (const rel of removed) {
31
+ Reflect.deleteProperty(updatedSnapshot, rel);
32
+ }
33
+ saveSnapshot(rootPath, updatedSnapshot);
34
+ onProgress?.(100, 'Cleanup complete.');
35
+ return {
36
+ removedFiles: removed,
37
+ totalRemoved: removed.length,
38
+ durationMs: Date.now() - startTime,
39
+ };
40
+ }
41
+ //# sourceMappingURL=cleanup.js.map
@@ -0,0 +1,13 @@
1
+ import type { Embedding } from '../embedding/types.js';
2
+ import type { VectorDB } from '../vectordb/types.js';
3
+ export interface DocIndexResult {
4
+ library: string;
5
+ topic: string;
6
+ source: string;
7
+ collectionName: string;
8
+ totalChunks: number;
9
+ estimatedTokens: number;
10
+ durationMs: number;
11
+ }
12
+ export declare function indexDocument(content: string, source: string, library: string, topic: string, embedding: Embedding, vectordb: VectorDB, ttlDays?: number): Promise<DocIndexResult>;
13
+ //# sourceMappingURL=doc-indexer.d.ts.map
@@ -0,0 +1,76 @@
1
+ import { randomUUID } from 'node:crypto';
2
+ import { LineSplitter } from '../splitter/line.js';
3
+ import { docCollectionName } from '../paths.js';
4
+ import { upsertDocEntry } from '../state/doc-metadata.js';
5
+ import { IndexingError } from '../errors.js';
6
+ import { getConfig } from '../config.js';
7
+ export async function indexDocument(content, source, library, topic, embedding, vectordb, ttlDays = 7) {
8
+ const start = Date.now();
9
+ if (!content || content.trim().length === 0) {
10
+ throw new IndexingError('Document content is empty.');
11
+ }
12
+ if (!source)
13
+ throw new IndexingError('Document source is required.');
14
+ if (!library)
15
+ throw new IndexingError('Library name is required.');
16
+ if (!topic)
17
+ throw new IndexingError('Topic is required.');
18
+ const collection = docCollectionName(library);
19
+ const config = getConfig();
20
+ const splitter = new LineSplitter();
21
+ const chunks = splitter.split(content, 'markdown', source);
22
+ if (chunks.length === 0) {
23
+ throw new IndexingError('Document produced no chunks after splitting.');
24
+ }
25
+ const exists = await vectordb.hasCollection(collection);
26
+ if (!exists) {
27
+ await vectordb.createCollection(collection, embedding.dimension);
28
+ }
29
+ try {
30
+ await vectordb.deleteByPath(collection, source);
31
+ }
32
+ catch {
33
+ // collection may be new with no matching docs
34
+ }
35
+ const batchSize = config.embeddingBatchSize;
36
+ let totalChunks = 0;
37
+ let totalTokens = 0;
38
+ for (let i = 0; i < chunks.length; i += batchSize) {
39
+ const batch = chunks.slice(i, i + batchSize);
40
+ const texts = batch.map((c) => c.content);
41
+ const estimation = embedding.estimateTokens(texts);
42
+ totalTokens += estimation.estimatedTokens;
43
+ const vectors = await embedding.embedBatch(texts);
44
+ const documents = batch.map((chunk, j) => ({
45
+ id: randomUUID(),
46
+ content: chunk.content,
47
+ vector: vectors[j],
48
+ relativePath: source,
49
+ startLine: chunk.startLine,
50
+ endLine: chunk.endLine,
51
+ fileExtension: '.md',
52
+ language: 'markdown',
53
+ }));
54
+ await vectordb.insert(collection, documents);
55
+ totalChunks += batch.length;
56
+ }
57
+ upsertDocEntry({
58
+ library,
59
+ topic,
60
+ source,
61
+ collectionName: collection,
62
+ indexedAt: new Date().toISOString(),
63
+ ttlDays,
64
+ totalChunks,
65
+ });
66
+ return {
67
+ library,
68
+ topic,
69
+ source,
70
+ collectionName: collection,
71
+ totalChunks,
72
+ estimatedTokens: totalTokens,
73
+ durationMs: Date.now() - start,
74
+ };
75
+ }
76
+ //# sourceMappingURL=doc-indexer.js.map
@@ -0,0 +1,13 @@
1
+ import type { Embedding } from '../embedding/types.js';
2
+ import type { VectorDB, SearchResult } from '../vectordb/types.js';
3
+ export interface DocSearchResult extends SearchResult {
4
+ library: string;
5
+ topic: string;
6
+ source: string;
7
+ stale: boolean;
8
+ }
9
+ export declare function searchDocuments(query: string, embedding: Embedding, vectordb: VectorDB, options?: {
10
+ library?: string;
11
+ limit?: number;
12
+ }): Promise<DocSearchResult[]>;
13
+ //# sourceMappingURL=doc-searcher.d.ts.map
@@ -0,0 +1,65 @@
1
+ import { docCollectionName } from '../paths.js';
2
+ import { loadDocMetadata, isStale } from '../state/doc-metadata.js';
3
+ import { SearchError } from '../errors.js';
4
+ import { deduplicateResults } from './searcher.js';
5
+ const DEFAULT_LIMIT = 5;
6
+ const MAX_LIMIT = 20;
7
+ export async function searchDocuments(query, embedding, vectordb, options = {}) {
8
+ if (!query || query.trim().length === 0) {
9
+ throw new SearchError('Search query is required.');
10
+ }
11
+ const limit = Math.min(Math.max(1, options.limit ?? DEFAULT_LIMIT), MAX_LIMIT);
12
+ const metadata = loadDocMetadata();
13
+ let collectionsToSearch;
14
+ if (options.library) {
15
+ const collection = docCollectionName(options.library);
16
+ const entries = Object.values(metadata).filter((e) => e.collectionName === collection);
17
+ if (entries.length === 0) {
18
+ throw new SearchError(`No cached documentation found for library "${options.library}". ` +
19
+ `Use ingest to cache documentation first.`);
20
+ }
21
+ collectionsToSearch = [{ collection, entries }];
22
+ }
23
+ else {
24
+ const collectionMap = new Map();
25
+ for (const entry of Object.values(metadata)) {
26
+ const existing = collectionMap.get(entry.collectionName) ?? [];
27
+ existing.push(entry);
28
+ collectionMap.set(entry.collectionName, existing);
29
+ }
30
+ if (collectionMap.size === 0) {
31
+ throw new SearchError('No cached documentation found. Use ingest to cache documentation first.');
32
+ }
33
+ collectionsToSearch = [...collectionMap.entries()].map(([collection, entries]) => ({
34
+ collection,
35
+ entries,
36
+ }));
37
+ }
38
+ const queryVector = await embedding.embed(query);
39
+ const overFetchLimit = Math.min(limit * 3, MAX_LIMIT);
40
+ const allResults = [];
41
+ for (const { collection, entries } of collectionsToSearch) {
42
+ const exists = await vectordb.hasCollection(collection);
43
+ if (!exists)
44
+ continue;
45
+ const results = await vectordb.search(collection, {
46
+ queryVector,
47
+ queryText: query,
48
+ limit: overFetchLimit,
49
+ });
50
+ for (const r of results) {
51
+ const matchingEntry = entries.find((e) => e.source === r.relativePath);
52
+ allResults.push({
53
+ ...r,
54
+ library: matchingEntry?.library ?? 'unknown',
55
+ topic: matchingEntry?.topic ?? 'unknown',
56
+ source: r.relativePath,
57
+ stale: matchingEntry ? isStale(matchingEntry) : false,
58
+ });
59
+ }
60
+ }
61
+ allResults.sort((a, b) => b.score - a.score);
62
+ const deduped = deduplicateResults(allResults, limit);
63
+ return deduped.map((r) => r);
64
+ }
65
+ //# sourceMappingURL=doc-searcher.js.map
@@ -0,0 +1,7 @@
1
+ export type FileCategory = 'source' | 'test' | 'doc' | 'config' | 'generated';
2
+ /**
3
+ * Classify a file by category based on its relative path.
4
+ * First match wins.
5
+ */
6
+ export declare function classifyFileCategory(relativePath: string): FileCategory;
7
+ //# sourceMappingURL=file-category.d.ts.map
@@ -0,0 +1,75 @@
1
+ /**
2
+ * Classify a file by category based on its relative path.
3
+ * First match wins.
4
+ */
5
+ export function classifyFileCategory(relativePath) {
6
+ const normalized = relativePath.replace(/\\/g, '/');
7
+ const segments = normalized.split('/');
8
+ const filename = segments[segments.length - 1];
9
+ const lower = normalized.toLowerCase();
10
+ const filenameLower = filename.toLowerCase();
11
+ // test
12
+ if (lower.includes('/__tests__/') ||
13
+ lower.includes('.test.') ||
14
+ lower.includes('.spec.') ||
15
+ lower.includes('_test.') ||
16
+ lower.includes('_spec.') ||
17
+ filenameLower.startsWith('test_') ||
18
+ filenameLower.startsWith('test-')) {
19
+ return 'test';
20
+ }
21
+ // doc
22
+ const ext = filename.includes('.') ? filename.slice(filename.lastIndexOf('.')).toLowerCase() : '';
23
+ if (['.md', '.mdx', '.rst', '.txt'].includes(ext) ||
24
+ segments.some((s) => s.toLowerCase() === 'docs' || s.toLowerCase() === 'doc') ||
25
+ /^readme/i.test(filename) ||
26
+ /^changelog/i.test(filename) ||
27
+ /^license/i.test(filename)) {
28
+ return 'doc';
29
+ }
30
+ // generated
31
+ if (lower.includes('/dist/') ||
32
+ lower.startsWith('dist/') ||
33
+ lower.includes('/build/') ||
34
+ lower.startsWith('build/') ||
35
+ lower.includes('/generated/') ||
36
+ lower.startsWith('generated/') ||
37
+ lower.includes('.generated.') ||
38
+ /\.[gG]\./.test(filename)) {
39
+ return 'generated';
40
+ }
41
+ // config
42
+ if (isConfigFile(normalized, filename, ext, segments)) {
43
+ return 'config';
44
+ }
45
+ return 'source';
46
+ }
47
+ function isConfigFile(normalized, filename, ext, segments) {
48
+ const filenameLower = filename.toLowerCase();
49
+ // Explicit filename matches
50
+ if (filenameLower === 'package.json')
51
+ return true;
52
+ if (filenameLower === 'makefile')
53
+ return true;
54
+ if (filenameLower === 'dockerfile')
55
+ return true;
56
+ if (/^tsconfig.*\.json$/.test(filenameLower))
57
+ return true;
58
+ if (filenameLower.startsWith('docker-compose'))
59
+ return true;
60
+ if (filenameLower.startsWith('.eslintrc'))
61
+ return true;
62
+ if (filenameLower.startsWith('.prettierrc'))
63
+ return true;
64
+ // *.config.* pattern
65
+ if (filename.includes('.config.'))
66
+ return true;
67
+ // .yaml/.yml/.toml not under src/
68
+ if (['.yaml', '.yml', '.toml'].includes(ext)) {
69
+ const underSrc = segments.some((s) => s.toLowerCase() === 'src');
70
+ if (!underSrc)
71
+ return true;
72
+ }
73
+ return false;
74
+ }
75
+ //# sourceMappingURL=file-category.js.map
@@ -0,0 +1,18 @@
1
+ import type { Embedding } from '../embedding/types.js';
2
+ import type { VectorDB } from '../vectordb/types.js';
3
+ export { previewCodebase, type PreviewResult } from './preview.js';
4
+ export { saveSnapshot, deleteSnapshot, snapshotExists } from '../vectordb/sqlite.js';
5
+ export interface IndexResult {
6
+ totalFiles: number;
7
+ totalChunks: number;
8
+ addedFiles: number;
9
+ modifiedFiles: number;
10
+ removedFiles: number;
11
+ skippedFiles: number;
12
+ estimatedTokens: number;
13
+ estimatedCostUsd: number;
14
+ durationMs: number;
15
+ parseFailures: string[];
16
+ }
17
+ export declare function indexCodebase(rootPath: string, embedding: Embedding, vectordb: VectorDB, force?: boolean, onProgress?: (pct: number, msg: string) => void, customExtensions?: string[], customIgnorePatterns?: string[]): Promise<IndexResult>;
18
+ //# sourceMappingURL=indexer.d.ts.map
@@ -0,0 +1,177 @@
1
+ import fs from 'node:fs';
2
+ import path from 'node:path';
3
+ import { randomUUID } from 'node:crypto';
4
+ import { AstSplitter } from '../splitter/ast.js';
5
+ import { LineSplitter } from '../splitter/line.js';
6
+ import { scanFiles, buildSnapshot, diffSnapshots, extensionToLanguage } from './sync.js';
7
+ import { getConfig } from '../config.js';
8
+ import { normalizePath, pathToCollectionName } from '../paths.js';
9
+ import { IndexingError } from '../errors.js';
10
+ import { classifyFileCategory } from './file-category.js';
11
+ import { loadSnapshot, saveSnapshot } from '../vectordb/sqlite.js';
12
+ export { previewCodebase } from './preview.js';
13
+ export { saveSnapshot, deleteSnapshot, snapshotExists } from '../vectordb/sqlite.js';
14
+ export async function indexCodebase(rootPath, embedding, vectordb, force = false, onProgress, customExtensions, customIgnorePatterns) {
15
+ const start = Date.now();
16
+ const normalizedPath = normalizePath(rootPath);
17
+ const collectionName = pathToCollectionName(normalizedPath);
18
+ const config = getConfig();
19
+ onProgress?.(0, 'Scanning files...');
20
+ const filePaths = await scanFiles(normalizedPath, customExtensions, customIgnorePatterns);
21
+ if (filePaths.length === 0) {
22
+ throw new IndexingError(`No indexable files found in ${normalizedPath}`);
23
+ }
24
+ const currentSnapshot = buildSnapshot(normalizedPath, filePaths);
25
+ let filesToProcess;
26
+ let removedFiles = [];
27
+ let addedCount = 0;
28
+ let modifiedCount = 0;
29
+ if (force) {
30
+ onProgress?.(5, 'Dropping existing index...');
31
+ await vectordb.dropCollection(collectionName);
32
+ await vectordb.createCollection(collectionName, embedding.dimension);
33
+ filesToProcess = filePaths;
34
+ addedCount = filePaths.length;
35
+ }
36
+ else {
37
+ const previousSnapshot = loadSnapshot(normalizedPath);
38
+ if (!previousSnapshot || !(await vectordb.hasCollection(collectionName))) {
39
+ // First time indexing
40
+ await vectordb.createCollection(collectionName, embedding.dimension);
41
+ filesToProcess = filePaths;
42
+ addedCount = filePaths.length;
43
+ }
44
+ else {
45
+ const diff = diffSnapshots(previousSnapshot, currentSnapshot);
46
+ addedCount = diff.added.length;
47
+ modifiedCount = diff.modified.length;
48
+ removedFiles = diff.removed;
49
+ const toDelete = [...diff.removed, ...diff.modified];
50
+ for (const rel of toDelete) {
51
+ await vectordb.deleteByPath(collectionName, rel);
52
+ }
53
+ filesToProcess = [...diff.added, ...diff.modified];
54
+ }
55
+ }
56
+ if (filesToProcess.length === 0) {
57
+ saveSnapshot(normalizedPath, currentSnapshot);
58
+ return {
59
+ totalFiles: filePaths.length,
60
+ totalChunks: 0,
61
+ addedFiles: 0,
62
+ modifiedFiles: 0,
63
+ removedFiles: removedFiles.length,
64
+ skippedFiles: filePaths.length,
65
+ estimatedTokens: 0,
66
+ estimatedCostUsd: 0,
67
+ durationMs: Date.now() - start,
68
+ parseFailures: [],
69
+ };
70
+ }
71
+ onProgress?.(10, `Splitting ${filesToProcess.length} files...`);
72
+ const astSplitter = new AstSplitter();
73
+ const lineSplitter = new LineSplitter();
74
+ const allChunks = [];
75
+ const parseFailures = [];
76
+ const concurrency = config.indexingConcurrency;
77
+ for (let i = 0; i < filesToProcess.length; i += concurrency) {
78
+ const batch = filesToProcess.slice(i, i + concurrency);
79
+ const batchResults = await Promise.all(
80
+ // eslint-disable-next-line @typescript-eslint/require-await
81
+ batch.map(async (relPath) => {
82
+ const fullPath = path.join(normalizedPath, relPath);
83
+ try {
84
+ const code = fs.readFileSync(fullPath, 'utf-8');
85
+ if (code.trim().length === 0)
86
+ return { chunks: [], failed: false };
87
+ const ext = path.extname(relPath);
88
+ const language = extensionToLanguage(ext);
89
+ let chunks = astSplitter.split(code, language, relPath);
90
+ if (chunks.length === 0) {
91
+ chunks = lineSplitter.split(code, language, relPath);
92
+ }
93
+ if (chunks.length === 0)
94
+ return { chunks: [], failed: true };
95
+ return { chunks, failed: false };
96
+ }
97
+ catch (err) {
98
+ console.warn(`Failed to process "${relPath}":`, err);
99
+ return { chunks: [], failed: true };
100
+ }
101
+ }));
102
+ for (let j = 0; j < batchResults.length; j++) {
103
+ const { chunks, failed } = batchResults[j];
104
+ allChunks.push(...chunks);
105
+ if (failed)
106
+ parseFailures.push(batch[j]);
107
+ }
108
+ }
109
+ if (parseFailures.length > 0) {
110
+ console.warn(`Warning: ${parseFailures.length} file(s) produced no chunks: ${parseFailures.slice(0, 10).join(', ')}` +
111
+ (parseFailures.length > 10 ? ` (and ${parseFailures.length - 10} more)` : ''));
112
+ }
113
+ if (allChunks.length === 0) {
114
+ saveSnapshot(normalizedPath, currentSnapshot);
115
+ return {
116
+ totalFiles: filePaths.length,
117
+ totalChunks: 0,
118
+ addedFiles: addedCount,
119
+ modifiedFiles: modifiedCount,
120
+ removedFiles: removedFiles.length,
121
+ skippedFiles: filePaths.length - filesToProcess.length,
122
+ estimatedTokens: 0,
123
+ estimatedCostUsd: 0,
124
+ durationMs: Date.now() - start,
125
+ parseFailures,
126
+ };
127
+ }
128
+ const chunkTexts = allChunks.map((c) => c.content);
129
+ const estimation = embedding.estimateTokens(chunkTexts);
130
+ console.log(`Indexing ${filesToProcess.length} files -> ${allChunks.length} chunks -> ` +
131
+ `~${(estimation.estimatedTokens / 1000).toFixed(0)}K tokens (~$${estimation.estimatedCostUsd.toFixed(4)})`);
132
+ const batchSize = config.embeddingBatchSize;
133
+ let processedChunks = 0;
134
+ for (let i = 0; i < allChunks.length; i += batchSize) {
135
+ const batch = allChunks.slice(i, i + batchSize);
136
+ const texts = batch.map((c) => c.content);
137
+ const pct = 10 + Math.round((i / allChunks.length) * 85);
138
+ onProgress?.(pct, `Embedding batch ${Math.floor(i / batchSize) + 1}/${Math.ceil(allChunks.length / batchSize)}...`);
139
+ const vectors = await embedding.embedBatch(texts);
140
+ if (vectors.length !== texts.length) {
141
+ throw new IndexingError(`Embedding dimension mismatch: sent ${texts.length} texts, got ${vectors.length} vectors`);
142
+ }
143
+ const documents = batch.map((chunk, j) => ({
144
+ id: randomUUID(),
145
+ content: chunk.content,
146
+ vector: vectors[j],
147
+ relativePath: chunk.filePath,
148
+ startLine: chunk.startLine,
149
+ endLine: chunk.endLine,
150
+ fileExtension: path.extname(chunk.filePath),
151
+ language: chunk.language,
152
+ fileCategory: classifyFileCategory(chunk.filePath),
153
+ symbolName: chunk.symbolName,
154
+ symbolKind: chunk.symbolKind,
155
+ symbolSignature: chunk.symbolSignature,
156
+ parentSymbol: chunk.parentSymbol,
157
+ }));
158
+ await vectordb.insert(collectionName, documents);
159
+ processedChunks += batch.length;
160
+ }
161
+ onProgress?.(95, 'Saving snapshot...');
162
+ saveSnapshot(normalizedPath, currentSnapshot);
163
+ onProgress?.(100, 'Done');
164
+ return {
165
+ totalFiles: filePaths.length,
166
+ totalChunks: processedChunks,
167
+ addedFiles: addedCount,
168
+ modifiedFiles: modifiedCount,
169
+ removedFiles: removedFiles.length,
170
+ skippedFiles: filePaths.length - filesToProcess.length,
171
+ estimatedTokens: estimation.estimatedTokens,
172
+ estimatedCostUsd: estimation.estimatedCostUsd,
173
+ durationMs: Date.now() - start,
174
+ parseFailures,
175
+ };
176
+ }
177
+ //# sourceMappingURL=indexer.js.map
@@ -0,0 +1,13 @@
1
+ export interface PreviewResult {
2
+ totalFiles: number;
3
+ byExtension: Record<string, number>;
4
+ topDirectories: {
5
+ dir: string;
6
+ count: number;
7
+ }[];
8
+ estimatedTokens: number;
9
+ estimatedCostUsd: number;
10
+ warnings: string[];
11
+ }
12
+ export declare function previewCodebase(rootPath: string, customExtensions?: string[], customIgnorePatterns?: string[]): Promise<PreviewResult>;
13
+ //# sourceMappingURL=preview.d.ts.map