@tai-io/codesearch 2026.313.1614

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. package/dist/build-info.d.ts +3 -0
  2. package/dist/build-info.js +4 -0
  3. package/dist/config.d.ts +62 -0
  4. package/dist/config.js +52 -0
  5. package/dist/core/cleanup.d.ts +8 -0
  6. package/dist/core/cleanup.js +41 -0
  7. package/dist/core/doc-indexer.d.ts +13 -0
  8. package/dist/core/doc-indexer.js +76 -0
  9. package/dist/core/doc-searcher.d.ts +13 -0
  10. package/dist/core/doc-searcher.js +65 -0
  11. package/dist/core/file-category.d.ts +7 -0
  12. package/dist/core/file-category.js +75 -0
  13. package/dist/core/indexer.d.ts +18 -0
  14. package/dist/core/indexer.js +177 -0
  15. package/dist/core/preview.d.ts +13 -0
  16. package/dist/core/preview.js +58 -0
  17. package/dist/core/repo-map.d.ts +33 -0
  18. package/dist/core/repo-map.js +144 -0
  19. package/dist/core/searcher.d.ts +12 -0
  20. package/dist/core/searcher.js +97 -0
  21. package/dist/core/sync.d.ts +15 -0
  22. package/dist/core/sync.js +212 -0
  23. package/dist/core/targeted-indexer.d.ts +19 -0
  24. package/dist/core/targeted-indexer.js +127 -0
  25. package/dist/embedding/factory.d.ts +4 -0
  26. package/dist/embedding/factory.js +24 -0
  27. package/dist/embedding/openai.d.ts +33 -0
  28. package/dist/embedding/openai.js +234 -0
  29. package/dist/embedding/truncate.d.ts +6 -0
  30. package/dist/embedding/truncate.js +14 -0
  31. package/dist/embedding/types.d.ts +18 -0
  32. package/dist/embedding/types.js +2 -0
  33. package/dist/errors.d.ts +17 -0
  34. package/dist/errors.js +21 -0
  35. package/dist/format.d.ts +18 -0
  36. package/dist/format.js +151 -0
  37. package/dist/hooks/cli-router.d.ts +7 -0
  38. package/dist/hooks/cli-router.js +47 -0
  39. package/dist/hooks/hook-output.d.ts +56 -0
  40. package/dist/hooks/hook-output.js +21 -0
  41. package/dist/hooks/post-tool-use.d.ts +13 -0
  42. package/dist/hooks/post-tool-use.js +123 -0
  43. package/dist/hooks/stop-hook.d.ts +11 -0
  44. package/dist/hooks/stop-hook.js +137 -0
  45. package/dist/hooks/targeted-runner.d.ts +11 -0
  46. package/dist/hooks/targeted-runner.js +58 -0
  47. package/dist/index.d.ts +3 -0
  48. package/dist/index.js +138 -0
  49. package/dist/paths.d.ts +11 -0
  50. package/dist/paths.js +54 -0
  51. package/dist/setup-message.d.ts +4 -0
  52. package/dist/setup-message.js +48 -0
  53. package/dist/splitter/ast.d.ts +13 -0
  54. package/dist/splitter/ast.js +231 -0
  55. package/dist/splitter/line.d.ts +10 -0
  56. package/dist/splitter/line.js +103 -0
  57. package/dist/splitter/symbol-extract.d.ts +16 -0
  58. package/dist/splitter/symbol-extract.js +61 -0
  59. package/dist/splitter/types.d.ts +16 -0
  60. package/dist/splitter/types.js +2 -0
  61. package/dist/state/doc-metadata.d.ts +18 -0
  62. package/dist/state/doc-metadata.js +59 -0
  63. package/dist/state/registry.d.ts +7 -0
  64. package/dist/state/registry.js +46 -0
  65. package/dist/state/snapshot.d.ts +26 -0
  66. package/dist/state/snapshot.js +100 -0
  67. package/dist/tool-schemas.d.ts +215 -0
  68. package/dist/tool-schemas.js +269 -0
  69. package/dist/tools.d.ts +58 -0
  70. package/dist/tools.js +245 -0
  71. package/dist/vectordb/rrf.d.ts +32 -0
  72. package/dist/vectordb/rrf.js +88 -0
  73. package/dist/vectordb/sqlite.d.ts +34 -0
  74. package/dist/vectordb/sqlite.js +624 -0
  75. package/dist/vectordb/types.d.ts +63 -0
  76. package/dist/vectordb/types.js +2 -0
  77. package/messages.yaml +69 -0
  78. package/package.json +79 -0
@@ -0,0 +1,3 @@
1
+ export declare const BUILD_VERSION = "2026.313.1614";
2
+ export declare const BUILD_TIMESTAMP = "2026-03-13T16:14:48.402Z";
3
+ //# sourceMappingURL=build-info.d.ts.map
@@ -0,0 +1,4 @@
1
+ // Auto-generated by scripts/generate-build-info.ts — do not edit
2
+ export const BUILD_VERSION = '2026.313.1614';
3
+ export const BUILD_TIMESTAMP = '2026-03-13T16:14:48.402Z';
4
+ //# sourceMappingURL=build-info.js.map
@@ -0,0 +1,62 @@
1
+ import { z } from 'zod';
2
+ declare const configSchema: z.ZodEffects<z.ZodObject<{
3
+ embeddingProvider: z.ZodDefault<z.ZodEnum<["openai", "ollama", "local"]>>;
4
+ openaiApiKey: z.ZodDefault<z.ZodString>;
5
+ openaiBaseUrl: z.ZodOptional<z.ZodString>;
6
+ ollamaBaseUrl: z.ZodDefault<z.ZodString>;
7
+ embeddingModel: z.ZodOptional<z.ZodString>;
8
+ embeddingBatchSize: z.ZodDefault<z.ZodNumber>;
9
+ indexingConcurrency: z.ZodDefault<z.ZodNumber>;
10
+ eideticDataDir: z.ZodDefault<z.ZodString>;
11
+ customExtensions: z.ZodEffects<z.ZodDefault<z.ZodArray<z.ZodString, "many">>, string[], unknown>;
12
+ customIgnorePatterns: z.ZodEffects<z.ZodDefault<z.ZodArray<z.ZodString, "many">>, string[], unknown>;
13
+ }, "strip", z.ZodTypeAny, {
14
+ embeddingProvider: "openai" | "ollama" | "local";
15
+ openaiApiKey: string;
16
+ ollamaBaseUrl: string;
17
+ embeddingBatchSize: number;
18
+ indexingConcurrency: number;
19
+ eideticDataDir: string;
20
+ customExtensions: string[];
21
+ customIgnorePatterns: string[];
22
+ openaiBaseUrl?: string | undefined;
23
+ embeddingModel?: string | undefined;
24
+ }, {
25
+ embeddingProvider?: "openai" | "ollama" | "local" | undefined;
26
+ openaiApiKey?: string | undefined;
27
+ openaiBaseUrl?: string | undefined;
28
+ ollamaBaseUrl?: string | undefined;
29
+ embeddingModel?: string | undefined;
30
+ embeddingBatchSize?: number | undefined;
31
+ indexingConcurrency?: number | undefined;
32
+ eideticDataDir?: string | undefined;
33
+ customExtensions?: unknown;
34
+ customIgnorePatterns?: unknown;
35
+ }>, {
36
+ embeddingModel: string;
37
+ embeddingProvider: "openai" | "ollama" | "local";
38
+ openaiApiKey: string;
39
+ ollamaBaseUrl: string;
40
+ embeddingBatchSize: number;
41
+ indexingConcurrency: number;
42
+ eideticDataDir: string;
43
+ customExtensions: string[];
44
+ customIgnorePatterns: string[];
45
+ openaiBaseUrl?: string | undefined;
46
+ }, {
47
+ embeddingProvider?: "openai" | "ollama" | "local" | undefined;
48
+ openaiApiKey?: string | undefined;
49
+ openaiBaseUrl?: string | undefined;
50
+ ollamaBaseUrl?: string | undefined;
51
+ embeddingModel?: string | undefined;
52
+ embeddingBatchSize?: number | undefined;
53
+ indexingConcurrency?: number | undefined;
54
+ eideticDataDir?: string | undefined;
55
+ customExtensions?: unknown;
56
+ customIgnorePatterns?: unknown;
57
+ }>;
58
+ export type Config = z.infer<typeof configSchema>;
59
+ export declare function loadConfig(): Config;
60
+ export declare function getConfig(): Config;
61
+ export {};
62
+ //# sourceMappingURL=config.d.ts.map
package/dist/config.js ADDED
@@ -0,0 +1,52 @@
1
+ import os from 'node:os';
2
+ import path from 'node:path';
3
+ import { z } from 'zod';
4
+ import { ConfigError } from './errors.js';
5
+ const configSchema = z
6
+ .object({
7
+ embeddingProvider: z.enum(['openai', 'ollama', 'local']).default('openai'),
8
+ openaiApiKey: z.string().default(''),
9
+ openaiBaseUrl: z.string().optional(),
10
+ ollamaBaseUrl: z.string().default('http://localhost:11434/v1'),
11
+ embeddingModel: z.string().optional(),
12
+ embeddingBatchSize: z.coerce.number().int().min(1).max(2048).default(100),
13
+ indexingConcurrency: z.coerce.number().int().min(1).max(32).default(8),
14
+ eideticDataDir: z.string().default(path.join(os.homedir(), '.eidetic')),
15
+ customExtensions: z.preprocess((val) => (typeof val === 'string' ? JSON.parse(val) : val), z.array(z.string()).default([])),
16
+ customIgnorePatterns: z.preprocess((val) => (typeof val === 'string' ? JSON.parse(val) : val), z.array(z.string()).default([])),
17
+ })
18
+ .transform((cfg) => ({
19
+ ...cfg,
20
+ embeddingModel: cfg.embeddingModel ??
21
+ (cfg.embeddingProvider === 'ollama' ? 'nomic-embed-text' : 'text-embedding-3-small'),
22
+ }));
23
+ let cachedConfig = null;
24
+ export function loadConfig() {
25
+ const raw = {
26
+ embeddingProvider: process.env.EMBEDDING_PROVIDER,
27
+ openaiApiKey: (process.env.OPENAI_API_KEY ?? '').trim().replace(/^["']|["']$/g, ''),
28
+ openaiBaseUrl: process.env.OPENAI_BASE_URL?.trim() ?? undefined,
29
+ ollamaBaseUrl: process.env.OLLAMA_BASE_URL,
30
+ embeddingModel: process.env.EMBEDDING_MODEL?.trim() ?? undefined,
31
+ embeddingBatchSize: process.env.EMBEDDING_BATCH_SIZE,
32
+ indexingConcurrency: process.env.INDEXING_CONCURRENCY,
33
+ eideticDataDir: process.env.EIDETIC_DATA_DIR,
34
+ customExtensions: process.env.CUSTOM_EXTENSIONS,
35
+ customIgnorePatterns: process.env.CUSTOM_IGNORE_PATTERNS,
36
+ };
37
+ const result = configSchema.safeParse(raw);
38
+ if (!result.success) {
39
+ const issues = result.error.issues.map((i) => ` ${i.path.join('.')}: ${i.message}`).join('\n');
40
+ throw new ConfigError(`Invalid configuration:\n${issues}`);
41
+ }
42
+ const config = result.data;
43
+ cachedConfig = config;
44
+ return cachedConfig;
45
+ }
46
+ export function getConfig() {
47
+ if (!cachedConfig) {
48
+ return loadConfig();
49
+ }
50
+ return cachedConfig;
51
+ }
52
+ //# sourceMappingURL=config.js.map
@@ -0,0 +1,8 @@
1
+ import type { VectorDB } from '../vectordb/types.js';
2
+ export interface CleanupResult {
3
+ removedFiles: string[];
4
+ totalRemoved: number;
5
+ durationMs: number;
6
+ }
7
+ export declare function cleanupVectors(rootPath: string, vectordb: VectorDB, onProgress?: (pct: number, msg: string) => void, customExtensions?: string[], customIgnorePatterns?: string[]): Promise<CleanupResult>;
8
+ //# sourceMappingURL=cleanup.d.ts.map
@@ -0,0 +1,41 @@
1
+ import { scanFiles, buildSnapshot, diffSnapshots } from './sync.js';
2
+ import { loadSnapshot, saveSnapshot } from '../vectordb/sqlite.js';
3
+ import { pathToCollectionName } from '../paths.js';
4
+ import { IndexingError } from '../errors.js';
5
+ export async function cleanupVectors(rootPath, vectordb, onProgress, customExtensions, customIgnorePatterns) {
6
+ const startTime = Date.now();
7
+ const previousSnapshot = loadSnapshot(rootPath);
8
+ if (!previousSnapshot) {
9
+ throw new IndexingError(`No snapshot found for ${rootPath}. Index the codebase first before running cleanup.`);
10
+ }
11
+ onProgress?.(10, 'Scanning files on disk...');
12
+ const filePaths = await scanFiles(rootPath, customExtensions ?? [], customIgnorePatterns ?? []);
13
+ onProgress?.(40, 'Building current snapshot...');
14
+ const currentSnapshot = buildSnapshot(rootPath, filePaths);
15
+ onProgress?.(60, 'Diffing snapshots...');
16
+ const { removed } = diffSnapshots(previousSnapshot, currentSnapshot);
17
+ if (removed.length === 0) {
18
+ onProgress?.(100, 'No removed files found.');
19
+ return { removedFiles: [], totalRemoved: 0, durationMs: Date.now() - startTime };
20
+ }
21
+ const collectionName = pathToCollectionName(rootPath);
22
+ let deletedCount = 0;
23
+ for (const rel of removed) {
24
+ onProgress?.(60 + Math.round((deletedCount / removed.length) * 35), `Deleting vectors for ${rel}...`);
25
+ await vectordb.deleteByPath(collectionName, rel);
26
+ deletedCount++;
27
+ }
28
+ // Save updated snapshot (removes the deleted file entries)
29
+ const updatedSnapshot = { ...previousSnapshot };
30
+ for (const rel of removed) {
31
+ Reflect.deleteProperty(updatedSnapshot, rel);
32
+ }
33
+ saveSnapshot(rootPath, updatedSnapshot);
34
+ onProgress?.(100, 'Cleanup complete.');
35
+ return {
36
+ removedFiles: removed,
37
+ totalRemoved: removed.length,
38
+ durationMs: Date.now() - startTime,
39
+ };
40
+ }
41
+ //# sourceMappingURL=cleanup.js.map
@@ -0,0 +1,13 @@
1
+ import type { Embedding } from '../embedding/types.js';
2
+ import type { VectorDB } from '../vectordb/types.js';
3
+ export interface DocIndexResult {
4
+ library: string;
5
+ topic: string;
6
+ source: string;
7
+ collectionName: string;
8
+ totalChunks: number;
9
+ estimatedTokens: number;
10
+ durationMs: number;
11
+ }
12
+ export declare function indexDocument(content: string, source: string, library: string, topic: string, embedding: Embedding, vectordb: VectorDB, ttlDays?: number): Promise<DocIndexResult>;
13
+ //# sourceMappingURL=doc-indexer.d.ts.map
@@ -0,0 +1,76 @@
1
+ import { randomUUID } from 'node:crypto';
2
+ import { LineSplitter } from '../splitter/line.js';
3
+ import { docCollectionName } from '../paths.js';
4
+ import { upsertDocEntry } from '../state/doc-metadata.js';
5
+ import { IndexingError } from '../errors.js';
6
+ import { getConfig } from '../config.js';
7
+ export async function indexDocument(content, source, library, topic, embedding, vectordb, ttlDays = 7) {
8
+ const start = Date.now();
9
+ if (!content || content.trim().length === 0) {
10
+ throw new IndexingError('Document content is empty.');
11
+ }
12
+ if (!source)
13
+ throw new IndexingError('Document source is required.');
14
+ if (!library)
15
+ throw new IndexingError('Library name is required.');
16
+ if (!topic)
17
+ throw new IndexingError('Topic is required.');
18
+ const collection = docCollectionName(library);
19
+ const config = getConfig();
20
+ const splitter = new LineSplitter();
21
+ const chunks = splitter.split(content, 'markdown', source);
22
+ if (chunks.length === 0) {
23
+ throw new IndexingError('Document produced no chunks after splitting.');
24
+ }
25
+ const exists = await vectordb.hasCollection(collection);
26
+ if (!exists) {
27
+ await vectordb.createCollection(collection, embedding.dimension);
28
+ }
29
+ try {
30
+ await vectordb.deleteByPath(collection, source);
31
+ }
32
+ catch {
33
+ // collection may be new with no matching docs
34
+ }
35
+ const batchSize = config.embeddingBatchSize;
36
+ let totalChunks = 0;
37
+ let totalTokens = 0;
38
+ for (let i = 0; i < chunks.length; i += batchSize) {
39
+ const batch = chunks.slice(i, i + batchSize);
40
+ const texts = batch.map((c) => c.content);
41
+ const estimation = embedding.estimateTokens(texts);
42
+ totalTokens += estimation.estimatedTokens;
43
+ const vectors = await embedding.embedBatch(texts);
44
+ const documents = batch.map((chunk, j) => ({
45
+ id: randomUUID(),
46
+ content: chunk.content,
47
+ vector: vectors[j],
48
+ relativePath: source,
49
+ startLine: chunk.startLine,
50
+ endLine: chunk.endLine,
51
+ fileExtension: '.md',
52
+ language: 'markdown',
53
+ }));
54
+ await vectordb.insert(collection, documents);
55
+ totalChunks += batch.length;
56
+ }
57
+ upsertDocEntry({
58
+ library,
59
+ topic,
60
+ source,
61
+ collectionName: collection,
62
+ indexedAt: new Date().toISOString(),
63
+ ttlDays,
64
+ totalChunks,
65
+ });
66
+ return {
67
+ library,
68
+ topic,
69
+ source,
70
+ collectionName: collection,
71
+ totalChunks,
72
+ estimatedTokens: totalTokens,
73
+ durationMs: Date.now() - start,
74
+ };
75
+ }
76
+ //# sourceMappingURL=doc-indexer.js.map
@@ -0,0 +1,13 @@
1
+ import type { Embedding } from '../embedding/types.js';
2
+ import type { VectorDB, SearchResult } from '../vectordb/types.js';
3
+ export interface DocSearchResult extends SearchResult {
4
+ library: string;
5
+ topic: string;
6
+ source: string;
7
+ stale: boolean;
8
+ }
9
+ export declare function searchDocuments(query: string, embedding: Embedding, vectordb: VectorDB, options?: {
10
+ library?: string;
11
+ limit?: number;
12
+ }): Promise<DocSearchResult[]>;
13
+ //# sourceMappingURL=doc-searcher.d.ts.map
@@ -0,0 +1,65 @@
1
+ import { docCollectionName } from '../paths.js';
2
+ import { loadDocMetadata, isStale } from '../state/doc-metadata.js';
3
+ import { SearchError } from '../errors.js';
4
+ import { deduplicateResults } from './searcher.js';
5
+ const DEFAULT_LIMIT = 5;
6
+ const MAX_LIMIT = 20;
7
+ export async function searchDocuments(query, embedding, vectordb, options = {}) {
8
+ if (!query || query.trim().length === 0) {
9
+ throw new SearchError('Search query is required.');
10
+ }
11
+ const limit = Math.min(Math.max(1, options.limit ?? DEFAULT_LIMIT), MAX_LIMIT);
12
+ const metadata = loadDocMetadata();
13
+ let collectionsToSearch;
14
+ if (options.library) {
15
+ const collection = docCollectionName(options.library);
16
+ const entries = Object.values(metadata).filter((e) => e.collectionName === collection);
17
+ if (entries.length === 0) {
18
+ throw new SearchError(`No cached documentation found for library "${options.library}". ` +
19
+ `Use ingest to cache documentation first.`);
20
+ }
21
+ collectionsToSearch = [{ collection, entries }];
22
+ }
23
+ else {
24
+ const collectionMap = new Map();
25
+ for (const entry of Object.values(metadata)) {
26
+ const existing = collectionMap.get(entry.collectionName) ?? [];
27
+ existing.push(entry);
28
+ collectionMap.set(entry.collectionName, existing);
29
+ }
30
+ if (collectionMap.size === 0) {
31
+ throw new SearchError('No cached documentation found. Use ingest to cache documentation first.');
32
+ }
33
+ collectionsToSearch = [...collectionMap.entries()].map(([collection, entries]) => ({
34
+ collection,
35
+ entries,
36
+ }));
37
+ }
38
+ const queryVector = await embedding.embed(query);
39
+ const overFetchLimit = Math.min(limit * 3, MAX_LIMIT);
40
+ const allResults = [];
41
+ for (const { collection, entries } of collectionsToSearch) {
42
+ const exists = await vectordb.hasCollection(collection);
43
+ if (!exists)
44
+ continue;
45
+ const results = await vectordb.search(collection, {
46
+ queryVector,
47
+ queryText: query,
48
+ limit: overFetchLimit,
49
+ });
50
+ for (const r of results) {
51
+ const matchingEntry = entries.find((e) => e.source === r.relativePath);
52
+ allResults.push({
53
+ ...r,
54
+ library: matchingEntry?.library ?? 'unknown',
55
+ topic: matchingEntry?.topic ?? 'unknown',
56
+ source: r.relativePath,
57
+ stale: matchingEntry ? isStale(matchingEntry) : false,
58
+ });
59
+ }
60
+ }
61
+ allResults.sort((a, b) => b.score - a.score);
62
+ const deduped = deduplicateResults(allResults, limit);
63
+ return deduped.map((r) => r);
64
+ }
65
+ //# sourceMappingURL=doc-searcher.js.map
@@ -0,0 +1,7 @@
1
+ export type FileCategory = 'source' | 'test' | 'doc' | 'config' | 'generated';
2
+ /**
3
+ * Classify a file by category based on its relative path.
4
+ * First match wins.
5
+ */
6
+ export declare function classifyFileCategory(relativePath: string): FileCategory;
7
+ //# sourceMappingURL=file-category.d.ts.map
@@ -0,0 +1,75 @@
1
+ /**
2
+ * Classify a file by category based on its relative path.
3
+ * First match wins.
4
+ */
5
+ export function classifyFileCategory(relativePath) {
6
+ const normalized = relativePath.replace(/\\/g, '/');
7
+ const segments = normalized.split('/');
8
+ const filename = segments[segments.length - 1];
9
+ const lower = normalized.toLowerCase();
10
+ const filenameLower = filename.toLowerCase();
11
+ // test
12
+ if (lower.includes('/__tests__/') ||
13
+ lower.includes('.test.') ||
14
+ lower.includes('.spec.') ||
15
+ lower.includes('_test.') ||
16
+ lower.includes('_spec.') ||
17
+ filenameLower.startsWith('test_') ||
18
+ filenameLower.startsWith('test-')) {
19
+ return 'test';
20
+ }
21
+ // doc
22
+ const ext = filename.includes('.') ? filename.slice(filename.lastIndexOf('.')).toLowerCase() : '';
23
+ if (['.md', '.mdx', '.rst', '.txt'].includes(ext) ||
24
+ segments.some((s) => s.toLowerCase() === 'docs' || s.toLowerCase() === 'doc') ||
25
+ /^readme/i.test(filename) ||
26
+ /^changelog/i.test(filename) ||
27
+ /^license/i.test(filename)) {
28
+ return 'doc';
29
+ }
30
+ // generated
31
+ if (lower.includes('/dist/') ||
32
+ lower.startsWith('dist/') ||
33
+ lower.includes('/build/') ||
34
+ lower.startsWith('build/') ||
35
+ lower.includes('/generated/') ||
36
+ lower.startsWith('generated/') ||
37
+ lower.includes('.generated.') ||
38
+ /\.[gG]\./.test(filename)) {
39
+ return 'generated';
40
+ }
41
+ // config
42
+ if (isConfigFile(normalized, filename, ext, segments)) {
43
+ return 'config';
44
+ }
45
+ return 'source';
46
+ }
47
+ function isConfigFile(normalized, filename, ext, segments) {
48
+ const filenameLower = filename.toLowerCase();
49
+ // Explicit filename matches
50
+ if (filenameLower === 'package.json')
51
+ return true;
52
+ if (filenameLower === 'makefile')
53
+ return true;
54
+ if (filenameLower === 'dockerfile')
55
+ return true;
56
+ if (/^tsconfig.*\.json$/.test(filenameLower))
57
+ return true;
58
+ if (filenameLower.startsWith('docker-compose'))
59
+ return true;
60
+ if (filenameLower.startsWith('.eslintrc'))
61
+ return true;
62
+ if (filenameLower.startsWith('.prettierrc'))
63
+ return true;
64
+ // *.config.* pattern
65
+ if (filename.includes('.config.'))
66
+ return true;
67
+ // .yaml/.yml/.toml not under src/
68
+ if (['.yaml', '.yml', '.toml'].includes(ext)) {
69
+ const underSrc = segments.some((s) => s.toLowerCase() === 'src');
70
+ if (!underSrc)
71
+ return true;
72
+ }
73
+ return false;
74
+ }
75
+ //# sourceMappingURL=file-category.js.map
@@ -0,0 +1,18 @@
1
+ import type { Embedding } from '../embedding/types.js';
2
+ import type { VectorDB } from '../vectordb/types.js';
3
+ export { previewCodebase, type PreviewResult } from './preview.js';
4
+ export { saveSnapshot, deleteSnapshot, snapshotExists } from '../vectordb/sqlite.js';
5
+ export interface IndexResult {
6
+ totalFiles: number;
7
+ totalChunks: number;
8
+ addedFiles: number;
9
+ modifiedFiles: number;
10
+ removedFiles: number;
11
+ skippedFiles: number;
12
+ estimatedTokens: number;
13
+ estimatedCostUsd: number;
14
+ durationMs: number;
15
+ parseFailures: string[];
16
+ }
17
+ export declare function indexCodebase(rootPath: string, embedding: Embedding, vectordb: VectorDB, force?: boolean, onProgress?: (pct: number, msg: string) => void, customExtensions?: string[], customIgnorePatterns?: string[]): Promise<IndexResult>;
18
+ //# sourceMappingURL=indexer.d.ts.map
@@ -0,0 +1,177 @@
1
+ import fs from 'node:fs';
2
+ import path from 'node:path';
3
+ import { randomUUID } from 'node:crypto';
4
+ import { AstSplitter } from '../splitter/ast.js';
5
+ import { LineSplitter } from '../splitter/line.js';
6
+ import { scanFiles, buildSnapshot, diffSnapshots, extensionToLanguage } from './sync.js';
7
+ import { getConfig } from '../config.js';
8
+ import { normalizePath, pathToCollectionName } from '../paths.js';
9
+ import { IndexingError } from '../errors.js';
10
+ import { classifyFileCategory } from './file-category.js';
11
+ import { loadSnapshot, saveSnapshot } from '../vectordb/sqlite.js';
12
+ export { previewCodebase } from './preview.js';
13
+ export { saveSnapshot, deleteSnapshot, snapshotExists } from '../vectordb/sqlite.js';
14
+ export async function indexCodebase(rootPath, embedding, vectordb, force = false, onProgress, customExtensions, customIgnorePatterns) {
15
+ const start = Date.now();
16
+ const normalizedPath = normalizePath(rootPath);
17
+ const collectionName = pathToCollectionName(normalizedPath);
18
+ const config = getConfig();
19
+ onProgress?.(0, 'Scanning files...');
20
+ const filePaths = await scanFiles(normalizedPath, customExtensions, customIgnorePatterns);
21
+ if (filePaths.length === 0) {
22
+ throw new IndexingError(`No indexable files found in ${normalizedPath}`);
23
+ }
24
+ const currentSnapshot = buildSnapshot(normalizedPath, filePaths);
25
+ let filesToProcess;
26
+ let removedFiles = [];
27
+ let addedCount = 0;
28
+ let modifiedCount = 0;
29
+ if (force) {
30
+ onProgress?.(5, 'Dropping existing index...');
31
+ await vectordb.dropCollection(collectionName);
32
+ await vectordb.createCollection(collectionName, embedding.dimension);
33
+ filesToProcess = filePaths;
34
+ addedCount = filePaths.length;
35
+ }
36
+ else {
37
+ const previousSnapshot = loadSnapshot(normalizedPath);
38
+ if (!previousSnapshot || !(await vectordb.hasCollection(collectionName))) {
39
+ // First time indexing
40
+ await vectordb.createCollection(collectionName, embedding.dimension);
41
+ filesToProcess = filePaths;
42
+ addedCount = filePaths.length;
43
+ }
44
+ else {
45
+ const diff = diffSnapshots(previousSnapshot, currentSnapshot);
46
+ addedCount = diff.added.length;
47
+ modifiedCount = diff.modified.length;
48
+ removedFiles = diff.removed;
49
+ const toDelete = [...diff.removed, ...diff.modified];
50
+ for (const rel of toDelete) {
51
+ await vectordb.deleteByPath(collectionName, rel);
52
+ }
53
+ filesToProcess = [...diff.added, ...diff.modified];
54
+ }
55
+ }
56
+ if (filesToProcess.length === 0) {
57
+ saveSnapshot(normalizedPath, currentSnapshot);
58
+ return {
59
+ totalFiles: filePaths.length,
60
+ totalChunks: 0,
61
+ addedFiles: 0,
62
+ modifiedFiles: 0,
63
+ removedFiles: removedFiles.length,
64
+ skippedFiles: filePaths.length,
65
+ estimatedTokens: 0,
66
+ estimatedCostUsd: 0,
67
+ durationMs: Date.now() - start,
68
+ parseFailures: [],
69
+ };
70
+ }
71
+ onProgress?.(10, `Splitting ${filesToProcess.length} files...`);
72
+ const astSplitter = new AstSplitter();
73
+ const lineSplitter = new LineSplitter();
74
+ const allChunks = [];
75
+ const parseFailures = [];
76
+ const concurrency = config.indexingConcurrency;
77
+ for (let i = 0; i < filesToProcess.length; i += concurrency) {
78
+ const batch = filesToProcess.slice(i, i + concurrency);
79
+ const batchResults = await Promise.all(
80
+ // eslint-disable-next-line @typescript-eslint/require-await
81
+ batch.map(async (relPath) => {
82
+ const fullPath = path.join(normalizedPath, relPath);
83
+ try {
84
+ const code = fs.readFileSync(fullPath, 'utf-8');
85
+ if (code.trim().length === 0)
86
+ return { chunks: [], failed: false };
87
+ const ext = path.extname(relPath);
88
+ const language = extensionToLanguage(ext);
89
+ let chunks = astSplitter.split(code, language, relPath);
90
+ if (chunks.length === 0) {
91
+ chunks = lineSplitter.split(code, language, relPath);
92
+ }
93
+ if (chunks.length === 0)
94
+ return { chunks: [], failed: true };
95
+ return { chunks, failed: false };
96
+ }
97
+ catch (err) {
98
+ console.warn(`Failed to process "${relPath}":`, err);
99
+ return { chunks: [], failed: true };
100
+ }
101
+ }));
102
+ for (let j = 0; j < batchResults.length; j++) {
103
+ const { chunks, failed } = batchResults[j];
104
+ allChunks.push(...chunks);
105
+ if (failed)
106
+ parseFailures.push(batch[j]);
107
+ }
108
+ }
109
+ if (parseFailures.length > 0) {
110
+ console.warn(`Warning: ${parseFailures.length} file(s) produced no chunks: ${parseFailures.slice(0, 10).join(', ')}` +
111
+ (parseFailures.length > 10 ? ` (and ${parseFailures.length - 10} more)` : ''));
112
+ }
113
+ if (allChunks.length === 0) {
114
+ saveSnapshot(normalizedPath, currentSnapshot);
115
+ return {
116
+ totalFiles: filePaths.length,
117
+ totalChunks: 0,
118
+ addedFiles: addedCount,
119
+ modifiedFiles: modifiedCount,
120
+ removedFiles: removedFiles.length,
121
+ skippedFiles: filePaths.length - filesToProcess.length,
122
+ estimatedTokens: 0,
123
+ estimatedCostUsd: 0,
124
+ durationMs: Date.now() - start,
125
+ parseFailures,
126
+ };
127
+ }
128
+ const chunkTexts = allChunks.map((c) => c.content);
129
+ const estimation = embedding.estimateTokens(chunkTexts);
130
+ console.log(`Indexing ${filesToProcess.length} files -> ${allChunks.length} chunks -> ` +
131
+ `~${(estimation.estimatedTokens / 1000).toFixed(0)}K tokens (~$${estimation.estimatedCostUsd.toFixed(4)})`);
132
+ const batchSize = config.embeddingBatchSize;
133
+ let processedChunks = 0;
134
+ for (let i = 0; i < allChunks.length; i += batchSize) {
135
+ const batch = allChunks.slice(i, i + batchSize);
136
+ const texts = batch.map((c) => c.content);
137
+ const pct = 10 + Math.round((i / allChunks.length) * 85);
138
+ onProgress?.(pct, `Embedding batch ${Math.floor(i / batchSize) + 1}/${Math.ceil(allChunks.length / batchSize)}...`);
139
+ const vectors = await embedding.embedBatch(texts);
140
+ if (vectors.length !== texts.length) {
141
+ throw new IndexingError(`Embedding dimension mismatch: sent ${texts.length} texts, got ${vectors.length} vectors`);
142
+ }
143
+ const documents = batch.map((chunk, j) => ({
144
+ id: randomUUID(),
145
+ content: chunk.content,
146
+ vector: vectors[j],
147
+ relativePath: chunk.filePath,
148
+ startLine: chunk.startLine,
149
+ endLine: chunk.endLine,
150
+ fileExtension: path.extname(chunk.filePath),
151
+ language: chunk.language,
152
+ fileCategory: classifyFileCategory(chunk.filePath),
153
+ symbolName: chunk.symbolName,
154
+ symbolKind: chunk.symbolKind,
155
+ symbolSignature: chunk.symbolSignature,
156
+ parentSymbol: chunk.parentSymbol,
157
+ }));
158
+ await vectordb.insert(collectionName, documents);
159
+ processedChunks += batch.length;
160
+ }
161
+ onProgress?.(95, 'Saving snapshot...');
162
+ saveSnapshot(normalizedPath, currentSnapshot);
163
+ onProgress?.(100, 'Done');
164
+ return {
165
+ totalFiles: filePaths.length,
166
+ totalChunks: processedChunks,
167
+ addedFiles: addedCount,
168
+ modifiedFiles: modifiedCount,
169
+ removedFiles: removedFiles.length,
170
+ skippedFiles: filePaths.length - filesToProcess.length,
171
+ estimatedTokens: estimation.estimatedTokens,
172
+ estimatedCostUsd: estimation.estimatedCostUsd,
173
+ durationMs: Date.now() - start,
174
+ parseFailures,
175
+ };
176
+ }
177
+ //# sourceMappingURL=indexer.js.map
@@ -0,0 +1,13 @@
1
+ export interface PreviewResult {
2
+ totalFiles: number;
3
+ byExtension: Record<string, number>;
4
+ topDirectories: {
5
+ dir: string;
6
+ count: number;
7
+ }[];
8
+ estimatedTokens: number;
9
+ estimatedCostUsd: number;
10
+ warnings: string[];
11
+ }
12
+ export declare function previewCodebase(rootPath: string, customExtensions?: string[], customIgnorePatterns?: string[]): Promise<PreviewResult>;
13
+ //# sourceMappingURL=preview.d.ts.map