grepmind-core 0.1.0-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +190 -0
- package/dist/config/types.d.ts +174 -0
- package/dist/config/types.d.ts.map +1 -0
- package/dist/config/types.js +137 -0
- package/dist/config/types.js.map +1 -0
- package/dist/git.d.ts +98 -0
- package/dist/git.d.ts.map +1 -0
- package/dist/git.js +298 -0
- package/dist/git.js.map +1 -0
- package/dist/git.test.d.ts +7 -0
- package/dist/git.test.d.ts.map +1 -0
- package/dist/git.test.js +242 -0
- package/dist/git.test.js.map +1 -0
- package/dist/index.d.ts +44 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +67 -0
- package/dist/index.js.map +1 -0
- package/dist/indexer/branch.d.ts +121 -0
- package/dist/indexer/branch.d.ts.map +1 -0
- package/dist/indexer/branch.js +451 -0
- package/dist/indexer/branch.js.map +1 -0
- package/dist/indexer/chunker.d.ts +9 -0
- package/dist/indexer/chunker.d.ts.map +1 -0
- package/dist/indexer/chunker.js +70 -0
- package/dist/indexer/chunker.js.map +1 -0
- package/dist/indexer/chunker.test.d.ts +2 -0
- package/dist/indexer/chunker.test.d.ts.map +1 -0
- package/dist/indexer/chunker.test.js +180 -0
- package/dist/indexer/chunker.test.js.map +1 -0
- package/dist/indexer/code/branch.d.ts +155 -0
- package/dist/indexer/code/branch.d.ts.map +1 -0
- package/dist/indexer/code/branch.js +550 -0
- package/dist/indexer/code/branch.js.map +1 -0
- package/dist/indexer/code/branch.test.d.ts +7 -0
- package/dist/indexer/code/branch.test.d.ts.map +1 -0
- package/dist/indexer/code/branch.test.js +241 -0
- package/dist/indexer/code/branch.test.js.map +1 -0
- package/dist/indexer/code/chunker.d.ts +61 -0
- package/dist/indexer/code/chunker.d.ts.map +1 -0
- package/dist/indexer/code/chunker.js +311 -0
- package/dist/indexer/code/chunker.js.map +1 -0
- package/dist/indexer/code/chunker.test.d.ts +2 -0
- package/dist/indexer/code/chunker.test.d.ts.map +1 -0
- package/dist/indexer/code/chunker.test.js +552 -0
- package/dist/indexer/code/chunker.test.js.map +1 -0
- package/dist/indexer/code/fts.test.d.ts +2 -0
- package/dist/indexer/code/fts.test.d.ts.map +1 -0
- package/dist/indexer/code/fts.test.js +14 -0
- package/dist/indexer/code/fts.test.js.map +1 -0
- package/dist/indexer/code/graph/embedded.d.ts +11 -0
- package/dist/indexer/code/graph/embedded.d.ts.map +1 -0
- package/dist/indexer/code/graph/embedded.js +152 -0
- package/dist/indexer/code/graph/embedded.js.map +1 -0
- package/dist/indexer/code/graph/embedded.test.d.ts +2 -0
- package/dist/indexer/code/graph/embedded.test.d.ts.map +1 -0
- package/dist/indexer/code/graph/embedded.test.js +105 -0
- package/dist/indexer/code/graph/embedded.test.js.map +1 -0
- package/dist/indexer/code/graph/facts.d.ts +11 -0
- package/dist/indexer/code/graph/facts.d.ts.map +1 -0
- package/dist/indexer/code/graph/facts.js +456 -0
- package/dist/indexer/code/graph/facts.js.map +1 -0
- package/dist/indexer/code/graph/facts.test.d.ts +2 -0
- package/dist/indexer/code/graph/facts.test.d.ts.map +1 -0
- package/dist/indexer/code/graph/facts.test.js +181 -0
- package/dist/indexer/code/graph/facts.test.js.map +1 -0
- package/dist/indexer/code/graph/id.d.ts +14 -0
- package/dist/indexer/code/graph/id.d.ts.map +1 -0
- package/dist/indexer/code/graph/id.js +40 -0
- package/dist/indexer/code/graph/id.js.map +1 -0
- package/dist/indexer/code/graph/id.test.d.ts +2 -0
- package/dist/indexer/code/graph/id.test.d.ts.map +1 -0
- package/dist/indexer/code/graph/id.test.js +86 -0
- package/dist/indexer/code/graph/id.test.js.map +1 -0
- package/dist/indexer/code/graph/index.d.ts +133 -0
- package/dist/indexer/code/graph/index.d.ts.map +1 -0
- package/dist/indexer/code/graph/index.js +1876 -0
- package/dist/indexer/code/graph/index.js.map +1 -0
- package/dist/indexer/code/graph/index.test.d.ts +2 -0
- package/dist/indexer/code/graph/index.test.d.ts.map +1 -0
- package/dist/indexer/code/graph/index.test.js +210 -0
- package/dist/indexer/code/graph/index.test.js.map +1 -0
- package/dist/indexer/code/graph/queries.d.ts +22 -0
- package/dist/indexer/code/graph/queries.d.ts.map +1 -0
- package/dist/indexer/code/graph/queries.js +79 -0
- package/dist/indexer/code/graph/queries.js.map +1 -0
- package/dist/indexer/code/graph/queries.test.d.ts +2 -0
- package/dist/indexer/code/graph/queries.test.d.ts.map +1 -0
- package/dist/indexer/code/graph/queries.test.js +108 -0
- package/dist/indexer/code/graph/queries.test.js.map +1 -0
- package/dist/indexer/code/graph/resolver.d.ts +136 -0
- package/dist/indexer/code/graph/resolver.d.ts.map +1 -0
- package/dist/indexer/code/graph/resolver.js +839 -0
- package/dist/indexer/code/graph/resolver.js.map +1 -0
- package/dist/indexer/code/graph/resolver.test.d.ts +2 -0
- package/dist/indexer/code/graph/resolver.test.d.ts.map +1 -0
- package/dist/indexer/code/graph/resolver.test.js +482 -0
- package/dist/indexer/code/graph/resolver.test.js.map +1 -0
- package/dist/indexer/code/graph/semantic.d.ts +33 -0
- package/dist/indexer/code/graph/semantic.d.ts.map +1 -0
- package/dist/indexer/code/graph/semantic.js +279 -0
- package/dist/indexer/code/graph/semantic.js.map +1 -0
- package/dist/indexer/code/graph/semantic.test.d.ts +2 -0
- package/dist/indexer/code/graph/semantic.test.d.ts.map +1 -0
- package/dist/indexer/code/graph/semantic.test.js +127 -0
- package/dist/indexer/code/graph/semantic.test.js.map +1 -0
- package/dist/indexer/code/index.d.ts +404 -0
- package/dist/indexer/code/index.d.ts.map +1 -0
- package/dist/indexer/code/index.js +2070 -0
- package/dist/indexer/code/index.js.map +1 -0
- package/dist/indexer/code/languages/bash.d.ts +14 -0
- package/dist/indexer/code/languages/bash.d.ts.map +1 -0
- package/dist/indexer/code/languages/bash.js +125 -0
- package/dist/indexer/code/languages/bash.js.map +1 -0
- package/dist/indexer/code/languages/css.d.ts +16 -0
- package/dist/indexer/code/languages/css.d.ts.map +1 -0
- package/dist/indexer/code/languages/css.js +204 -0
- package/dist/indexer/code/languages/css.js.map +1 -0
- package/dist/indexer/code/languages/generic.d.ts +61 -0
- package/dist/indexer/code/languages/generic.d.ts.map +1 -0
- package/dist/indexer/code/languages/generic.js +150 -0
- package/dist/indexer/code/languages/generic.js.map +1 -0
- package/dist/indexer/code/languages/graphql.d.ts +13 -0
- package/dist/indexer/code/languages/graphql.d.ts.map +1 -0
- package/dist/indexer/code/languages/graphql.js +180 -0
- package/dist/indexer/code/languages/graphql.js.map +1 -0
- package/dist/indexer/code/languages/html.d.ts +16 -0
- package/dist/indexer/code/languages/html.d.ts.map +1 -0
- package/dist/indexer/code/languages/html.js +138 -0
- package/dist/indexer/code/languages/html.js.map +1 -0
- package/dist/indexer/code/languages/index.d.ts +9 -0
- package/dist/indexer/code/languages/index.d.ts.map +1 -0
- package/dist/indexer/code/languages/index.js +12 -0
- package/dist/indexer/code/languages/index.js.map +1 -0
- package/dist/indexer/code/languages/json.d.ts +12 -0
- package/dist/indexer/code/languages/json.d.ts.map +1 -0
- package/dist/indexer/code/languages/json.js +66 -0
- package/dist/indexer/code/languages/json.js.map +1 -0
- package/dist/indexer/code/languages/registry.d.ts +78 -0
- package/dist/indexer/code/languages/registry.d.ts.map +1 -0
- package/dist/indexer/code/languages/registry.js +72 -0
- package/dist/indexer/code/languages/registry.js.map +1 -0
- package/dist/indexer/code/languages/typescript.d.ts +39 -0
- package/dist/indexer/code/languages/typescript.d.ts.map +1 -0
- package/dist/indexer/code/languages/typescript.js +300 -0
- package/dist/indexer/code/languages/typescript.js.map +1 -0
- package/dist/indexer/code/languages/yaml.d.ts +13 -0
- package/dist/indexer/code/languages/yaml.d.ts.map +1 -0
- package/dist/indexer/code/languages/yaml.js +90 -0
- package/dist/indexer/code/languages/yaml.js.map +1 -0
- package/dist/indexer/code/parser.d.ts +26 -0
- package/dist/indexer/code/parser.d.ts.map +1 -0
- package/dist/indexer/code/parser.js +332 -0
- package/dist/indexer/code/parser.js.map +1 -0
- package/dist/indexer/code/retry.d.ts +58 -0
- package/dist/indexer/code/retry.d.ts.map +1 -0
- package/dist/indexer/code/retry.js +192 -0
- package/dist/indexer/code/retry.js.map +1 -0
- package/dist/indexer/code/tree/builder.d.ts +30 -0
- package/dist/indexer/code/tree/builder.d.ts.map +1 -0
- package/dist/indexer/code/tree/builder.js +132 -0
- package/dist/indexer/code/tree/builder.js.map +1 -0
- package/dist/indexer/code/tree/builder.test.d.ts +2 -0
- package/dist/indexer/code/tree/builder.test.d.ts.map +1 -0
- package/dist/indexer/code/tree/builder.test.js +31 -0
- package/dist/indexer/code/tree/builder.test.js.map +1 -0
- package/dist/indexer/code/tree/cache.d.ts +22 -0
- package/dist/indexer/code/tree/cache.d.ts.map +1 -0
- package/dist/indexer/code/tree/cache.js +85 -0
- package/dist/indexer/code/tree/cache.js.map +1 -0
- package/dist/indexer/code/tree/context.d.ts +32 -0
- package/dist/indexer/code/tree/context.d.ts.map +1 -0
- package/dist/indexer/code/tree/context.js +78 -0
- package/dist/indexer/code/tree/context.js.map +1 -0
- package/dist/indexer/code/tree/embedding.d.ts +9 -0
- package/dist/indexer/code/tree/embedding.d.ts.map +1 -0
- package/dist/indexer/code/tree/embedding.js +53 -0
- package/dist/indexer/code/tree/embedding.js.map +1 -0
- package/dist/indexer/code/tree/embedding.test.d.ts +2 -0
- package/dist/indexer/code/tree/embedding.test.d.ts.map +1 -0
- package/dist/indexer/code/tree/embedding.test.js +57 -0
- package/dist/indexer/code/tree/embedding.test.js.map +1 -0
- package/dist/indexer/code/tree/id.d.ts +3 -0
- package/dist/indexer/code/tree/id.d.ts.map +1 -0
- package/dist/indexer/code/tree/id.js +8 -0
- package/dist/indexer/code/tree/id.js.map +1 -0
- package/dist/indexer/code/tree/index.d.ts +113 -0
- package/dist/indexer/code/tree/index.d.ts.map +1 -0
- package/dist/indexer/code/tree/index.js +1146 -0
- package/dist/indexer/code/tree/index.js.map +1 -0
- package/dist/indexer/code/tree/rename.d.ts +13 -0
- package/dist/indexer/code/tree/rename.d.ts.map +1 -0
- package/dist/indexer/code/tree/rename.js +46 -0
- package/dist/indexer/code/tree/rename.js.map +1 -0
- package/dist/indexer/code/tree/repomap.d.ts +29 -0
- package/dist/indexer/code/tree/repomap.d.ts.map +1 -0
- package/dist/indexer/code/tree/repomap.js +95 -0
- package/dist/indexer/code/tree/repomap.js.map +1 -0
- package/dist/indexer/code/tree/repomap.test.d.ts +2 -0
- package/dist/indexer/code/tree/repomap.test.d.ts.map +1 -0
- package/dist/indexer/code/tree/repomap.test.js +93 -0
- package/dist/indexer/code/tree/repomap.test.js.map +1 -0
- package/dist/indexer/code/tree/stats.d.ts +26 -0
- package/dist/indexer/code/tree/stats.d.ts.map +1 -0
- package/dist/indexer/code/tree/stats.js +49 -0
- package/dist/indexer/code/tree/stats.js.map +1 -0
- package/dist/indexer/code/tree/types.d.ts +186 -0
- package/dist/indexer/code/tree/types.d.ts.map +1 -0
- package/dist/indexer/code/tree/types.js +10 -0
- package/dist/indexer/code/tree/types.js.map +1 -0
- package/dist/indexer/code/wal.d.ts +144 -0
- package/dist/indexer/code/wal.d.ts.map +1 -0
- package/dist/indexer/code/wal.js +283 -0
- package/dist/indexer/code/wal.js.map +1 -0
- package/dist/indexer/embeddings.d.ts +113 -0
- package/dist/indexer/embeddings.d.ts.map +1 -0
- package/dist/indexer/embeddings.js +477 -0
- package/dist/indexer/embeddings.js.map +1 -0
- package/dist/indexer/git-sync.d.ts +117 -0
- package/dist/indexer/git-sync.d.ts.map +1 -0
- package/dist/indexer/git-sync.js +398 -0
- package/dist/indexer/git-sync.js.map +1 -0
- package/dist/indexer/index.d.ts +175 -0
- package/dist/indexer/index.d.ts.map +1 -0
- package/dist/indexer/index.js +1096 -0
- package/dist/indexer/index.js.map +1 -0
- package/dist/indexer/mocks/mock-reranker.d.ts +12 -0
- package/dist/indexer/mocks/mock-reranker.d.ts.map +1 -0
- package/dist/indexer/mocks/mock-reranker.js +26 -0
- package/dist/indexer/mocks/mock-reranker.js.map +1 -0
- package/dist/indexer/parser.d.ts +8 -0
- package/dist/indexer/parser.d.ts.map +1 -0
- package/dist/indexer/parser.js +44 -0
- package/dist/indexer/parser.js.map +1 -0
- package/dist/indexer/parser.test.d.ts +2 -0
- package/dist/indexer/parser.test.d.ts.map +1 -0
- package/dist/indexer/parser.test.js +197 -0
- package/dist/indexer/parser.test.js.map +1 -0
- package/dist/indexer/reranking.d.ts +71 -0
- package/dist/indexer/reranking.d.ts.map +1 -0
- package/dist/indexer/reranking.integration.test.d.ts +2 -0
- package/dist/indexer/reranking.integration.test.d.ts.map +1 -0
- package/dist/indexer/reranking.integration.test.js +104 -0
- package/dist/indexer/reranking.integration.test.js.map +1 -0
- package/dist/indexer/reranking.js +256 -0
- package/dist/indexer/reranking.js.map +1 -0
- package/dist/indexer/reranking.test.d.ts +2 -0
- package/dist/indexer/reranking.test.d.ts.map +1 -0
- package/dist/indexer/reranking.test.js +130 -0
- package/dist/indexer/reranking.test.js.map +1 -0
- package/dist/indexer/wal/file-storage.d.ts +60 -0
- package/dist/indexer/wal/file-storage.d.ts.map +1 -0
- package/dist/indexer/wal/file-storage.js +277 -0
- package/dist/indexer/wal/file-storage.js.map +1 -0
- package/dist/indexer/wal/file-storage.test.d.ts +8 -0
- package/dist/indexer/wal/file-storage.test.d.ts.map +1 -0
- package/dist/indexer/wal/file-storage.test.js +444 -0
- package/dist/indexer/wal/file-storage.test.js.map +1 -0
- package/dist/indexer/wal/index.d.ts +41 -0
- package/dist/indexer/wal/index.d.ts.map +1 -0
- package/dist/indexer/wal/index.js +61 -0
- package/dist/indexer/wal/index.js.map +1 -0
- package/dist/indexer/wal/integration.test.d.ts +11 -0
- package/dist/indexer/wal/integration.test.d.ts.map +1 -0
- package/dist/indexer/wal/integration.test.js +378 -0
- package/dist/indexer/wal/integration.test.js.map +1 -0
- package/dist/indexer/wal/lancedb-storage.d.ts +72 -0
- package/dist/indexer/wal/lancedb-storage.d.ts.map +1 -0
- package/dist/indexer/wal/lancedb-storage.js +462 -0
- package/dist/indexer/wal/lancedb-storage.js.map +1 -0
- package/dist/indexer/wal/lancedb-storage.test.d.ts +8 -0
- package/dist/indexer/wal/lancedb-storage.test.d.ts.map +1 -0
- package/dist/indexer/wal/lancedb-storage.test.js +415 -0
- package/dist/indexer/wal/lancedb-storage.test.js.map +1 -0
- package/dist/indexer/wal/sync-wal.d.ts +144 -0
- package/dist/indexer/wal/sync-wal.d.ts.map +1 -0
- package/dist/indexer/wal/sync-wal.js +863 -0
- package/dist/indexer/wal/sync-wal.js.map +1 -0
- package/dist/indexer/wal/sync-wal.test.d.ts +8 -0
- package/dist/indexer/wal/sync-wal.test.d.ts.map +1 -0
- package/dist/indexer/wal/sync-wal.test.js +752 -0
- package/dist/indexer/wal/sync-wal.test.js.map +1 -0
- package/dist/indexer/wal/types.d.ts +167 -0
- package/dist/indexer/wal/types.d.ts.map +1 -0
- package/dist/indexer/wal/types.js +12 -0
- package/dist/indexer/wal/types.js.map +1 -0
- package/dist/indexer/watcher.d.ts +36 -0
- package/dist/indexer/watcher.d.ts.map +1 -0
- package/dist/indexer/watcher.js +110 -0
- package/dist/indexer/watcher.js.map +1 -0
- package/dist/search/explore.d.ts +62 -0
- package/dist/search/explore.d.ts.map +1 -0
- package/dist/search/explore.js +111 -0
- package/dist/search/explore.js.map +1 -0
- package/dist/search/fts.d.ts +23 -0
- package/dist/search/fts.d.ts.map +1 -0
- package/dist/search/fts.js +64 -0
- package/dist/search/fts.js.map +1 -0
- package/dist/search/fts.test.d.ts +2 -0
- package/dist/search/fts.test.d.ts.map +1 -0
- package/dist/search/fts.test.js +27 -0
- package/dist/search/fts.test.js.map +1 -0
- package/dist/search/grep.d.ts +75 -0
- package/dist/search/grep.d.ts.map +1 -0
- package/dist/search/grep.js +96 -0
- package/dist/search/grep.js.map +1 -0
- package/dist/search/grep.test.d.ts +2 -0
- package/dist/search/grep.test.d.ts.map +1 -0
- package/dist/search/grep.test.js +178 -0
- package/dist/search/grep.test.js.map +1 -0
- package/dist/search/hybrid-grep.d.ts +43 -0
- package/dist/search/hybrid-grep.d.ts.map +1 -0
- package/dist/search/hybrid-grep.js +130 -0
- package/dist/search/hybrid-grep.js.map +1 -0
- package/dist/search/hybrid-grep.test.d.ts +2 -0
- package/dist/search/hybrid-grep.test.d.ts.map +1 -0
- package/dist/search/hybrid-grep.test.js +133 -0
- package/dist/search/hybrid-grep.test.js.map +1 -0
- package/dist/search/rg-executor.d.ts +63 -0
- package/dist/search/rg-executor.d.ts.map +1 -0
- package/dist/search/rg-executor.js +146 -0
- package/dist/search/rg-executor.js.map +1 -0
- package/dist/search/rg-executor.test.d.ts +2 -0
- package/dist/search/rg-executor.test.d.ts.map +1 -0
- package/dist/search/rg-executor.test.js +104 -0
- package/dist/search/rg-executor.test.js.map +1 -0
- package/dist/search/rg-parser/extractor.d.ts +14 -0
- package/dist/search/rg-parser/extractor.d.ts.map +1 -0
- package/dist/search/rg-parser/extractor.js +82 -0
- package/dist/search/rg-parser/extractor.js.map +1 -0
- package/dist/search/rg-parser/extractor.test.d.ts +2 -0
- package/dist/search/rg-parser/extractor.test.d.ts.map +1 -0
- package/dist/search/rg-parser/extractor.test.js +35 -0
- package/dist/search/rg-parser/extractor.test.js.map +1 -0
- package/dist/search/rg-parser/fts-builder.d.ts +7 -0
- package/dist/search/rg-parser/fts-builder.d.ts.map +1 -0
- package/dist/search/rg-parser/fts-builder.js +18 -0
- package/dist/search/rg-parser/fts-builder.js.map +1 -0
- package/dist/search/rg-parser/fts-builder.test.d.ts +2 -0
- package/dist/search/rg-parser/fts-builder.test.d.ts.map +1 -0
- package/dist/search/rg-parser/fts-builder.test.js +26 -0
- package/dist/search/rg-parser/fts-builder.test.js.map +1 -0
- package/dist/search/rg-parser/index.d.ts +36 -0
- package/dist/search/rg-parser/index.d.ts.map +1 -0
- package/dist/search/rg-parser/index.js +83 -0
- package/dist/search/rg-parser/index.js.map +1 -0
- package/dist/search/rg-parser/index.test.d.ts +2 -0
- package/dist/search/rg-parser/index.test.d.ts.map +1 -0
- package/dist/search/rg-parser/index.test.js +34 -0
- package/dist/search/rg-parser/index.test.js.map +1 -0
- package/dist/search/rg-parser/strategy.d.ts +14 -0
- package/dist/search/rg-parser/strategy.d.ts.map +1 -0
- package/dist/search/rg-parser/strategy.js +31 -0
- package/dist/search/rg-parser/strategy.js.map +1 -0
- package/dist/search/rg-parser/strategy.test.d.ts +2 -0
- package/dist/search/rg-parser/strategy.test.d.ts.map +1 -0
- package/dist/search/rg-parser/strategy.test.js +29 -0
- package/dist/search/rg-parser/strategy.test.js.map +1 -0
- package/dist/types.d.ts +345 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +7 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/vault.d.ts +84 -0
- package/dist/utils/vault.d.ts.map +1 -0
- package/dist/utils/vault.js +138 -0
- package/dist/utils/vault.js.map +1 -0
- package/dist/utils/vault.test.d.ts +2 -0
- package/dist/utils/vault.test.d.ts.map +1 -0
- package/dist/utils/vault.test.js +153 -0
- package/dist/utils/vault.test.js.map +1 -0
- package/package.json +69 -0
|
@@ -0,0 +1,2070 @@
|
|
|
1
|
+
// CodeIndexer - main code indexing class
|
|
2
|
+
// Refactored for DI: accepts config and embedding provider via constructor
|
|
3
|
+
import * as lancedb from '@lancedb/lancedb';
|
|
4
|
+
import { BooleanQuery, MatchQuery, Occur, PhraseQuery } from '@lancedb/lancedb';
|
|
5
|
+
import { glob } from 'glob';
|
|
6
|
+
import fs from 'node:fs/promises';
|
|
7
|
+
import { existsSync, mkdirSync, rmSync, statSync, readFileSync } from 'node:fs';
|
|
8
|
+
import path from 'node:path';
|
|
9
|
+
import crypto from 'node:crypto';
|
|
10
|
+
import _ignore from 'ignore';
|
|
11
|
+
const ignore = _ignore.default || _ignore;
|
|
12
|
+
import { minimatch } from 'minimatch';
|
|
13
|
+
import { DEFAULT_RERANKING_CONFIG, } from '../../config/types.js';
|
|
14
|
+
import { createTreeSitterParser } from './parser.js';
|
|
15
|
+
import { applyStableChunkIds, symbolsToChunks, createFileChunk, buildEmbeddingText, generateContentHash, } from './chunker.js';
|
|
16
|
+
import { languageRegistry } from './languages/index.js';
|
|
17
|
+
import { createError, isError } from '../../types.js';
|
|
18
|
+
import { CodeIndexWal } from './wal.js';
|
|
19
|
+
import { withRetry } from './retry.js';
|
|
20
|
+
import { GraphIndexer } from './graph/index.js';
|
|
21
|
+
import { buildPathPrefixFilter, combineFilters } from '../../search/fts.js';
|
|
22
|
+
import { TreeIndexer } from './tree/index.js';
|
|
23
|
+
// Import language extractors to register them
|
|
24
|
+
import './languages/typescript.js';
|
|
25
|
+
import './languages/css.js';
|
|
26
|
+
import './languages/graphql.js';
|
|
27
|
+
import './languages/json.js';
|
|
28
|
+
import './languages/yaml.js';
|
|
29
|
+
import './languages/html.js';
|
|
30
|
+
import './languages/bash.js';
|
|
31
|
+
// How old (in ms) an index lock may be before it is considered abandoned.
const LOCK_TIMEOUT_MS = 60_000; // 1 minute

// Baseline retry settings for embedding calls; merged with config/option
// overrides in the CodeIndexer constructor.
// NOTE(review): '5' is a very short pattern, presumably meant to catch 5xx
// status codes. Confirm how withRetry matches these entries; a plain substring
// match would hit any message that contains the digit 5.
const DEFAULT_EMBEDDING_RETRY = {
    maxRetries: 3,
    baseDelayMs: 1_000,
    maxDelayMs: 30_000,
    retryableErrors: [
        '429', 'rate limit', 'Rate limit', 'RATE_LIMIT',
        '5',
        'ECONNRESET', 'ETIMEDOUT', 'timeout',
        'temporarily unavailable', 'service unavailable',
    ],
};
|
|
51
|
+
/**
 * Break identifiers apart into space-separated words.
 *
 * A space is inserted wherever a lowercase letter or digit is directly
 * followed by an uppercase letter, runs of `_` / `-` become a single space,
 * and all whitespace is collapsed and trimmed.
 *
 * @param {string} content - Text to normalize.
 * @returns {string} The normalized text.
 */
export function normalizeIdentifiers(content) {
    // Only lower/digit -> upper boundaries are split, so "XMLParser" stays intact.
    const camelSplit = content.replace(/([a-z0-9])([A-Z])/g, '$1 $2');
    const separatorsSpaced = camelSplit.replace(/[_-]+/g, ' ');
    const collapsed = separatorsSpaced.replace(/\s+/g, ' ');
    return collapsed.trim();
}
|
|
58
|
+
/**
 * Map a CodeChunk (camelCase fields) onto the snake_case row shape stored in
 * LanceDB. The original note on this function says LanceDB SQL does not
 * support camelCase column names.
 *
 * @param {object} chunk - Chunk to store.
 * @param {*} vector - Embedding stored alongside the chunk, passed through as-is.
 * @param {boolean} normalizeContent - When truthy, `content_normalized` holds
 *   the output of `normalizeIdentifiers`; otherwise it mirrors `content`.
 * @returns {object} Row object; `scope` is serialized to a JSON string.
 */
function toChunkRecord(chunk, vector, normalizeContent) {
    let contentNormalized = chunk.content;
    if (normalizeContent) {
        contentNormalized = normalizeIdentifiers(chunk.content);
    }
    const scopeJson = JSON.stringify(chunk.scope);
    return {
        id: chunk.id,
        vector,
        path: chunk.path,
        language: chunk.language,
        symbol_type: chunk.symbolType,
        symbol_name: chunk.symbolName,
        signature: chunk.signature,
        parent_symbol: chunk.parentSymbol,
        scope: scopeJson,
        content: chunk.content,
        content_normalized: contentNormalized,
        start_line: chunk.startLine,
        end_line: chunk.endLine,
        docstring: chunk.docstring,
        modified: chunk.modified,
        content_hash: chunk.contentHash,
    };
}
|
|
83
|
+
/**
 * Map a LanceDB row (snake_case columns) back to a camelCase search result.
 *
 * @param {object} r - Row as returned by LanceDB.
 * @returns {object} Search result. `scope` is parsed from its JSON string
 *   (an empty or missing value yields `[]`). `score` is `1 - _distance / 2`
 *   when the row carries `_distance`, and `1.0` otherwise.
 */
function fromChunkRecord(r) {
    const scope = JSON.parse(r.scope || '[]');
    // NOTE(review): halving the distance presumably maps a [0, 2] distance
    // range onto a [1, 0] score - confirm the metric used by the table.
    let score = 1.0;
    if (r._distance !== undefined) {
        score = 1 - r._distance / 2;
    }
    return {
        id: r.id,
        path: r.path,
        language: r.language,
        symbolType: r.symbol_type,
        symbolName: r.symbol_name,
        signature: r.signature,
        parentSymbol: r.parent_symbol,
        scope,
        content: r.content,
        startLine: r.start_line,
        endLine: r.end_line,
        docstring: r.docstring,
        modified: r.modified,
        contentHash: r.content_hash,
        score,
    };
}
|
|
106
|
+
// Default include globs: one `**/*.<ext>` pattern per supported extension.
const DEFAULT_INCLUDE = [
    'ts', 'tsx', 'js', 'jsx', 'mjs', 'cjs',
    'css',
    'graphql', 'gql', 'graphqls',
    'json', 'yaml', 'yml',
    'html', 'htm',
    'sh', 'bash', 'zsh',
].map((ext) => `**/*.${ext}`);
|
|
127
|
+
// Default exclude globs: dependency, build-output, tooling and cache
// directories, generated bundles, vendored code and declaration files.
const DEFAULT_EXCLUDE = [
    '**/node_modules/**', '**/dist/**', '**/build/**', '**/out/**',
    '**/.git/**', '**/.next/**', '**/.nuxt/**', '**/.svelte-kit/**',
    '**/coverage/**', '**/__pycache__/**',
    '**/*.min.js', '**/*.bundle.js', '**/*.chunk.js',
    '**/vendor/**', '**/third_party/**',
    // Declaration files are excluded by default.
    '**/*.d.ts',
];
|
|
146
|
+
// Allow-list of language names used for filter sanitization.
const VALID_LANGUAGES = new Set([
    'typescript', 'tsx', 'javascript', 'jsx',
    'css', 'graphql', 'json', 'yaml', 'html', 'bash',
]);
|
|
159
|
+
// Allow-list of symbol types used for filter sanitization.
const VALID_SYMBOL_TYPES = new Set([
    'function', 'class', 'method', 'interface',
    'type', 'variable', 'comment', 'file',
]);
|
|
170
|
+
/**
 * Validate an identifier before it is placed into a SQL WHERE clause.
 *
 * Only non-empty strings made of ASCII letters, digits, `_` and `-` are
 * accepted. Non-string values are rejected explicitly: `RegExp.prototype.test`
 * coerces its argument to a string, so `undefined`, `null` or `['a']` would
 * otherwise pass the pattern and be returned un-sanitized.
 *
 * @param {string} value - Candidate identifier.
 * @returns {string} `value`, unchanged, when it is valid.
 * @throws {Error} `Invalid identifier: <value>` for anything else.
 */
function sanitizeIdentifier(value) {
    if (typeof value !== 'string' || !/^[a-zA-Z0-9_-]+$/.test(value)) {
        // String() keeps the message safe for values that cannot be interpolated (e.g. symbols).
        throw new Error(`Invalid identifier: ${String(value)}`);
    }
    return value;
}
|
|
179
|
+
/**
 * Escape a path for use inside a single-quoted SQL string literal by doubling
 * every single quote.
 *
 * @param {string} value - Path to escape.
 * @returns {string} The path with each `'` replaced by `''`.
 */
function sanitizePath(value) {
    const escaped = value.replace(/'/g, "''");
    return escaped;
}
|
|
185
|
+
/**
 * Run `fn` while holding an exclusive on-disk index lock.
 *
 * The lock is a directory named `code_index.lock` inside `lockDir`. A lock
 * whose mtime is older than LOCK_TIMEOUT_MS is treated as abandoned, removed,
 * and acquisition is retried once. While `fn` runs, the lock's mtime is
 * refreshed periodically so that a run longer than LOCK_TIMEOUT_MS is not
 * mistaken for an abandoned lock. The lock is always removed afterwards,
 * whether `fn` succeeds or throws.
 *
 * @param {string} lockDir - Directory in which the lock directory is created.
 * @param {Function} fn - Work to perform while the lock is held; may be async.
 * @returns {Promise<*>} Whatever `fn` returns or resolves to.
 * @throws {Error} 'Code index is locked by another process' when a fresh lock
 *   exists, or when another process wins the race to re-acquire a released or
 *   stale lock. Other filesystem errors are rethrown unchanged.
 */
async function withIndexLock(lockDir, fn) {
    const lockPath = path.join(lockDir, 'code_index.lock');
    // mkdir either creates the directory or fails with EEXIST, so it serves as
    // an atomic test-and-set.
    const tryAcquire = () => {
        try {
            mkdirSync(lockPath);
            return true;
        }
        catch (err) {
            if (err.code === 'EEXIST') {
                return false;
            }
            throw err;
        }
    };
    if (!tryAcquire()) {
        let lockAgeMs = null;
        try {
            lockAgeMs = Date.now() - statSync(lockPath).mtimeMs;
        }
        catch (statErr) {
            // ENOENT: the lock was removed between the failed mkdir and stat.
            if (statErr.code !== 'ENOENT') {
                throw statErr;
            }
        }
        if (lockAgeMs !== null) {
            if (lockAgeMs <= LOCK_TIMEOUT_MS) {
                throw new Error('Code index is locked by another process');
            }
            // Stale lock: remove it (tolerating a concurrent removal).
            rmSync(lockPath, { recursive: true, force: true });
        }
        // Retry once. Losing this race means someone else now holds the lock,
        // which is reported the same way as any other held lock instead of
        // leaking a raw EEXIST error.
        if (!tryAcquire()) {
            throw new Error('Code index is locked by another process');
        }
    }
    // Heartbeat: keep the mtime fresh while the work is running.
    const heartbeat = setInterval(() => {
        const now = new Date();
        fs.utimes(lockPath, now, now).catch(() => {
            // Best effort: a missed refresh only makes the lock look older.
        });
    }, Math.max(1000, Math.floor(LOCK_TIMEOUT_MS / 3)));
    // Do not keep the process alive just for the heartbeat.
    heartbeat.unref();
    try {
        return await fn();
    }
    finally {
        clearInterval(heartbeat);
        rmSync(lockPath, { recursive: true, force: true });
    }
}
|
|
229
|
+
/**
|
|
230
|
+
* CodeIndexer - indexes code files using Tree-sitter and LanceDB.
|
|
231
|
+
*/
|
|
232
|
+
export class CodeIndexer {
|
|
233
|
+
db = null;
|
|
234
|
+
chunksTable = null;
|
|
235
|
+
fileIndexTable = null;
|
|
236
|
+
initialized = false;
|
|
237
|
+
rootPath;
|
|
238
|
+
dbPath;
|
|
239
|
+
localStateDir;
|
|
240
|
+
metaPath;
|
|
241
|
+
gitignore = null;
|
|
242
|
+
wal;
|
|
243
|
+
walEnabled;
|
|
244
|
+
retryOptions;
|
|
245
|
+
parser;
|
|
246
|
+
graphIndexer = null;
|
|
247
|
+
treeIndexer = null;
|
|
248
|
+
// Injected dependencies
|
|
249
|
+
embeddingProvider;
|
|
250
|
+
rerankerProvider;
|
|
251
|
+
embeddingConfig;
|
|
252
|
+
rerankingConfig;
|
|
253
|
+
codeConfig;
|
|
254
|
+
codeFtsConfig;
|
|
255
|
+
lancedbStorageOptions;
|
|
256
|
+
// Unified SyncWal (optional - when provided, uses unified recovery)
|
|
257
|
+
syncWal = null;
|
|
258
|
+
constructor(rootPath, dbPath, deps, options = {}) {
|
|
259
|
+
this.rootPath = path.resolve(rootPath);
|
|
260
|
+
this.dbPath = dbPath;
|
|
261
|
+
this.localStateDir = options.localStateDir ?? dbPath;
|
|
262
|
+
this.metaPath = path.join(this.localStateDir, 'code_meta.json');
|
|
263
|
+
this.wal = new CodeIndexWal(this.localStateDir);
|
|
264
|
+
this.walEnabled = options.walEnabled ?? deps.codeConfig.wal?.enabled ?? true;
|
|
265
|
+
this.retryOptions = {
|
|
266
|
+
...DEFAULT_EMBEDDING_RETRY,
|
|
267
|
+
...deps.codeConfig.retry,
|
|
268
|
+
...options.retryOptions,
|
|
269
|
+
};
|
|
270
|
+
this.parser = createTreeSitterParser();
|
|
271
|
+
if (deps.codeConfig.graph.enabled) {
|
|
272
|
+
this.graphIndexer = new GraphIndexer({
|
|
273
|
+
rootPath: this.rootPath,
|
|
274
|
+
dbPath: this.dbPath,
|
|
275
|
+
codeConfig: deps.codeConfig,
|
|
276
|
+
parser: this.parser,
|
|
277
|
+
storageOptions: deps.lancedbStorageOptions,
|
|
278
|
+
});
|
|
279
|
+
}
|
|
280
|
+
if (deps.codeConfig.tree?.enabled) {
|
|
281
|
+
this.treeIndexer = new TreeIndexer(this.rootPath, this.dbPath, deps.embeddingProvider, deps.embeddingProvider.getDimensions(), deps.codeConfig.tree, {
|
|
282
|
+
localStateDir: this.localStateDir,
|
|
283
|
+
lancedbStorageOptions: deps.lancedbStorageOptions,
|
|
284
|
+
});
|
|
285
|
+
}
|
|
286
|
+
// Store injected dependencies
|
|
287
|
+
this.embeddingProvider = deps.embeddingProvider;
|
|
288
|
+
this.rerankerProvider = deps.rerankerProvider ?? null;
|
|
289
|
+
this.embeddingConfig = deps.embeddingConfig;
|
|
290
|
+
this.rerankingConfig = deps.rerankingConfig ?? DEFAULT_RERANKING_CONFIG;
|
|
291
|
+
this.codeConfig = deps.codeConfig;
|
|
292
|
+
this.codeFtsConfig = deps.codeFtsConfig;
|
|
293
|
+
this.lancedbStorageOptions = deps.lancedbStorageOptions;
|
|
294
|
+
// Store unified SyncWal if provided
|
|
295
|
+
this.syncWal = deps.syncWal || null;
|
|
296
|
+
}
|
|
297
|
+
async loadMeta() {
|
|
298
|
+
try {
|
|
299
|
+
const content = await fs.readFile(this.metaPath, 'utf-8');
|
|
300
|
+
return JSON.parse(content);
|
|
301
|
+
}
|
|
302
|
+
catch {
|
|
303
|
+
return null;
|
|
304
|
+
}
|
|
305
|
+
}
|
|
306
|
+
async saveMeta() {
|
|
307
|
+
const modelName = this.embeddingProvider.getModelName();
|
|
308
|
+
const meta = {
|
|
309
|
+
model: modelName,
|
|
310
|
+
provider: this.embeddingConfig.provider,
|
|
311
|
+
dimensions: this.embeddingProvider.getDimensions(),
|
|
312
|
+
createdAt: Date.now(),
|
|
313
|
+
};
|
|
314
|
+
await fs.mkdir(path.dirname(this.metaPath), { recursive: true });
|
|
315
|
+
await fs.writeFile(this.metaPath, JSON.stringify(meta, null, 2));
|
|
316
|
+
}
|
|
317
|
+
/**
|
|
318
|
+
* Create file filter based on include/exclude patterns and .gitignore.
|
|
319
|
+
*/
|
|
320
|
+
createFileFilter(options) {
|
|
321
|
+
const include = options.include || this.codeConfig.include || DEFAULT_INCLUDE;
|
|
322
|
+
const exclude = options.exclude || this.codeConfig.exclude || DEFAULT_EXCLUDE;
|
|
323
|
+
const maxFileSize = options.maxFileSize || this.codeConfig.maxFileSize || 1024 * 1024;
|
|
324
|
+
// Load .gitignore (always respected)
|
|
325
|
+
if (!this.gitignore) {
|
|
326
|
+
const ig = ignore();
|
|
327
|
+
const gitignorePath = path.join(this.rootPath, '.gitignore');
|
|
328
|
+
if (existsSync(gitignorePath)) {
|
|
329
|
+
try {
|
|
330
|
+
const content = readFileSync(gitignorePath, 'utf8');
|
|
331
|
+
ig.add(content);
|
|
332
|
+
}
|
|
333
|
+
catch {
|
|
334
|
+
// Ignore errors reading .gitignore
|
|
335
|
+
}
|
|
336
|
+
}
|
|
337
|
+
this.gitignore = ig;
|
|
338
|
+
}
|
|
339
|
+
return (filePath) => {
|
|
340
|
+
const relativePath = path.relative(this.rootPath, filePath);
|
|
341
|
+
// 1. Must match at least one include pattern
|
|
342
|
+
const included = include.some((p) => minimatch(relativePath, p));
|
|
343
|
+
if (!included) {
|
|
344
|
+
return false;
|
|
345
|
+
}
|
|
346
|
+
// 2. Must not match any exclude pattern
|
|
347
|
+
if (exclude.some((p) => minimatch(relativePath, p))) {
|
|
348
|
+
return false;
|
|
349
|
+
}
|
|
350
|
+
// 3. Must not be in .gitignore
|
|
351
|
+
if (this.gitignore && this.gitignore.ignores(relativePath)) {
|
|
352
|
+
return false;
|
|
353
|
+
}
|
|
354
|
+
// 4. File size check is done at read time
|
|
355
|
+
return true;
|
|
356
|
+
};
|
|
357
|
+
}
|
|
358
|
+
/**
|
|
359
|
+
* Compute manifest hash from sorted file paths for recovery validation.
|
|
360
|
+
* Both notes and code use the same hash algorithm.
|
|
361
|
+
*/
|
|
362
|
+
computeManifestHash(files) {
|
|
363
|
+
// Sort deterministically by path
|
|
364
|
+
const sortedPaths = files.map(f => path.relative(this.rootPath, f)).sort();
|
|
365
|
+
const content = sortedPaths.join('\n');
|
|
366
|
+
return crypto.createHash('sha256').update(content).digest('hex');
|
|
367
|
+
}
|
|
368
|
+
/**
|
|
369
|
+
* Initialize the indexer - connect to LanceDB and initialize parser.
|
|
370
|
+
*/
|
|
371
|
+
async initialize() {
|
|
372
|
+
if (this.initialized)
|
|
373
|
+
return;
|
|
374
|
+
// Ensure LanceDB directory exists
|
|
375
|
+
const isRemoteDbPath = /^[a-z][a-z0-9+.-]*:\/\//i.test(this.dbPath);
|
|
376
|
+
if (!isRemoteDbPath) {
|
|
377
|
+
await fs.mkdir(this.dbPath, { recursive: true });
|
|
378
|
+
}
|
|
379
|
+
// Connect to LanceDB
|
|
380
|
+
this.db = await lancedb.connect(this.dbPath, {
|
|
381
|
+
storageOptions: this.lancedbStorageOptions,
|
|
382
|
+
});
|
|
383
|
+
const tableNames = await this.db.tableNames();
|
|
384
|
+
// Check if embedding config changed
|
|
385
|
+
const meta = await this.loadMeta();
|
|
386
|
+
const currentModel = this.embeddingProvider.getModelName();
|
|
387
|
+
const currentDimensions = this.embeddingProvider.getDimensions();
|
|
388
|
+
const needsRecreate = meta && (meta.model !== currentModel ||
|
|
389
|
+
meta.provider !== this.embeddingConfig.provider ||
|
|
390
|
+
meta.dimensions !== currentDimensions);
|
|
391
|
+
if (needsRecreate) {
|
|
392
|
+
console.warn('[code-indexer] Embedding config changed, recreating index...');
|
|
393
|
+
console.warn(` Previous: model=${meta.model}, provider=${meta.provider}, dimensions=${meta.dimensions}`);
|
|
394
|
+
console.warn(` Current: model=${currentModel}, provider=${this.embeddingConfig.provider}, dimensions=${currentDimensions}`);
|
|
395
|
+
// Drop tables if they exist
|
|
396
|
+
if (tableNames.includes('code_chunks')) {
|
|
397
|
+
await this.db.dropTable('code_chunks');
|
|
398
|
+
}
|
|
399
|
+
if (tableNames.includes('code_file_index')) {
|
|
400
|
+
await this.db.dropTable('code_file_index');
|
|
401
|
+
}
|
|
402
|
+
this.chunksTable = null;
|
|
403
|
+
this.fileIndexTable = null;
|
|
404
|
+
}
|
|
405
|
+
// Save current meta
|
|
406
|
+
await this.saveMeta();
|
|
407
|
+
// Open existing tables if they exist (and weren't dropped)
|
|
408
|
+
if (!this.chunksTable && (await this.db.tableNames()).includes('code_chunks')) {
|
|
409
|
+
this.chunksTable = await this.db.openTable('code_chunks');
|
|
410
|
+
}
|
|
411
|
+
if (!this.fileIndexTable && (await this.db.tableNames()).includes('code_file_index')) {
|
|
412
|
+
this.fileIndexTable = await this.db.openTable('code_file_index');
|
|
413
|
+
}
|
|
414
|
+
// Initialize Tree-sitter parser
|
|
415
|
+
await this.parser.initialize();
|
|
416
|
+
if (this.graphIndexer) {
|
|
417
|
+
await this.graphIndexer.initialize();
|
|
418
|
+
}
|
|
419
|
+
this.initialized = true;
|
|
420
|
+
// Auto-create all tables at startup
|
|
421
|
+
await this.ensureAllTables();
|
|
422
|
+
}
|
|
423
|
+
/**
|
|
424
|
+
* Ensure all code index tables exist with proper schema.
|
|
425
|
+
* Safe to call multiple times (idempotent).
|
|
426
|
+
*/
|
|
427
|
+
async ensureAllTables() {
|
|
428
|
+
if (!this.initialized) {
|
|
429
|
+
await this.initialize();
|
|
430
|
+
return; // initialize() already called ensureAllTables()
|
|
431
|
+
}
|
|
432
|
+
await this.ensureChunksTable();
|
|
433
|
+
await this.ensureFileIndexTable();
|
|
434
|
+
if (this.graphIndexer) {
|
|
435
|
+
await this.graphIndexer.ensureAllTables();
|
|
436
|
+
}
|
|
437
|
+
if (this.treeIndexer) {
|
|
438
|
+
await this.treeIndexer.ensureAllTables();
|
|
439
|
+
}
|
|
440
|
+
}
|
|
441
|
+
/**
|
|
442
|
+
* Ensure code_chunks table exists.
|
|
443
|
+
*/
|
|
444
|
+
async ensureChunksTable() {
|
|
445
|
+
if (this.chunksTable)
|
|
446
|
+
return this.chunksTable;
|
|
447
|
+
const dimensions = this.embeddingProvider.getDimensions();
|
|
448
|
+
// Create with sample data for schema inference (snake_case for SQL compatibility)
|
|
449
|
+
const sampleData = [
|
|
450
|
+
{
|
|
451
|
+
id: '__init__',
|
|
452
|
+
vector: new Array(dimensions).fill(0),
|
|
453
|
+
path: '',
|
|
454
|
+
language: '',
|
|
455
|
+
symbol_type: 'function',
|
|
456
|
+
symbol_name: '',
|
|
457
|
+
signature: '',
|
|
458
|
+
parent_symbol: '',
|
|
459
|
+
scope: '[]', // JSON array
|
|
460
|
+
content: '',
|
|
461
|
+
content_normalized: '',
|
|
462
|
+
start_line: 0,
|
|
463
|
+
end_line: 0,
|
|
464
|
+
docstring: '',
|
|
465
|
+
modified: 0,
|
|
466
|
+
content_hash: '',
|
|
467
|
+
},
|
|
468
|
+
];
|
|
469
|
+
this.chunksTable = await this.db.createTable('code_chunks', sampleData, {
|
|
470
|
+
mode: 'overwrite',
|
|
471
|
+
});
|
|
472
|
+
// Delete the sample row
|
|
473
|
+
await this.chunksTable.delete("id = '__init__'");
|
|
474
|
+
return this.chunksTable;
|
|
475
|
+
}
|
|
476
|
+
/**
|
|
477
|
+
* Ensure code_file_index table exists.
|
|
478
|
+
*/
|
|
479
|
+
async ensureFileIndexTable() {
|
|
480
|
+
if (this.fileIndexTable)
|
|
481
|
+
return this.fileIndexTable;
|
|
482
|
+
const sampleData = [
|
|
483
|
+
{
|
|
484
|
+
path: '__init__',
|
|
485
|
+
mtime: 0,
|
|
486
|
+
content_hash: '',
|
|
487
|
+
chunk_ids: '[]',
|
|
488
|
+
language: '',
|
|
489
|
+
indexed_at: 0,
|
|
490
|
+
},
|
|
491
|
+
];
|
|
492
|
+
this.fileIndexTable = await this.db.createTable('code_file_index', sampleData, { mode: 'overwrite' });
|
|
493
|
+
// Delete the sample row
|
|
494
|
+
await this.fileIndexTable.delete("path = '__init__'");
|
|
495
|
+
return this.fileIndexTable;
|
|
496
|
+
}
|
|
497
|
+
getFtsColumn() {
|
|
498
|
+
return this.codeFtsConfig.normalizeIdentifiers ? 'content_normalized' : 'content';
|
|
499
|
+
}
|
|
500
|
+
async createOrUpdateFtsIndex() {
|
|
501
|
+
if (!this.codeFtsConfig.enabled) {
|
|
502
|
+
return;
|
|
503
|
+
}
|
|
504
|
+
const table = await this.ensureChunksTable();
|
|
505
|
+
const count = await table.countRows();
|
|
506
|
+
if (count === 0) {
|
|
507
|
+
return;
|
|
508
|
+
}
|
|
509
|
+
const columnToIndex = this.getFtsColumn();
|
|
510
|
+
const indexName = `${columnToIndex}_idx`;
|
|
511
|
+
const staleIndexName = `${columnToIndex === 'content' ? 'content_normalized' : 'content'}_idx`;
|
|
512
|
+
try {
|
|
513
|
+
const indices = await table.listIndices();
|
|
514
|
+
if (indices.some((index) => index.name === staleIndexName)) {
|
|
515
|
+
await table.dropIndex(staleIndexName);
|
|
516
|
+
}
|
|
517
|
+
}
|
|
518
|
+
catch (err) {
|
|
519
|
+
console.warn('[code-indexer] Could not inspect/drop stale FTS index:', err.message);
|
|
520
|
+
}
|
|
521
|
+
console.log(`[code-indexer] Creating/updating FTS index on ${columnToIndex} (${count} chunks)...`);
|
|
522
|
+
try {
|
|
523
|
+
await table.createIndex(columnToIndex, {
|
|
524
|
+
config: lancedb.Index.fts({
|
|
525
|
+
withPosition: this.codeFtsConfig.withPosition,
|
|
526
|
+
baseTokenizer: this.codeFtsConfig.baseTokenizer,
|
|
527
|
+
stem: this.codeFtsConfig.stem,
|
|
528
|
+
removeStopWords: this.codeFtsConfig.removeStopWords,
|
|
529
|
+
ngramMinLength: this.codeFtsConfig.ngramMinLength,
|
|
530
|
+
ngramMaxLength: this.codeFtsConfig.ngramMaxLength,
|
|
531
|
+
prefixOnly: this.codeFtsConfig.prefixOnly,
|
|
532
|
+
}),
|
|
533
|
+
replace: true,
|
|
534
|
+
});
|
|
535
|
+
await table.waitForIndex([indexName], 60);
|
|
536
|
+
console.log('[code-indexer] FTS index created successfully');
|
|
537
|
+
}
|
|
538
|
+
catch (err) {
|
|
539
|
+
console.warn('[code-indexer] Could not create FTS index:', err.message);
|
|
540
|
+
}
|
|
541
|
+
}
|
|
542
|
+
normalizeSearchScores(results) {
|
|
543
|
+
if (results.length === 0)
|
|
544
|
+
return [];
|
|
545
|
+
const values = results.map((r) => r.score);
|
|
546
|
+
const min = Math.min(...values);
|
|
547
|
+
const max = Math.max(...values);
|
|
548
|
+
if (max === min)
|
|
549
|
+
return values.map(() => 1);
|
|
550
|
+
return values.map((v) => (v - min) / (max - min));
|
|
551
|
+
}
|
|
552
|
+
/**
|
|
553
|
+
* Get file record from index.
|
|
554
|
+
*/
|
|
555
|
+
async getFileRecord(filePath) {
|
|
556
|
+
const table = await this.ensureFileIndexTable();
|
|
557
|
+
const results = await table
|
|
558
|
+
.query()
|
|
559
|
+
.where(`path = '${sanitizePath(filePath)}'`)
|
|
560
|
+
.toArray();
|
|
561
|
+
if (results.length === 0)
|
|
562
|
+
return null;
|
|
563
|
+
const r = results[0];
|
|
564
|
+
return {
|
|
565
|
+
path: r.path,
|
|
566
|
+
mtime: r.mtime,
|
|
567
|
+
content_hash: r.content_hash,
|
|
568
|
+
chunk_ids: r.chunk_ids,
|
|
569
|
+
language: r.language,
|
|
570
|
+
indexed_at: r.indexed_at,
|
|
571
|
+
};
|
|
572
|
+
}
|
|
573
|
+
getTreeDeps() {
|
|
574
|
+
return {
|
|
575
|
+
getSymbols: async (filePath) => this.getSymbolsForTree(filePath),
|
|
576
|
+
getFileRecord: async (filePath) => this.getCodeFileRecordForTree(filePath),
|
|
577
|
+
};
|
|
578
|
+
}
|
|
579
|
+
async getCodeFileRecordForTree(filePath) {
|
|
580
|
+
const record = await this.getFileRecord(filePath);
|
|
581
|
+
if (!record)
|
|
582
|
+
return null;
|
|
583
|
+
return {
|
|
584
|
+
mtime: record.mtime,
|
|
585
|
+
content_hash: record.content_hash,
|
|
586
|
+
language: record.language,
|
|
587
|
+
};
|
|
588
|
+
}
|
|
589
|
+
async getSymbolsForTree(filePath) {
|
|
590
|
+
const table = await this.ensureChunksTable();
|
|
591
|
+
const rows = await table
|
|
592
|
+
.query()
|
|
593
|
+
.where(`path = '${sanitizePath(filePath)}'`)
|
|
594
|
+
.toArray();
|
|
595
|
+
return rows.map((row) => ({
|
|
596
|
+
id: row.id,
|
|
597
|
+
path: row.path,
|
|
598
|
+
language: row.language,
|
|
599
|
+
symbolType: row.symbol_type,
|
|
600
|
+
symbolName: row.symbol_name,
|
|
601
|
+
signature: row.signature || null,
|
|
602
|
+
parentSymbol: row.parent_symbol || null,
|
|
603
|
+
scope: JSON.parse(row.scope || '[]'),
|
|
604
|
+
content: row.content,
|
|
605
|
+
startLine: row.start_line,
|
|
606
|
+
endLine: row.end_line,
|
|
607
|
+
docstring: row.docstring || null,
|
|
608
|
+
modified: row.modified,
|
|
609
|
+
contentHash: row.content_hash,
|
|
610
|
+
}));
|
|
611
|
+
}
|
|
612
|
+
/**
|
|
613
|
+
* Get all indexed file paths.
|
|
614
|
+
*/
|
|
615
|
+
async getAllIndexedPaths() {
|
|
616
|
+
const table = await this.ensureFileIndexTable();
|
|
617
|
+
const results = await table.query().select(['path']).toArray();
|
|
618
|
+
return new Set(results.map((r) => r.path));
|
|
619
|
+
}
|
|
620
|
+
/**
|
|
621
|
+
* Update file index record.
|
|
622
|
+
*/
|
|
623
|
+
async updateFileIndex(filePath, mtime, contentHash, chunkIds, language) {
|
|
624
|
+
const table = await this.ensureFileIndexTable();
|
|
625
|
+
// Delete existing record if any
|
|
626
|
+
await table.delete(`path = '${sanitizePath(filePath)}'`);
|
|
627
|
+
// Insert new record
|
|
628
|
+
await table.add([
|
|
629
|
+
{
|
|
630
|
+
path: filePath,
|
|
631
|
+
mtime,
|
|
632
|
+
content_hash: contentHash,
|
|
633
|
+
chunk_ids: JSON.stringify(chunkIds),
|
|
634
|
+
language,
|
|
635
|
+
indexed_at: Date.now(),
|
|
636
|
+
},
|
|
637
|
+
]);
|
|
638
|
+
}
|
|
639
|
+
/**
|
|
640
|
+
* Remove file from index.
|
|
641
|
+
*/
|
|
642
|
+
async removeFromFileIndex(filePath) {
|
|
643
|
+
const table = await this.ensureFileIndexTable();
|
|
644
|
+
await table.delete(`path = '${sanitizePath(filePath)}'`);
|
|
645
|
+
}
|
|
646
|
+
/**
|
|
647
|
+
* Delete chunks by IDs.
|
|
648
|
+
*/
|
|
649
|
+
async deleteChunks(chunkIds) {
|
|
650
|
+
if (chunkIds.length === 0)
|
|
651
|
+
return;
|
|
652
|
+
const table = await this.ensureChunksTable();
|
|
653
|
+
for (const id of chunkIds) {
|
|
654
|
+
await table.delete(`id = '${id}'`);
|
|
655
|
+
}
|
|
656
|
+
}
|
|
657
|
+
/**
|
|
658
|
+
* Delete all chunks for a file path.
|
|
659
|
+
*/
|
|
660
|
+
async deleteChunksForPath(filePath) {
|
|
661
|
+
const table = await this.ensureChunksTable();
|
|
662
|
+
await table.delete(`path = '${sanitizePath(filePath)}'`);
|
|
663
|
+
}
|
|
664
|
+
/**
|
|
665
|
+
* Index a single code chunk with retry logic.
|
|
666
|
+
*/
|
|
667
|
+
async indexChunk(chunk) {
|
|
668
|
+
const table = await this.ensureChunksTable();
|
|
669
|
+
// Build embedding text
|
|
670
|
+
const embeddingText = buildEmbeddingText(chunk);
|
|
671
|
+
// Get embedding with retry
|
|
672
|
+
let embedding;
|
|
673
|
+
try {
|
|
674
|
+
embedding = await withRetry(async () => {
|
|
675
|
+
const result = await this.embeddingProvider.getEmbedding(embeddingText, 'document');
|
|
676
|
+
if (isError(result)) {
|
|
677
|
+
throw new Error(result.message);
|
|
678
|
+
}
|
|
679
|
+
return result;
|
|
680
|
+
}, this.retryOptions);
|
|
681
|
+
}
|
|
682
|
+
catch (err) {
|
|
683
|
+
const error = err instanceof Error ? err : new Error(String(err));
|
|
684
|
+
return createError('EMBEDDING_ERROR', error.message);
|
|
685
|
+
}
|
|
686
|
+
// Check if chunk exists
|
|
687
|
+
const existing = await table
|
|
688
|
+
.query()
|
|
689
|
+
.where(`id = '${chunk.id}'`)
|
|
690
|
+
.toArray();
|
|
691
|
+
if (existing.length > 0 && existing[0].content_hash === chunk.contentHash) {
|
|
692
|
+
return chunk.id; // No changes needed
|
|
693
|
+
}
|
|
694
|
+
// Delete existing if present
|
|
695
|
+
if (existing.length > 0) {
|
|
696
|
+
await table.delete(`id = '${chunk.id}'`);
|
|
697
|
+
}
|
|
698
|
+
// Insert new chunk using snake_case for SQL compatibility
|
|
699
|
+
const record = toChunkRecord(chunk, embedding, this.codeFtsConfig.normalizeIdentifiers);
|
|
700
|
+
await table.add([record]);
|
|
701
|
+
return chunk.id;
|
|
702
|
+
}
|
|
703
|
+
/**
|
|
704
|
+
* Index multiple chunks in batch - much faster than indexChunk() one by one.
|
|
705
|
+
* Uses batch embedding for efficiency with retry logic.
|
|
706
|
+
*
|
|
707
|
+
* @param chunks - Array of chunks to index
|
|
708
|
+
* @param batchSize - Embedding batch size (default: 32)
|
|
709
|
+
* @param onEmbeddingProgress - Optional callback for embedding progress
|
|
710
|
+
* @returns Array of indexed chunk IDs and any errors
|
|
711
|
+
*/
|
|
712
|
+
async indexChunksBatch(chunks, batchSize = 32, onEmbeddingProgress) {
|
|
713
|
+
if (chunks.length === 0) {
|
|
714
|
+
return { chunkIds: [], errors: [] };
|
|
715
|
+
}
|
|
716
|
+
const table = await this.ensureChunksTable();
|
|
717
|
+
const chunkIds = [];
|
|
718
|
+
const errors = [];
|
|
719
|
+
// Build embedding texts for all chunks
|
|
720
|
+
const embeddingTexts = chunks.map((chunk) => buildEmbeddingText(chunk));
|
|
721
|
+
// Get all embeddings in batch with retry logic
|
|
722
|
+
let embeddings;
|
|
723
|
+
try {
|
|
724
|
+
embeddings = await withRetry(async () => {
|
|
725
|
+
const result = await this.embeddingProvider.getEmbeddingsBatch(embeddingTexts, batchSize, 'document', onEmbeddingProgress);
|
|
726
|
+
if (isError(result)) {
|
|
727
|
+
throw new Error(result.message);
|
|
728
|
+
}
|
|
729
|
+
return result;
|
|
730
|
+
}, this.retryOptions);
|
|
731
|
+
}
|
|
732
|
+
catch (err) {
|
|
733
|
+
const error = err instanceof Error ? err : new Error(String(err));
|
|
734
|
+
return { chunkIds: [], errors: [createError('EMBEDDING_ERROR', error.message)] };
|
|
735
|
+
}
|
|
736
|
+
// Build records for batch insert (snake_case for SQL compatibility)
|
|
737
|
+
const records = [];
|
|
738
|
+
for (let i = 0; i < chunks.length; i++) {
|
|
739
|
+
const chunk = chunks[i];
|
|
740
|
+
const embedding = embeddings[i];
|
|
741
|
+
records.push(toChunkRecord(chunk, embedding, this.codeFtsConfig.normalizeIdentifiers));
|
|
742
|
+
chunkIds.push(chunk.id);
|
|
743
|
+
}
|
|
744
|
+
// Batch insert all records
|
|
745
|
+
if (records.length > 0) {
|
|
746
|
+
await table.add(records);
|
|
747
|
+
}
|
|
748
|
+
return { chunkIds, errors };
|
|
749
|
+
}
|
|
750
|
+
/**
|
|
751
|
+
* Parse a single file and return chunks without indexing.
|
|
752
|
+
* Used for batch processing.
|
|
753
|
+
*/
|
|
754
|
+
async parseFile(filePath, options = {}) {
|
|
755
|
+
const absolutePath = path.isAbsolute(filePath)
|
|
756
|
+
? filePath
|
|
757
|
+
: path.join(this.rootPath, filePath);
|
|
758
|
+
const relativePath = path.relative(this.rootPath, absolutePath);
|
|
759
|
+
try {
|
|
760
|
+
// Check file size
|
|
761
|
+
const stat = await fs.stat(absolutePath);
|
|
762
|
+
const maxSize = options.maxFileSize || this.codeConfig.maxFileSize || 1024 * 1024;
|
|
763
|
+
if (stat.size > maxSize) {
|
|
764
|
+
return createError('FILE_TOO_LARGE', `File exceeds max size: ${relativePath}`);
|
|
765
|
+
}
|
|
766
|
+
// Read file content
|
|
767
|
+
const content = await fs.readFile(absolutePath, 'utf-8');
|
|
768
|
+
const fileHash = generateContentHash(content);
|
|
769
|
+
// Detect language
|
|
770
|
+
const langKey = this.parser.getLanguageForFile(absolutePath);
|
|
771
|
+
if (!langKey) {
|
|
772
|
+
return createError('UNKNOWN_LANGUAGE', `Unknown file type: ${relativePath}`);
|
|
773
|
+
}
|
|
774
|
+
// Get extractor
|
|
775
|
+
const extractor = languageRegistry.getExtractor(langKey);
|
|
776
|
+
if (!extractor) {
|
|
777
|
+
return createError('NO_EXTRACTOR', `No extractor for language: ${langKey}`);
|
|
778
|
+
}
|
|
779
|
+
// Parse file
|
|
780
|
+
const tree = await this.parser.parse(content, langKey);
|
|
781
|
+
try {
|
|
782
|
+
// Extract symbols
|
|
783
|
+
const symbolsWithDetails = extractor.extractSymbols(tree, content, relativePath);
|
|
784
|
+
const symbols = symbolsWithDetails.map((s) => ({
|
|
785
|
+
name: s.name,
|
|
786
|
+
type: s.type,
|
|
787
|
+
startLine: s.startLine,
|
|
788
|
+
endLine: s.endLine,
|
|
789
|
+
node: s.node,
|
|
790
|
+
}));
|
|
791
|
+
// Convert to chunks
|
|
792
|
+
const minLines = options.minLines || this.codeConfig.chunking?.minLines || 3;
|
|
793
|
+
const maxLines = options.maxLines || this.codeConfig.chunking?.maxLines || 100;
|
|
794
|
+
const chunks = symbolsToChunks(symbols, relativePath, content, langKey, stat.mtimeMs, { minLines, maxLines });
|
|
795
|
+
// Add metadata from extractor
|
|
796
|
+
// Match by symbolName + startLine since symbolsToChunks filters by minLines
|
|
797
|
+
const detailsMap = new Map(symbolsWithDetails.map(d => [`${d.name}:${d.startLine}`, d]));
|
|
798
|
+
for (const chunk of chunks) {
|
|
799
|
+
if (chunk.symbolType === 'file')
|
|
800
|
+
continue; // Skip file chunks
|
|
801
|
+
const details = detailsMap.get(`${chunk.symbolName}:${chunk.startLine}`);
|
|
802
|
+
if (details) {
|
|
803
|
+
chunk.signature = details.signature;
|
|
804
|
+
chunk.docstring = details.docstring;
|
|
805
|
+
chunk.parentSymbol = details.parentSymbol;
|
|
806
|
+
chunk.scope = details.scope;
|
|
807
|
+
}
|
|
808
|
+
}
|
|
809
|
+
applyStableChunkIds(chunks);
|
|
810
|
+
// Add file-level chunk
|
|
811
|
+
const includeFileChunks = options.includeFileChunks ?? this.codeConfig.chunking?.includeFileChunks ?? true;
|
|
812
|
+
if (includeFileChunks) {
|
|
813
|
+
const fileChunk = createFileChunk(relativePath, content, langKey, stat.mtimeMs);
|
|
814
|
+
chunks.unshift(fileChunk);
|
|
815
|
+
}
|
|
816
|
+
return { chunks, fileHash, mtime: stat.mtimeMs, language: langKey };
|
|
817
|
+
}
|
|
818
|
+
finally {
|
|
819
|
+
this.parser.deleteTree(tree);
|
|
820
|
+
}
|
|
821
|
+
}
|
|
822
|
+
catch (err) {
|
|
823
|
+
return createError('PARSE_FILE_ERROR', `Failed to parse ${relativePath}`, {
|
|
824
|
+
error: err.message,
|
|
825
|
+
});
|
|
826
|
+
}
|
|
827
|
+
}
|
|
828
|
+
/**
|
|
829
|
+
* Index a single file with WAL support.
|
|
830
|
+
*/
|
|
831
|
+
async indexFile(filePath, options = {}, batchId) {
|
|
832
|
+
await this.initialize();
|
|
833
|
+
const absolutePath = path.isAbsolute(filePath)
|
|
834
|
+
? filePath
|
|
835
|
+
: path.join(this.rootPath, filePath);
|
|
836
|
+
const relativePath = path.relative(this.rootPath, absolutePath);
|
|
837
|
+
const chunkIds = [];
|
|
838
|
+
const errors = [];
|
|
839
|
+
// Write WAL start entry
|
|
840
|
+
if (this.walEnabled) {
|
|
841
|
+
await this.wal.start(relativePath, batchId);
|
|
842
|
+
}
|
|
843
|
+
try {
|
|
844
|
+
// Check file size
|
|
845
|
+
const stat = await fs.stat(absolutePath);
|
|
846
|
+
const maxSize = options.maxFileSize || this.codeConfig.maxFileSize || 1024 * 1024;
|
|
847
|
+
if (stat.size > maxSize) {
|
|
848
|
+
const err = createError('FILE_TOO_LARGE', `File exceeds max size: ${relativePath}`);
|
|
849
|
+
if (this.walEnabled) {
|
|
850
|
+
const retryCount = await this.wal.getRetryCount(relativePath);
|
|
851
|
+
await this.wal.fail(relativePath, err.message, retryCount + 1);
|
|
852
|
+
}
|
|
853
|
+
return { chunkIds, errors: [err] };
|
|
854
|
+
}
|
|
855
|
+
// Read file content
|
|
856
|
+
const content = await fs.readFile(absolutePath, 'utf-8');
|
|
857
|
+
const fileHash = generateContentHash(content);
|
|
858
|
+
// Detect language
|
|
859
|
+
const langKey = this.parser.getLanguageForFile(absolutePath);
|
|
860
|
+
if (!langKey) {
|
|
861
|
+
const err = createError('UNKNOWN_LANGUAGE', `Unknown file type: ${relativePath}`);
|
|
862
|
+
if (this.walEnabled) {
|
|
863
|
+
const retryCount = await this.wal.getRetryCount(relativePath);
|
|
864
|
+
await this.wal.fail(relativePath, err.message, retryCount + 1);
|
|
865
|
+
}
|
|
866
|
+
return { chunkIds, errors: [err] };
|
|
867
|
+
}
|
|
868
|
+
// Get extractor for this language
|
|
869
|
+
const extractor = languageRegistry.getExtractor(langKey);
|
|
870
|
+
if (!extractor) {
|
|
871
|
+
const err = createError('NO_EXTRACTOR', `No extractor for language: ${langKey}`);
|
|
872
|
+
if (this.walEnabled) {
|
|
873
|
+
const retryCount = await this.wal.getRetryCount(relativePath);
|
|
874
|
+
await this.wal.fail(relativePath, err.message, retryCount + 1);
|
|
875
|
+
}
|
|
876
|
+
return { chunkIds, errors: [err] };
|
|
877
|
+
}
|
|
878
|
+
// Parse file
|
|
879
|
+
const tree = await this.parser.parse(content, langKey);
|
|
880
|
+
try {
|
|
881
|
+
// Extract symbols using language-specific extractor
|
|
882
|
+
const symbolsWithDetails = extractor.extractSymbols(tree, content, relativePath);
|
|
883
|
+
// Convert to base ExtractedSymbol format for chunker
|
|
884
|
+
const symbols = symbolsWithDetails.map((s) => ({
|
|
885
|
+
name: s.name,
|
|
886
|
+
type: s.type,
|
|
887
|
+
startLine: s.startLine,
|
|
888
|
+
endLine: s.endLine,
|
|
889
|
+
node: s.node,
|
|
890
|
+
}));
|
|
891
|
+
// Convert symbols to chunks
|
|
892
|
+
const minLines = options.minLines || this.codeConfig.chunking?.minLines || 3;
|
|
893
|
+
const maxLines = options.maxLines || this.codeConfig.chunking?.maxLines || 100;
|
|
894
|
+
const chunks = symbolsToChunks(symbols, relativePath, content, langKey, stat.mtimeMs, { minLines, maxLines });
|
|
895
|
+
// Add detailed metadata from extractor to chunks
|
|
896
|
+
// Match by symbolName + startLine since symbolsToChunks filters by minLines
|
|
897
|
+
const detailsMap = new Map(symbolsWithDetails.map(d => [`${d.name}:${d.startLine}`, d]));
|
|
898
|
+
for (const chunk of chunks) {
|
|
899
|
+
if (chunk.symbolType === 'file')
|
|
900
|
+
continue; // Skip file chunks
|
|
901
|
+
const details = detailsMap.get(`${chunk.symbolName}:${chunk.startLine}`);
|
|
902
|
+
if (details) {
|
|
903
|
+
chunk.signature = details.signature;
|
|
904
|
+
chunk.docstring = details.docstring;
|
|
905
|
+
chunk.parentSymbol = details.parentSymbol;
|
|
906
|
+
chunk.scope = details.scope;
|
|
907
|
+
}
|
|
908
|
+
}
|
|
909
|
+
applyStableChunkIds(chunks);
|
|
910
|
+
// Optionally add file-level chunk
|
|
911
|
+
const includeFileChunks = options.includeFileChunks ?? this.codeConfig.chunking?.includeFileChunks ?? true;
|
|
912
|
+
if (includeFileChunks) {
|
|
913
|
+
const fileChunk = createFileChunk(relativePath, content, langKey, stat.mtimeMs);
|
|
914
|
+
chunks.unshift(fileChunk);
|
|
915
|
+
}
|
|
916
|
+
// Delete old chunks for this file
|
|
917
|
+
await this.deleteChunksForPath(relativePath);
|
|
918
|
+
// Index all chunks
|
|
919
|
+
for (const chunk of chunks) {
|
|
920
|
+
const result = await this.indexChunk(chunk);
|
|
921
|
+
if (isError(result)) {
|
|
922
|
+
errors.push(result);
|
|
923
|
+
}
|
|
924
|
+
else {
|
|
925
|
+
chunkIds.push(result);
|
|
926
|
+
}
|
|
927
|
+
}
|
|
928
|
+
// Update file index
|
|
929
|
+
await this.updateFileIndex(relativePath, stat.mtimeMs, fileHash, chunkIds, langKey);
|
|
930
|
+
if (this.graphIndexer) {
|
|
931
|
+
await this.graphIndexer.updateFiles([relativePath]);
|
|
932
|
+
}
|
|
933
|
+
if (this.treeIndexer) {
|
|
934
|
+
await this.treeIndexer.upsertFile(relativePath, this.getTreeDeps());
|
|
935
|
+
}
|
|
936
|
+
// Write WAL done entry
|
|
937
|
+
if (this.walEnabled) {
|
|
938
|
+
await this.wal.done(relativePath, chunkIds.length);
|
|
939
|
+
}
|
|
940
|
+
}
|
|
941
|
+
finally {
|
|
942
|
+
// Always delete tree to prevent memory leaks
|
|
943
|
+
this.parser.deleteTree(tree);
|
|
944
|
+
}
|
|
945
|
+
}
|
|
946
|
+
catch (err) {
|
|
947
|
+
const error = createError('INDEX_FILE_ERROR', `Failed to index ${relativePath}`, {
|
|
948
|
+
error: err.message,
|
|
949
|
+
});
|
|
950
|
+
errors.push(error);
|
|
951
|
+
// Write WAL fail entry
|
|
952
|
+
if (this.walEnabled) {
|
|
953
|
+
const retryCount = await this.wal.getRetryCount(relativePath);
|
|
954
|
+
await this.wal.fail(relativePath, err.message, retryCount + 1);
|
|
955
|
+
}
|
|
956
|
+
}
|
|
957
|
+
return { chunkIds, errors };
|
|
958
|
+
}
|
|
959
|
+
/**
|
|
960
|
+
* Reindex all files in root path.
|
|
961
|
+
* Uses batch processing for much faster performance with WAL support.
|
|
962
|
+
*/
|
|
963
|
+
async reindexAll(options = {}) {
|
|
964
|
+
await this.initialize();
|
|
965
|
+
return withIndexLock(this.localStateDir, async () => {
|
|
966
|
+
const filter = this.createFileFilter(options);
|
|
967
|
+
const include = options.include || this.codeConfig.include || DEFAULT_INCLUDE;
|
|
968
|
+
// Find all matching files
|
|
969
|
+
const patterns = include.map((p) => path.join(this.rootPath, p));
|
|
970
|
+
let files = [];
|
|
971
|
+
for (const pattern of patterns) {
|
|
972
|
+
const matches = await glob(pattern, {
|
|
973
|
+
ignore: options.exclude || this.codeConfig.exclude || DEFAULT_EXCLUDE,
|
|
974
|
+
nodir: true,
|
|
975
|
+
});
|
|
976
|
+
files.push(...matches);
|
|
977
|
+
}
|
|
978
|
+
// Deduplicate and filter
|
|
979
|
+
files = [...new Set(files)].filter(filter);
|
|
980
|
+
// Generate batch ID for WAL
|
|
981
|
+
const batchId = crypto.randomUUID();
|
|
982
|
+
const relativePaths = files.map(f => path.relative(this.rootPath, f));
|
|
983
|
+
// Write batch start to WAL
|
|
984
|
+
if (this.walEnabled) {
|
|
985
|
+
await this.wal.batchStart(batchId, relativePaths);
|
|
986
|
+
}
|
|
987
|
+
let indexed = 0;
|
|
988
|
+
let skipped = 0;
|
|
989
|
+
let deleted = 0;
|
|
990
|
+
let failed = 0;
|
|
991
|
+
const errors = [];
|
|
992
|
+
// Track current file paths
|
|
993
|
+
const currentPaths = new Set();
|
|
994
|
+
const indexedPaths = await this.getAllIndexedPaths();
|
|
995
|
+
const filesToIndex = [];
|
|
996
|
+
const pathsToDelete = [];
|
|
997
|
+
for (const file of files) {
|
|
998
|
+
const relativePath = path.relative(this.rootPath, file);
|
|
999
|
+
currentPaths.add(relativePath);
|
|
1000
|
+
// Write file start to WAL
|
|
1001
|
+
if (this.walEnabled) {
|
|
1002
|
+
await this.wal.start(relativePath, batchId);
|
|
1003
|
+
}
|
|
1004
|
+
try {
|
|
1005
|
+
const stat = await fs.stat(file);
|
|
1006
|
+
// Check if file needs reindexing
|
|
1007
|
+
const existingRecord = await this.getFileRecord(relativePath);
|
|
1008
|
+
if (existingRecord && existingRecord.mtime === stat.mtimeMs) {
|
|
1009
|
+
skipped++;
|
|
1010
|
+
// Mark as done (skipped = no change)
|
|
1011
|
+
if (this.walEnabled) {
|
|
1012
|
+
await this.wal.done(relativePath, 0);
|
|
1013
|
+
}
|
|
1014
|
+
continue;
|
|
1015
|
+
}
|
|
1016
|
+
// Read and check content hash
|
|
1017
|
+
const content = await fs.readFile(file, 'utf-8');
|
|
1018
|
+
const fileHash = generateContentHash(content);
|
|
1019
|
+
if (existingRecord && existingRecord.content_hash === fileHash) {
|
|
1020
|
+
// Content unchanged, just update mtime
|
|
1021
|
+
await this.updateFileIndex(relativePath, stat.mtimeMs, fileHash, JSON.parse(existingRecord.chunk_ids), existingRecord.language);
|
|
1022
|
+
skipped++;
|
|
1023
|
+
// Mark as done
|
|
1024
|
+
if (this.walEnabled) {
|
|
1025
|
+
await this.wal.done(relativePath, 0);
|
|
1026
|
+
}
|
|
1027
|
+
continue;
|
|
1028
|
+
}
|
|
1029
|
+
// File changed - parse it (but don't index yet)
|
|
1030
|
+
const parseResult = await this.parseFile(file, options);
|
|
1031
|
+
if (isError(parseResult)) {
|
|
1032
|
+
errors.push(parseResult);
|
|
1033
|
+
failed++;
|
|
1034
|
+
if (this.walEnabled) {
|
|
1035
|
+
const retryCount = await this.wal.getRetryCount(relativePath);
|
|
1036
|
+
await this.wal.fail(relativePath, parseResult.message, retryCount + 1);
|
|
1037
|
+
}
|
|
1038
|
+
continue;
|
|
1039
|
+
}
|
|
1040
|
+
// Collect old chunks for deletion
|
|
1041
|
+
if (existingRecord) {
|
|
1042
|
+
pathsToDelete.push(relativePath);
|
|
1043
|
+
}
|
|
1044
|
+
filesToIndex.push({
|
|
1045
|
+
relativePath,
|
|
1046
|
+
chunks: parseResult.chunks,
|
|
1047
|
+
fileHash: parseResult.fileHash,
|
|
1048
|
+
mtime: parseResult.mtime,
|
|
1049
|
+
language: parseResult.language,
|
|
1050
|
+
});
|
|
1051
|
+
}
|
|
1052
|
+
catch (err) {
|
|
1053
|
+
const error = createError('INDEX_FILE_ERROR', `Failed to parse ${relativePath}`, {
|
|
1054
|
+
error: err.message,
|
|
1055
|
+
});
|
|
1056
|
+
errors.push(error);
|
|
1057
|
+
failed++;
|
|
1058
|
+
if (this.walEnabled) {
|
|
1059
|
+
const retryCount = await this.wal.getRetryCount(relativePath);
|
|
1060
|
+
await this.wal.fail(relativePath, err.message, retryCount + 1);
|
|
1061
|
+
}
|
|
1062
|
+
}
|
|
1063
|
+
}
|
|
1064
|
+
// Phase 2: Delete old chunks for files that will be reindexed
|
|
1065
|
+
for (const pathToDelete of pathsToDelete) {
|
|
1066
|
+
await this.deleteChunksForPath(pathToDelete);
|
|
1067
|
+
}
|
|
1068
|
+
// Phase 3: Batch index all chunks
|
|
1069
|
+
if (filesToIndex.length > 0) {
|
|
1070
|
+
// Collect all chunks from all files
|
|
1071
|
+
const allChunks = [];
|
|
1072
|
+
const chunkToFileMap = new Map();
|
|
1073
|
+
for (const fileResult of filesToIndex) {
|
|
1074
|
+
const startIdx = allChunks.length;
|
|
1075
|
+
allChunks.push(...fileResult.chunks);
|
|
1076
|
+
// Map chunk indices to file for later file index update
|
|
1077
|
+
for (let i = startIdx; i < allChunks.length; i++) {
|
|
1078
|
+
chunkToFileMap.set(i, fileResult);
|
|
1079
|
+
}
|
|
1080
|
+
}
|
|
1081
|
+
// Batch index all chunks at once (larger batch = fewer API calls)
|
|
1082
|
+
const batchResult = await this.indexChunksBatch(allChunks, 128);
|
|
1083
|
+
indexed = batchResult.chunkIds.length;
|
|
1084
|
+
errors.push(...batchResult.errors);
|
|
1085
|
+
// Update file indices and write WAL done entries
|
|
1086
|
+
const fileChunkIds = new Map();
|
|
1087
|
+
for (let i = 0; i < batchResult.chunkIds.length; i++) {
|
|
1088
|
+
const fileResult = chunkToFileMap.get(i);
|
|
1089
|
+
if (fileResult) {
|
|
1090
|
+
const ids = fileChunkIds.get(fileResult.relativePath) || [];
|
|
1091
|
+
ids.push(batchResult.chunkIds[i]);
|
|
1092
|
+
fileChunkIds.set(fileResult.relativePath, ids);
|
|
1093
|
+
}
|
|
1094
|
+
}
|
|
1095
|
+
for (const fileResult of filesToIndex) {
|
|
1096
|
+
const chunkIds = fileChunkIds.get(fileResult.relativePath) || [];
|
|
1097
|
+
await this.updateFileIndex(fileResult.relativePath, fileResult.mtime, fileResult.fileHash, chunkIds, fileResult.language);
|
|
1098
|
+
// Write WAL done entry for each file
|
|
1099
|
+
if (this.walEnabled) {
|
|
1100
|
+
await this.wal.done(fileResult.relativePath, chunkIds.length);
|
|
1101
|
+
}
|
|
1102
|
+
}
|
|
1103
|
+
}
|
|
1104
|
+
// Phase 4: Handle deleted files
|
|
1105
|
+
for (const indexedPath of indexedPaths) {
|
|
1106
|
+
if (!currentPaths.has(indexedPath)) {
|
|
1107
|
+
const record = await this.getFileRecord(indexedPath);
|
|
1108
|
+
if (record) {
|
|
1109
|
+
const chunkIds = JSON.parse(record.chunk_ids);
|
|
1110
|
+
await this.deleteChunks(chunkIds);
|
|
1111
|
+
await this.removeFromFileIndex(indexedPath);
|
|
1112
|
+
deleted++;
|
|
1113
|
+
}
|
|
1114
|
+
}
|
|
1115
|
+
}
|
|
1116
|
+
// Write batch done to WAL
|
|
1117
|
+
if (this.walEnabled) {
|
|
1118
|
+
await this.wal.batchDone(batchId, { indexed, failed });
|
|
1119
|
+
// Compact WAL after successful batch
|
|
1120
|
+
await this.wal.compact();
|
|
1121
|
+
}
|
|
1122
|
+
await this.createOrUpdateFtsIndex();
|
|
1123
|
+
if (this.graphIndexer) {
|
|
1124
|
+
await this.graphIndexer.indexAll();
|
|
1125
|
+
}
|
|
1126
|
+
if (this.treeIndexer) {
|
|
1127
|
+
await this.treeIndexer.buildTree(this.getTreeDeps());
|
|
1128
|
+
}
|
|
1129
|
+
return { indexed, skipped, deleted, errors };
|
|
1130
|
+
});
|
|
1131
|
+
}
|
|
1132
|
+
/**
|
|
1133
|
+
* Reindex all files using unified SyncWal for recovery.
|
|
1134
|
+
* Uses offset-based recovery (deterministic file ordering).
|
|
1135
|
+
*
|
|
1136
|
+
* @param recovery Optional recovery plan from SyncWal
|
|
1137
|
+
* @returns IndexResult with counts
|
|
1138
|
+
*/
|
|
1139
|
+
async reindexAllWithSyncWal(recovery) {
|
|
1140
|
+
if (!this.syncWal) {
|
|
1141
|
+
throw new Error('SyncWal not provided - use reindexAll() instead');
|
|
1142
|
+
}
|
|
1143
|
+
await this.initialize();
|
|
1144
|
+
const filter = this.createFileFilter({});
|
|
1145
|
+
const include = this.codeConfig.include || DEFAULT_INCLUDE;
|
|
1146
|
+
// Find all matching files
|
|
1147
|
+
const patterns = include.map((p) => path.join(this.rootPath, p));
|
|
1148
|
+
let files = [];
|
|
1149
|
+
for (const pattern of patterns) {
|
|
1150
|
+
const matches = await glob(pattern, {
|
|
1151
|
+
ignore: this.codeConfig.exclude || DEFAULT_EXCLUDE,
|
|
1152
|
+
nodir: true,
|
|
1153
|
+
});
|
|
1154
|
+
files.push(...matches);
|
|
1155
|
+
}
|
|
1156
|
+
// Deduplicate, filter, and sort deterministically
|
|
1157
|
+
files = [...new Set(files)].filter(filter).sort((a, b) => a.localeCompare(b));
|
|
1158
|
+
// Compute manifest hash for recovery validation
|
|
1159
|
+
const manifestHash = this.computeManifestHash(files);
|
|
1160
|
+
// Validate recovery plan
|
|
1161
|
+
let skipCount = 0;
|
|
1162
|
+
if (recovery) {
|
|
1163
|
+
if (recovery.manifestHash === manifestHash) {
|
|
1164
|
+
skipCount = recovery.skipCount;
|
|
1165
|
+
console.log(`[code-indexer] Resuming from file ${skipCount}/${files.length}`);
|
|
1166
|
+
}
|
|
1167
|
+
else {
|
|
1168
|
+
// Files changed since crash — full reindex
|
|
1169
|
+
console.warn('[code-indexer] Manifest hash mismatch, starting full reindex');
|
|
1170
|
+
}
|
|
1171
|
+
}
|
|
1172
|
+
// Start task (acquires lock, starts heartbeat)
|
|
1173
|
+
const taskId = await this.syncWal.startTask('code', files.length, manifestHash);
|
|
1174
|
+
let indexed = 0;
|
|
1175
|
+
let skipped = 0;
|
|
1176
|
+
let deleted = 0;
|
|
1177
|
+
const errors = [];
|
|
1178
|
+
// Track current file paths
|
|
1179
|
+
const currentPaths = new Set();
|
|
1180
|
+
const indexedPaths = await this.getAllIndexedPaths();
|
|
1181
|
+
const filesToIndex = [];
|
|
1182
|
+
const pathsToDelete = [];
|
|
1183
|
+
try {
|
|
1184
|
+
for (let i = skipCount; i < files.length; i++) {
|
|
1185
|
+
// Check cancellation
|
|
1186
|
+
if (this.syncWal.isCancelled(taskId)) {
|
|
1187
|
+
await this.syncWal.cancelTask(taskId);
|
|
1188
|
+
return { indexed, skipped, deleted, errors };
|
|
1189
|
+
}
|
|
1190
|
+
// Check pause - wait until resumed or cancelled
|
|
1191
|
+
const shouldContinue = await this.syncWal.waitWhilePaused(taskId);
|
|
1192
|
+
if (!shouldContinue) {
|
|
1193
|
+
// Cancelled while paused
|
|
1194
|
+
await this.syncWal.cancelTask(taskId);
|
|
1195
|
+
return { indexed, skipped, deleted, errors };
|
|
1196
|
+
}
|
|
1197
|
+
const file = files[i];
|
|
1198
|
+
const relativePath = path.relative(this.rootPath, file);
|
|
1199
|
+
currentPaths.add(relativePath);
|
|
1200
|
+
try {
|
|
1201
|
+
const stat = await fs.stat(file);
|
|
1202
|
+
// Check if file needs reindexing
|
|
1203
|
+
const existingRecord = await this.getFileRecord(relativePath);
|
|
1204
|
+
if (existingRecord && existingRecord.mtime === stat.mtimeMs) {
|
|
1205
|
+
skipped++;
|
|
1206
|
+
await this.syncWal.fileProcessed(taskId, relativePath, 0, 'updated');
|
|
1207
|
+
continue;
|
|
1208
|
+
}
|
|
1209
|
+
// Read and check content hash
|
|
1210
|
+
const content = await fs.readFile(file, 'utf-8');
|
|
1211
|
+
const fileHash = generateContentHash(content);
|
|
1212
|
+
if (existingRecord && existingRecord.content_hash === fileHash) {
|
|
1213
|
+
// Content unchanged, just update mtime
|
|
1214
|
+
await this.updateFileIndex(relativePath, stat.mtimeMs, fileHash, JSON.parse(existingRecord.chunk_ids), existingRecord.language);
|
|
1215
|
+
skipped++;
|
|
1216
|
+
await this.syncWal.fileProcessed(taskId, relativePath, 0, 'updated');
|
|
1217
|
+
continue;
|
|
1218
|
+
}
|
|
1219
|
+
// File changed - parse it
|
|
1220
|
+
const parseResult = await this.parseFile(file, {});
|
|
1221
|
+
if (isError(parseResult)) {
|
|
1222
|
+
errors.push(parseResult);
|
|
1223
|
+
await this.syncWal.fileFailed(taskId, relativePath, parseResult.message, 0);
|
|
1224
|
+
continue;
|
|
1225
|
+
}
|
|
1226
|
+
// Collect old chunks for deletion
|
|
1227
|
+
if (existingRecord) {
|
|
1228
|
+
pathsToDelete.push(relativePath);
|
|
1229
|
+
}
|
|
1230
|
+
filesToIndex.push({
|
|
1231
|
+
relativePath,
|
|
1232
|
+
chunks: parseResult.chunks,
|
|
1233
|
+
fileHash: parseResult.fileHash,
|
|
1234
|
+
mtime: parseResult.mtime,
|
|
1235
|
+
language: parseResult.language,
|
|
1236
|
+
});
|
|
1237
|
+
}
|
|
1238
|
+
catch (err) {
|
|
1239
|
+
const error = createError('INDEX_FILE_ERROR', `Failed to parse ${relativePath}`, {
|
|
1240
|
+
error: err.message,
|
|
1241
|
+
});
|
|
1242
|
+
errors.push(error);
|
|
1243
|
+
await this.syncWal.fileFailed(taskId, relativePath, err.message, 0);
|
|
1244
|
+
}
|
|
1245
|
+
}
|
|
1246
|
+
// Phase 2: Delete old chunks for files that will be reindexed
|
|
1247
|
+
this.syncWal.setPhase('deleting');
|
|
1248
|
+
for (const pathToDelete of pathsToDelete) {
|
|
1249
|
+
await this.deleteChunksForPath(pathToDelete);
|
|
1250
|
+
}
|
|
1251
|
+
// Phase 3: Batch index all chunks
|
|
1252
|
+
this.syncWal.setPhase('indexing');
|
|
1253
|
+
if (filesToIndex.length > 0) {
|
|
1254
|
+
const allChunks = [];
|
|
1255
|
+
const chunkToFileMap = new Map();
|
|
1256
|
+
for (const fileResult of filesToIndex) {
|
|
1257
|
+
const startIdx = allChunks.length;
|
|
1258
|
+
allChunks.push(...fileResult.chunks);
|
|
1259
|
+
for (let i = startIdx; i < allChunks.length; i++) {
|
|
1260
|
+
chunkToFileMap.set(i, fileResult);
|
|
1261
|
+
}
|
|
1262
|
+
}
|
|
1263
|
+
// Batch index with embedding progress callback
|
|
1264
|
+
const embeddingProgressCallback = this.syncWal?.getEmbeddingProgressCallback();
|
|
1265
|
+
const batchResult = await this.indexChunksBatch(allChunks, 128, embeddingProgressCallback);
|
|
1266
|
+
indexed = batchResult.chunkIds.length;
|
|
1267
|
+
errors.push(...batchResult.errors);
|
|
1268
|
+
// Update file indices
|
|
1269
|
+
const fileChunkIds = new Map();
|
|
1270
|
+
for (let i = 0; i < batchResult.chunkIds.length; i++) {
|
|
1271
|
+
const fileResult = chunkToFileMap.get(i);
|
|
1272
|
+
if (fileResult) {
|
|
1273
|
+
const ids = fileChunkIds.get(fileResult.relativePath) || [];
|
|
1274
|
+
ids.push(batchResult.chunkIds[i]);
|
|
1275
|
+
fileChunkIds.set(fileResult.relativePath, ids);
|
|
1276
|
+
}
|
|
1277
|
+
}
|
|
1278
|
+
for (const fileResult of filesToIndex) {
|
|
1279
|
+
const chunkIds = fileChunkIds.get(fileResult.relativePath) || [];
|
|
1280
|
+
await this.updateFileIndex(fileResult.relativePath, fileResult.mtime, fileResult.fileHash, chunkIds, fileResult.language);
|
|
1281
|
+
await this.syncWal.fileProcessed(taskId, fileResult.relativePath, chunkIds.length, 'added');
|
|
1282
|
+
}
|
|
1283
|
+
}
|
|
1284
|
+
// Phase 4: Handle deleted files
|
|
1285
|
+
for (const indexedPath of indexedPaths) {
|
|
1286
|
+
if (!currentPaths.has(indexedPath)) {
|
|
1287
|
+
const record = await this.getFileRecord(indexedPath);
|
|
1288
|
+
if (record) {
|
|
1289
|
+
const chunkIds = JSON.parse(record.chunk_ids);
|
|
1290
|
+
await this.deleteChunks(chunkIds);
|
|
1291
|
+
await this.removeFromFileIndex(indexedPath);
|
|
1292
|
+
deleted++;
|
|
1293
|
+
}
|
|
1294
|
+
}
|
|
1295
|
+
}
|
|
1296
|
+
await this.createOrUpdateFtsIndex();
|
|
1297
|
+
// Run graph/tree indexing (blocking to ensure completion before sync:completed)
|
|
1298
|
+
if (this.graphIndexer) {
|
|
1299
|
+
console.log('[code-indexer] starting graphIndexer.indexAll()');
|
|
1300
|
+
this.syncWal.setPhase('graph');
|
|
1301
|
+
await this.graphIndexer.indexAll(undefined, {
|
|
1302
|
+
onProgress: (current, total, phase) => {
|
|
1303
|
+
this.syncWal?.emitSubPhaseProgress(current, total, `graph:${phase}`);
|
|
1304
|
+
},
|
|
1305
|
+
});
|
|
1306
|
+
console.log('[code-indexer] graphIndexer.indexAll() done');
|
|
1307
|
+
}
|
|
1308
|
+
if (this.treeIndexer) {
|
|
1309
|
+
console.log('[code-indexer] starting treeIndexer.buildTree()');
|
|
1310
|
+
this.syncWal.setPhase('tree');
|
|
1311
|
+
await this.treeIndexer.buildTree(this.getTreeDeps(), {
|
|
1312
|
+
onProgress: (current, total, phase) => {
|
|
1313
|
+
this.syncWal?.emitSubPhaseProgress(current, total, `tree:${phase}`);
|
|
1314
|
+
},
|
|
1315
|
+
});
|
|
1316
|
+
console.log('[code-indexer] treeIndexer.buildTree() done');
|
|
1317
|
+
}
|
|
1318
|
+
// Complete task AFTER all indexing is done
|
|
1319
|
+
await this.syncWal.completeTask(taskId);
|
|
1320
|
+
}
|
|
1321
|
+
catch (err) {
|
|
1322
|
+
// Task failed
|
|
1323
|
+
await this.syncWal.failTask(taskId, err.message);
|
|
1324
|
+
throw err;
|
|
1325
|
+
}
|
|
1326
|
+
return { indexed, skipped, deleted, errors };
|
|
1327
|
+
}
|
|
1328
|
+
async rebuildFtsIndex() {
|
|
1329
|
+
await this.initialize();
|
|
1330
|
+
await this.createOrUpdateFtsIndex();
|
|
1331
|
+
}
|
|
1332
|
+
/**
|
|
1333
|
+
* Set the SyncWal instance for unified recovery.
|
|
1334
|
+
* Should be called when SyncWal is created after CodeIndexer construction.
|
|
1335
|
+
*/
|
|
1336
|
+
setSyncWal(syncWal) {
|
|
1337
|
+
this.syncWal = syncWal;
|
|
1338
|
+
}
|
|
1339
|
+
/**
|
|
1340
|
+
* Get the SyncWal instance.
|
|
1341
|
+
*/
|
|
1342
|
+
getSyncWal() {
|
|
1343
|
+
return this.syncWal;
|
|
1344
|
+
}
|
|
1345
|
+
/**
|
|
1346
|
+
* Check if SyncWal is configured.
|
|
1347
|
+
*/
|
|
1348
|
+
hasSyncWal() {
|
|
1349
|
+
return this.syncWal !== null;
|
|
1350
|
+
}
|
|
1351
|
+
/**
|
|
1352
|
+
* Search code by semantic query.
|
|
1353
|
+
*/
|
|
1354
|
+
async search(query, options = {}) {
|
|
1355
|
+
await this.initialize();
|
|
1356
|
+
const { limit = 10, threshold = 0.5, language, symbolTypes, path: pathFilter, rerank = true, } = options;
|
|
1357
|
+
const table = await this.ensureChunksTable();
|
|
1358
|
+
// Check if table has data
|
|
1359
|
+
const count = await table.countRows();
|
|
1360
|
+
if (count === 0) {
|
|
1361
|
+
return [];
|
|
1362
|
+
}
|
|
1363
|
+
// Get query embedding (use 'query' type for Voyage AI asymmetric search)
|
|
1364
|
+
const queryEmbedding = await this.embeddingProvider.getEmbedding(query, 'query');
|
|
1365
|
+
if (isError(queryEmbedding)) {
|
|
1366
|
+
return queryEmbedding;
|
|
1367
|
+
}
|
|
1368
|
+
const useReranking = Boolean(rerank &&
|
|
1369
|
+
this.rerankerProvider &&
|
|
1370
|
+
this.rerankingConfig.enabled &&
|
|
1371
|
+
this.rerankingConfig.provider !== 'none');
|
|
1372
|
+
const fetchLimit = useReranking
|
|
1373
|
+
? Math.max(this.rerankingConfig.topK, limit)
|
|
1374
|
+
: limit * 2;
|
|
1375
|
+
const vectorThreshold = useReranking
|
|
1376
|
+
? this.rerankingConfig.prerankThreshold
|
|
1377
|
+
: threshold;
|
|
1378
|
+
// Build search query
|
|
1379
|
+
let searchQuery = table.search(queryEmbedding).limit(fetchLimit);
|
|
1380
|
+
// Apply filters
|
|
1381
|
+
const filters = [];
|
|
1382
|
+
if (language) {
|
|
1383
|
+
const lang = sanitizeIdentifier(language);
|
|
1384
|
+
if (!VALID_LANGUAGES.has(lang)) {
|
|
1385
|
+
return createError('INVALID_LANGUAGE', `Unknown language: ${lang}`);
|
|
1386
|
+
}
|
|
1387
|
+
filters.push(`language = '${lang}'`);
|
|
1388
|
+
}
|
|
1389
|
+
if (symbolTypes && symbolTypes.length > 0) {
|
|
1390
|
+
const validTypes = symbolTypes
|
|
1391
|
+
.map((t) => sanitizeIdentifier(t))
|
|
1392
|
+
.filter((t) => VALID_SYMBOL_TYPES.has(t));
|
|
1393
|
+
if (validTypes.length === 0) {
|
|
1394
|
+
return createError('INVALID_SYMBOL_TYPES', 'No valid symbol types provided');
|
|
1395
|
+
}
|
|
1396
|
+
const types = validTypes.map((t) => `'${t}'`).join(', ');
|
|
1397
|
+
filters.push(`symbol_type IN (${types})`);
|
|
1398
|
+
}
|
|
1399
|
+
if (pathFilter) {
|
|
1400
|
+
const safePath = sanitizePath(pathFilter);
|
|
1401
|
+
filters.push(`path LIKE '${safePath}%'`);
|
|
1402
|
+
}
|
|
1403
|
+
if (filters.length > 0) {
|
|
1404
|
+
searchQuery = searchQuery.where(filters.join(' AND '));
|
|
1405
|
+
}
|
|
1406
|
+
const results = await searchQuery.toArray();
|
|
1407
|
+
// Convert L2 distance to cosine similarity, filter and deduplicate
|
|
1408
|
+
const processed = [];
|
|
1409
|
+
const seenContentHashes = new Set();
|
|
1410
|
+
for (const r of results) {
|
|
1411
|
+
const distance = r._distance;
|
|
1412
|
+
// For normalized vectors: L2^2 = 2(1 - cos_sim)
|
|
1413
|
+
const score = 1 - distance / 2;
|
|
1414
|
+
if (score < vectorThreshold)
|
|
1415
|
+
continue;
|
|
1416
|
+
// Deduplicate by content_hash (same code in file chunk and symbol chunk)
|
|
1417
|
+
const contentHash = r.content_hash;
|
|
1418
|
+
if (seenContentHashes.has(contentHash))
|
|
1419
|
+
continue;
|
|
1420
|
+
seenContentHashes.add(contentHash);
|
|
1421
|
+
const result = fromChunkRecord(r);
|
|
1422
|
+
result.score = score;
|
|
1423
|
+
processed.push(result);
|
|
1424
|
+
if (processed.length >= fetchLimit)
|
|
1425
|
+
break;
|
|
1426
|
+
}
|
|
1427
|
+
if (processed.length === 0) {
|
|
1428
|
+
return [];
|
|
1429
|
+
}
|
|
1430
|
+
if (useReranking && processed.length >= 5 && this.rerankerProvider) {
|
|
1431
|
+
try {
|
|
1432
|
+
if (!this.rerankerProvider.isInitialized()) {
|
|
1433
|
+
await this.rerankerProvider.initialize();
|
|
1434
|
+
}
|
|
1435
|
+
const documents = processed.map((item) => item.content);
|
|
1436
|
+
const reranked = await this.rerankerProvider.rerank(query, documents, {
|
|
1437
|
+
topK: limit,
|
|
1438
|
+
});
|
|
1439
|
+
if (!isError(reranked)) {
|
|
1440
|
+
const rerankedResults = reranked
|
|
1441
|
+
.filter((rr) => rr.index >= 0 && rr.index < processed.length)
|
|
1442
|
+
.map((rr) => ({
|
|
1443
|
+
...processed[rr.index],
|
|
1444
|
+
score: rr.score,
|
|
1445
|
+
_originalScore: processed[rr.index].score,
|
|
1446
|
+
}))
|
|
1447
|
+
.filter((result) => result.score >= threshold);
|
|
1448
|
+
return rerankedResults.slice(0, limit);
|
|
1449
|
+
}
|
|
1450
|
+
console.warn('[code-indexer] Reranking failed, using vector scores:', reranked.message);
|
|
1451
|
+
}
|
|
1452
|
+
catch (error) {
|
|
1453
|
+
console.warn('[code-indexer] Reranking error, falling back to vector scores:', error);
|
|
1454
|
+
}
|
|
1455
|
+
}
|
|
1456
|
+
return processed
|
|
1457
|
+
.filter((result) => result.score >= threshold)
|
|
1458
|
+
.slice(0, limit);
|
|
1459
|
+
}
|
|
1460
|
+
async searchFts(options) {
|
|
1461
|
+
await this.initialize();
|
|
1462
|
+
const query = options.query.trim();
|
|
1463
|
+
if (!query) {
|
|
1464
|
+
return createError('INVALID_QUERY', 'Query must not be empty');
|
|
1465
|
+
}
|
|
1466
|
+
const table = await this.ensureChunksTable();
|
|
1467
|
+
const count = await table.countRows();
|
|
1468
|
+
if (count === 0) {
|
|
1469
|
+
return [];
|
|
1470
|
+
}
|
|
1471
|
+
const limit = options.limit ?? 10;
|
|
1472
|
+
const ftsColumn = this.getFtsColumn();
|
|
1473
|
+
const phraseQuery = options.phraseMatch
|
|
1474
|
+
? new PhraseQuery(query, ftsColumn, { slop: options.phraseSlop ?? 0 })
|
|
1475
|
+
: null;
|
|
1476
|
+
const fuzzyQuery = options.fuzziness !== undefined
|
|
1477
|
+
? new MatchQuery(query, ftsColumn, { fuzziness: options.fuzziness })
|
|
1478
|
+
: null;
|
|
1479
|
+
let ftsQuery = query;
|
|
1480
|
+
if (phraseQuery && fuzzyQuery) {
|
|
1481
|
+
ftsQuery = new BooleanQuery([
|
|
1482
|
+
[Occur.Should, phraseQuery],
|
|
1483
|
+
[Occur.Should, fuzzyQuery],
|
|
1484
|
+
]);
|
|
1485
|
+
}
|
|
1486
|
+
else if (phraseQuery) {
|
|
1487
|
+
ftsQuery = phraseQuery;
|
|
1488
|
+
}
|
|
1489
|
+
else if (fuzzyQuery) {
|
|
1490
|
+
ftsQuery = fuzzyQuery;
|
|
1491
|
+
}
|
|
1492
|
+
let searchQuery = table.search(ftsQuery, 'fts', ftsColumn).limit(limit);
|
|
1493
|
+
const filters = [];
|
|
1494
|
+
if (options.language) {
|
|
1495
|
+
const lang = sanitizeIdentifier(options.language);
|
|
1496
|
+
if (!VALID_LANGUAGES.has(lang)) {
|
|
1497
|
+
return createError('INVALID_LANGUAGE', `Unknown language: ${lang}`);
|
|
1498
|
+
}
|
|
1499
|
+
filters.push(`language = '${lang}'`);
|
|
1500
|
+
}
|
|
1501
|
+
if (options.symbolTypes && options.symbolTypes.length > 0) {
|
|
1502
|
+
const validTypes = options.symbolTypes
|
|
1503
|
+
.map((t) => sanitizeIdentifier(t))
|
|
1504
|
+
.filter((t) => VALID_SYMBOL_TYPES.has(t));
|
|
1505
|
+
if (validTypes.length === 0) {
|
|
1506
|
+
return createError('INVALID_SYMBOL_TYPES', 'No valid symbol types provided');
|
|
1507
|
+
}
|
|
1508
|
+
const types = validTypes.map((t) => `'${t}'`).join(', ');
|
|
1509
|
+
filters.push(`symbol_type IN (${types})`);
|
|
1510
|
+
}
|
|
1511
|
+
const pathFilter = options.path
|
|
1512
|
+
? buildPathPrefixFilter('path', options.path)
|
|
1513
|
+
: null;
|
|
1514
|
+
const whereClause = combineFilters([...filters, pathFilter]);
|
|
1515
|
+
if (whereClause) {
|
|
1516
|
+
searchQuery = searchQuery.where(whereClause);
|
|
1517
|
+
}
|
|
1518
|
+
const rows = await searchQuery.toArray();
|
|
1519
|
+
const results = [];
|
|
1520
|
+
for (const row of rows) {
|
|
1521
|
+
const result = fromChunkRecord(row);
|
|
1522
|
+
const ftsScore = typeof row._score === 'number' ? row._score : undefined;
|
|
1523
|
+
const fallbackScore = typeof row._distance === 'number'
|
|
1524
|
+
? 1 - row._distance / 2
|
|
1525
|
+
: 1;
|
|
1526
|
+
result.score = ftsScore ?? fallbackScore;
|
|
1527
|
+
results.push(result);
|
|
1528
|
+
}
|
|
1529
|
+
return results;
|
|
1530
|
+
}
|
|
1531
|
+
async searchHybrid(options) {
|
|
1532
|
+
const query = options.query.trim();
|
|
1533
|
+
if (!query) {
|
|
1534
|
+
return createError('INVALID_QUERY', 'Query must not be empty');
|
|
1535
|
+
}
|
|
1536
|
+
const limit = options.limit ?? 10;
|
|
1537
|
+
const threshold = options.threshold ?? 0.5;
|
|
1538
|
+
const semanticLimit = options.semanticLimit ?? Math.max(limit * 3, 20);
|
|
1539
|
+
const ftsLimit = options.ftsLimit ?? Math.max(limit * 3, 20);
|
|
1540
|
+
const vectorWeight = options.vectorWeight ?? 0.55;
|
|
1541
|
+
const ftsWeight = options.ftsWeight ?? 0.45;
|
|
1542
|
+
const rerankEnabled = options.rerank ?? true;
|
|
1543
|
+
const [semanticResults, ftsResults] = await Promise.all([
|
|
1544
|
+
this.search(query, {
|
|
1545
|
+
limit: semanticLimit,
|
|
1546
|
+
threshold: 0,
|
|
1547
|
+
language: options.language,
|
|
1548
|
+
symbolTypes: options.symbolTypes,
|
|
1549
|
+
path: options.path,
|
|
1550
|
+
rerank: false,
|
|
1551
|
+
}),
|
|
1552
|
+
this.searchFts({
|
|
1553
|
+
query,
|
|
1554
|
+
limit: ftsLimit,
|
|
1555
|
+
fuzziness: options.fuzziness,
|
|
1556
|
+
phraseMatch: options.phraseMatch,
|
|
1557
|
+
phraseSlop: options.phraseSlop,
|
|
1558
|
+
language: options.language,
|
|
1559
|
+
symbolTypes: options.symbolTypes,
|
|
1560
|
+
path: options.path,
|
|
1561
|
+
}),
|
|
1562
|
+
]);
|
|
1563
|
+
if (isError(semanticResults))
|
|
1564
|
+
return semanticResults;
|
|
1565
|
+
if (isError(ftsResults))
|
|
1566
|
+
return ftsResults;
|
|
1567
|
+
if (semanticResults.length === 0 && ftsResults.length === 0)
|
|
1568
|
+
return [];
|
|
1569
|
+
const semanticNorm = this.normalizeSearchScores(semanticResults);
|
|
1570
|
+
const ftsNorm = this.normalizeSearchScores(ftsResults);
|
|
1571
|
+
const merged = new Map();
|
|
1572
|
+
for (let i = 0; i < semanticResults.length; i++) {
|
|
1573
|
+
const item = semanticResults[i];
|
|
1574
|
+
const existing = merged.get(item.id);
|
|
1575
|
+
if (existing) {
|
|
1576
|
+
existing.vector = Math.max(existing.vector, semanticNorm[i] ?? 0);
|
|
1577
|
+
}
|
|
1578
|
+
else {
|
|
1579
|
+
merged.set(item.id, { item, vector: semanticNorm[i] ?? 0, fts: 0 });
|
|
1580
|
+
}
|
|
1581
|
+
}
|
|
1582
|
+
for (let i = 0; i < ftsResults.length; i++) {
|
|
1583
|
+
const item = ftsResults[i];
|
|
1584
|
+
const existing = merged.get(item.id);
|
|
1585
|
+
if (existing) {
|
|
1586
|
+
existing.fts = Math.max(existing.fts, ftsNorm[i] ?? 0);
|
|
1587
|
+
}
|
|
1588
|
+
else {
|
|
1589
|
+
merged.set(item.id, { item, vector: 0, fts: ftsNorm[i] ?? 0 });
|
|
1590
|
+
}
|
|
1591
|
+
}
|
|
1592
|
+
const fused = Array.from(merged.values())
|
|
1593
|
+
.map(({ item, vector, fts }) => {
|
|
1594
|
+
const overlapBoost = vector > 0 && fts > 0 ? 0.05 : 0;
|
|
1595
|
+
const score = Math.min(1, vectorWeight * vector + ftsWeight * fts + overlapBoost);
|
|
1596
|
+
return { ...item, score };
|
|
1597
|
+
})
|
|
1598
|
+
.sort((a, b) => b.score - a.score);
|
|
1599
|
+
const useReranking = Boolean(rerankEnabled &&
|
|
1600
|
+
this.rerankerProvider &&
|
|
1601
|
+
this.rerankingConfig.enabled &&
|
|
1602
|
+
this.rerankingConfig.provider !== 'none');
|
|
1603
|
+
if (useReranking && fused.length >= 5 && this.rerankerProvider) {
|
|
1604
|
+
try {
|
|
1605
|
+
if (!this.rerankerProvider.isInitialized()) {
|
|
1606
|
+
await this.rerankerProvider.initialize();
|
|
1607
|
+
}
|
|
1608
|
+
const candidateLimit = Math.max(this.rerankingConfig.topK, limit);
|
|
1609
|
+
const candidates = fused.slice(0, candidateLimit);
|
|
1610
|
+
const reranked = await this.rerankerProvider.rerank(query, candidates.map((c) => c.content), { topK: limit });
|
|
1611
|
+
if (!isError(reranked)) {
|
|
1612
|
+
return reranked
|
|
1613
|
+
.filter((rr) => rr.index >= 0 && rr.index < candidates.length)
|
|
1614
|
+
.map((rr) => ({
|
|
1615
|
+
...candidates[rr.index],
|
|
1616
|
+
score: rr.score,
|
|
1617
|
+
_originalScore: candidates[rr.index].score,
|
|
1618
|
+
}))
|
|
1619
|
+
.filter((result) => result.score >= threshold)
|
|
1620
|
+
.slice(0, limit);
|
|
1621
|
+
}
|
|
1622
|
+
console.warn('[code-indexer] Hybrid reranking failed, using fused scores:', reranked.message);
|
|
1623
|
+
}
|
|
1624
|
+
catch (error) {
|
|
1625
|
+
console.warn('[code-indexer] Hybrid reranking error, using fused scores:', error);
|
|
1626
|
+
}
|
|
1627
|
+
}
|
|
1628
|
+
return fused
|
|
1629
|
+
.filter((result) => result.score >= threshold)
|
|
1630
|
+
.slice(0, limit);
|
|
1631
|
+
}
|
|
1632
|
+
/**
|
|
1633
|
+
* Find symbols by name pattern (exact match or wildcard).
|
|
1634
|
+
*/
|
|
1635
|
+
async findSymbols(name, options = {}) {
|
|
1636
|
+
await this.initialize();
|
|
1637
|
+
const { type, path: pathFilter, limit = 20 } = options;
|
|
1638
|
+
const table = await this.ensureChunksTable();
|
|
1639
|
+
// Check if table has data
|
|
1640
|
+
const count = await table.countRows();
|
|
1641
|
+
if (count === 0) {
|
|
1642
|
+
return [];
|
|
1643
|
+
}
|
|
1644
|
+
// Build filters
|
|
1645
|
+
const filters = [];
|
|
1646
|
+
// Handle wildcard in name
|
|
1647
|
+
if (name.includes('*')) {
|
|
1648
|
+
const sqlPattern = name.replace(/\*/g, '%');
|
|
1649
|
+
filters.push(`symbol_name LIKE '${sanitizePath(sqlPattern)}'`);
|
|
1650
|
+
}
|
|
1651
|
+
else {
|
|
1652
|
+
filters.push(`symbol_name = '${sanitizePath(name)}'`);
|
|
1653
|
+
}
|
|
1654
|
+
if (type && type !== 'all') {
|
|
1655
|
+
const validType = sanitizeIdentifier(type);
|
|
1656
|
+
if (!VALID_SYMBOL_TYPES.has(validType)) {
|
|
1657
|
+
return createError('INVALID_SYMBOL_TYPE', `Unknown symbol type: ${type}`);
|
|
1658
|
+
}
|
|
1659
|
+
filters.push(`symbol_type = '${validType}'`);
|
|
1660
|
+
}
|
|
1661
|
+
if (pathFilter) {
|
|
1662
|
+
// Normalize path separators to match stored paths
|
|
1663
|
+
const normalizedPath = pathFilter.replace(/\\/g, '/');
|
|
1664
|
+
filters.push(`path LIKE '${sanitizePath(normalizedPath)}%'`);
|
|
1665
|
+
}
|
|
1666
|
+
// Query with filters
|
|
1667
|
+
let query = table.query().where(filters.join(' AND '));
|
|
1668
|
+
query = query.limit(limit);
|
|
1669
|
+
const results = await query.toArray();
|
|
1670
|
+
return results.map((r) => fromChunkRecord(r));
|
|
1671
|
+
}
|
|
1672
|
+
/**
|
|
1673
|
+
* Remove a file from the index.
|
|
1674
|
+
*/
|
|
1675
|
+
async removeFile(filePath) {
|
|
1676
|
+
await this.initialize();
|
|
1677
|
+
const relativePath = path.isAbsolute(filePath)
|
|
1678
|
+
? path.relative(this.rootPath, filePath)
|
|
1679
|
+
: filePath;
|
|
1680
|
+
const record = await this.getFileRecord(relativePath);
|
|
1681
|
+
if (record) {
|
|
1682
|
+
const chunkIds = JSON.parse(record.chunk_ids);
|
|
1683
|
+
await this.deleteChunks(chunkIds);
|
|
1684
|
+
await this.removeFromFileIndex(relativePath);
|
|
1685
|
+
}
|
|
1686
|
+
else {
|
|
1687
|
+
// Fallback: delete by path
|
|
1688
|
+
await this.deleteChunksForPath(relativePath);
|
|
1689
|
+
}
|
|
1690
|
+
if (this.graphIndexer) {
|
|
1691
|
+
await this.graphIndexer.removeFile(relativePath);
|
|
1692
|
+
}
|
|
1693
|
+
if (this.treeIndexer) {
|
|
1694
|
+
await this.treeIndexer.removeFile(relativePath);
|
|
1695
|
+
}
|
|
1696
|
+
}
|
|
1697
|
+
/**
|
|
1698
|
+
* Clear all code index data.
|
|
1699
|
+
*/
|
|
1700
|
+
async clear() {
|
|
1701
|
+
await this.initialize();
|
|
1702
|
+
if (this.chunksTable) {
|
|
1703
|
+
await this.db.dropTable('code_chunks');
|
|
1704
|
+
this.chunksTable = null;
|
|
1705
|
+
}
|
|
1706
|
+
if (this.fileIndexTable) {
|
|
1707
|
+
await this.db.dropTable('code_file_index');
|
|
1708
|
+
this.fileIndexTable = null;
|
|
1709
|
+
}
|
|
1710
|
+
if (this.graphIndexer) {
|
|
1711
|
+
await this.graphIndexer.clear();
|
|
1712
|
+
}
|
|
1713
|
+
if (this.treeIndexer) {
|
|
1714
|
+
await this.treeIndexer.clear();
|
|
1715
|
+
}
|
|
1716
|
+
}
|
|
1717
|
+
/**
|
|
1718
|
+
* Get index statistics.
|
|
1719
|
+
*/
|
|
1720
|
+
async getStats() {
|
|
1721
|
+
await this.initialize();
|
|
1722
|
+
const chunksTable = await this.ensureChunksTable();
|
|
1723
|
+
const fileTable = await this.ensureFileIndexTable();
|
|
1724
|
+
const totalChunks = await chunksTable.countRows();
|
|
1725
|
+
const totalFiles = await fileTable.countRows();
|
|
1726
|
+
// Count by language
|
|
1727
|
+
const fileRecords = await fileTable.query().select(['language']).toArray();
|
|
1728
|
+
const languages = {};
|
|
1729
|
+
for (const r of fileRecords) {
|
|
1730
|
+
const lang = r.language;
|
|
1731
|
+
languages[lang] = (languages[lang] || 0) + 1;
|
|
1732
|
+
}
|
|
1733
|
+
return { totalChunks, totalFiles, languages };
|
|
1734
|
+
}
|
|
1735
|
+
/**
|
|
1736
|
+
* Get set of indexed file paths for quick lookup.
|
|
1737
|
+
*/
|
|
1738
|
+
async getIndexedFilePaths() {
|
|
1739
|
+
await this.initialize();
|
|
1740
|
+
const fileTable = await this.ensureFileIndexTable();
|
|
1741
|
+
const records = await fileTable.query().select(['path']).toArray();
|
|
1742
|
+
return new Set(records.map(r => r.path));
|
|
1743
|
+
}
|
|
1744
|
+
/**
|
|
1745
|
+
* Returns true when code graph indexing is configured.
|
|
1746
|
+
*/
|
|
1747
|
+
hasGraphIndexer() {
|
|
1748
|
+
return this.graphIndexer !== null;
|
|
1749
|
+
}
|
|
1750
|
+
/**
|
|
1751
|
+
* Returns true when tree indexing is configured.
|
|
1752
|
+
*/
|
|
1753
|
+
hasTreeIndexer() {
|
|
1754
|
+
return this.treeIndexer !== null;
|
|
1755
|
+
}
|
|
1756
|
+
/**
|
|
1757
|
+
* Access tree indexer for tree retrieval tools.
|
|
1758
|
+
*/
|
|
1759
|
+
getTreeIndexer() {
|
|
1760
|
+
return this.treeIndexer;
|
|
1761
|
+
}
|
|
1762
|
+
/**
|
|
1763
|
+
* Rebuild tree index from existing code index data.
|
|
1764
|
+
* Useful when tree tables are missing/corrupted while code chunks already exist.
|
|
1765
|
+
*/
|
|
1766
|
+
async rebuildTreeIndex() {
|
|
1767
|
+
await this.initialize();
|
|
1768
|
+
if (!this.treeIndexer) {
|
|
1769
|
+
throw new Error('Tree index is not enabled');
|
|
1770
|
+
}
|
|
1771
|
+
const stats = await this.treeIndexer.buildTree(this.getTreeDeps());
|
|
1772
|
+
return {
|
|
1773
|
+
totalDirs: stats.totalDirs,
|
|
1774
|
+
totalFiles: stats.totalFiles,
|
|
1775
|
+
maxDepth: stats.maxDepth,
|
|
1776
|
+
buildTimeMs: stats.buildTimeMs,
|
|
1777
|
+
};
|
|
1778
|
+
}
|
|
1779
|
+
/**
|
|
1780
|
+
* Ensure tree index is queryable.
|
|
1781
|
+
* Auto-recovers when root dir is missing or tree has never been indexed.
|
|
1782
|
+
*/
|
|
1783
|
+
async ensureTreeIndexReady() {
|
|
1784
|
+
await this.initialize();
|
|
1785
|
+
if (!this.treeIndexer) {
|
|
1786
|
+
throw new Error('Tree index is not enabled');
|
|
1787
|
+
}
|
|
1788
|
+
const [rootDir, freshness] = await Promise.all([
|
|
1789
|
+
this.treeIndexer.getDirByPath(''),
|
|
1790
|
+
Promise.resolve(this.treeIndexer.getFreshnessInfo()),
|
|
1791
|
+
]);
|
|
1792
|
+
if (!rootDir || freshness.indexed_at === 0) {
|
|
1793
|
+
await this.treeIndexer.buildTree(this.getTreeDeps());
|
|
1794
|
+
}
|
|
1795
|
+
}
|
|
1796
|
+
/**
|
|
1797
|
+
* Prepare graph data in-memory without embedding calls and without graph DB writes.
|
|
1798
|
+
* Useful for fast validation on different codebases.
|
|
1799
|
+
*/
|
|
1800
|
+
async codeGraphPrepare(quality = this.codeConfig.graph.quality, options = {}) {
|
|
1801
|
+
await this.initialize();
|
|
1802
|
+
if (!this.graphIndexer) {
|
|
1803
|
+
throw new Error('Code graph indexing is not enabled');
|
|
1804
|
+
}
|
|
1805
|
+
const filter = this.createFileFilter(options);
|
|
1806
|
+
const include = options.include || this.codeConfig.include || DEFAULT_INCLUDE;
|
|
1807
|
+
let files = [];
|
|
1808
|
+
for (const pattern of include.map((p) => path.join(this.rootPath, p))) {
|
|
1809
|
+
const matches = await glob(pattern, {
|
|
1810
|
+
ignore: options.exclude || this.codeConfig.exclude || DEFAULT_EXCLUDE,
|
|
1811
|
+
nodir: true,
|
|
1812
|
+
});
|
|
1813
|
+
files.push(...matches);
|
|
1814
|
+
}
|
|
1815
|
+
files = [...new Set(files)].filter(filter).sort((a, b) => a.localeCompare(b));
|
|
1816
|
+
const chunksByFile = new Map();
|
|
1817
|
+
const parseErrors = [];
|
|
1818
|
+
for (const file of files) {
|
|
1819
|
+
const parseResult = await this.parseFile(file, options);
|
|
1820
|
+
if (isError(parseResult)) {
|
|
1821
|
+
parseErrors.push(parseResult);
|
|
1822
|
+
continue;
|
|
1823
|
+
}
|
|
1824
|
+
const relativePath = path.relative(this.rootPath, file).replace(/\\/g, '/');
|
|
1825
|
+
for (const chunk of parseResult.chunks) {
|
|
1826
|
+
chunk.path = chunk.path.replace(/\\/g, '/');
|
|
1827
|
+
}
|
|
1828
|
+
chunksByFile.set(relativePath, parseResult.chunks);
|
|
1829
|
+
}
|
|
1830
|
+
const prepared = await this.graphIndexer.prepareGraph(quality, chunksByFile);
|
|
1831
|
+
return {
|
|
1832
|
+
...prepared,
|
|
1833
|
+
parseErrors,
|
|
1834
|
+
};
|
|
1835
|
+
}
|
|
1836
|
+
/**
|
|
1837
|
+
* Build full code graph index.
|
|
1838
|
+
*/
|
|
1839
|
+
async codeGraphIndex(quality) {
|
|
1840
|
+
await this.initialize();
|
|
1841
|
+
if (!this.graphIndexer) {
|
|
1842
|
+
throw new Error('Code graph indexing is not enabled');
|
|
1843
|
+
}
|
|
1844
|
+
return this.graphIndexer.indexAll(quality);
|
|
1845
|
+
}
|
|
1846
|
+
/**
|
|
1847
|
+
* Incremental code graph update.
|
|
1848
|
+
*/
|
|
1849
|
+
async codeGraphUpdate() {
|
|
1850
|
+
await this.initialize();
|
|
1851
|
+
if (!this.graphIndexer) {
|
|
1852
|
+
throw new Error('Code graph indexing is not enabled');
|
|
1853
|
+
}
|
|
1854
|
+
return this.graphIndexer.incrementalUpdate();
|
|
1855
|
+
}
|
|
1856
|
+
/**
|
|
1857
|
+
* Code graph statistics.
|
|
1858
|
+
*/
|
|
1859
|
+
async codeGraphInfo() {
|
|
1860
|
+
await this.initialize();
|
|
1861
|
+
if (!this.graphIndexer) {
|
|
1862
|
+
throw new Error('Code graph indexing is not enabled');
|
|
1863
|
+
}
|
|
1864
|
+
return this.graphIndexer.getStats();
|
|
1865
|
+
}
|
|
1866
|
+
/**
|
|
1867
|
+
* Find references to a symbol using graph edges.
|
|
1868
|
+
*/
|
|
1869
|
+
async graphRefs(input) {
|
|
1870
|
+
await this.initialize();
|
|
1871
|
+
if (!this.graphIndexer) {
|
|
1872
|
+
return createError('GRAPH_DISABLED', 'Code graph indexing is not enabled');
|
|
1873
|
+
}
|
|
1874
|
+
return this.graphIndexer.graphRefs(input);
|
|
1875
|
+
}
|
|
1876
|
+
/**
|
|
1877
|
+
* Build callers/callees graph for a symbol.
|
|
1878
|
+
*/
|
|
1879
|
+
async graphCalls(input) {
|
|
1880
|
+
await this.initialize();
|
|
1881
|
+
if (!this.graphIndexer) {
|
|
1882
|
+
return createError('GRAPH_DISABLED', 'Code graph indexing is not enabled');
|
|
1883
|
+
}
|
|
1884
|
+
return this.graphIndexer.graphCalls(input);
|
|
1885
|
+
}
|
|
1886
|
+
/**
|
|
1887
|
+
* Resolve import dependency graph for a file.
|
|
1888
|
+
*/
|
|
1889
|
+
async graphDeps(input) {
|
|
1890
|
+
await this.initialize();
|
|
1891
|
+
if (!this.graphIndexer) {
|
|
1892
|
+
return createError('GRAPH_DISABLED', 'Code graph indexing is not enabled');
|
|
1893
|
+
}
|
|
1894
|
+
return this.graphIndexer.graphDeps(input);
|
|
1895
|
+
}
|
|
1896
|
+
/**
|
|
1897
|
+
* Build class/interface hierarchy graph.
|
|
1898
|
+
*/
|
|
1899
|
+
async graphHierarchy(input) {
|
|
1900
|
+
await this.initialize();
|
|
1901
|
+
if (!this.graphIndexer) {
|
|
1902
|
+
return createError('GRAPH_DISABLED', 'Code graph indexing is not enabled');
|
|
1903
|
+
}
|
|
1904
|
+
return this.graphIndexer.graphHierarchy(input);
|
|
1905
|
+
}
|
|
1906
|
+
/**
|
|
1907
|
+
* Export graph edges as DOT or JSON.
|
|
1908
|
+
*/
|
|
1909
|
+
async codeGraphExport(format = 'json') {
|
|
1910
|
+
await this.initialize();
|
|
1911
|
+
if (!this.graphIndexer) {
|
|
1912
|
+
throw new Error('Code graph indexing is not enabled');
|
|
1913
|
+
}
|
|
1914
|
+
return this.graphIndexer.exportGraph(format);
|
|
1915
|
+
}
|
|
1916
|
+
// ========================================
|
|
1917
|
+
// WAL methods
|
|
1918
|
+
// ========================================
|
|
1919
|
+
/**
|
|
1920
|
+
* Get WAL status (pending files, failed files, etc.)
|
|
1921
|
+
*/
|
|
1922
|
+
async getWalStatus() {
|
|
1923
|
+
return this.wal.getStatus();
|
|
1924
|
+
}
|
|
1925
|
+
/**
|
|
1926
|
+
* Get recovery plan from legacy WAL.
|
|
1927
|
+
*/
|
|
1928
|
+
async getRecoveryPlan() {
|
|
1929
|
+
return this.wal.recover();
|
|
1930
|
+
}
|
|
1931
|
+
/**
|
|
1932
|
+
* Recover from interrupted indexing using WAL.
|
|
1933
|
+
* Re-indexes pending and failed files.
|
|
1934
|
+
*
|
|
1935
|
+
* @param maxRetries - Max retries per file for failed files
|
|
1936
|
+
* @returns Result with counts of recovered files
|
|
1937
|
+
*/
|
|
1938
|
+
async recover(maxRetries = 3) {
|
|
1939
|
+
await this.initialize();
|
|
1940
|
+
const plan = await this.wal.recover();
|
|
1941
|
+
let pending = 0;
|
|
1942
|
+
let retried = 0;
|
|
1943
|
+
let failed = 0;
|
|
1944
|
+
const errors = [];
|
|
1945
|
+
// Re-index pending files (interrupted during indexing)
|
|
1946
|
+
for (const filePath of plan.pending) {
|
|
1947
|
+
console.log(`[code-indexer] Recovering pending file: ${filePath}`);
|
|
1948
|
+
const result = await this.indexFile(filePath);
|
|
1949
|
+
if (result.errors.length > 0) {
|
|
1950
|
+
errors.push(...result.errors);
|
|
1951
|
+
failed++;
|
|
1952
|
+
}
|
|
1953
|
+
else {
|
|
1954
|
+
pending++;
|
|
1955
|
+
}
|
|
1956
|
+
}
|
|
1957
|
+
// Retry failed files (if under max retries)
|
|
1958
|
+
for (const failedFile of plan.failed) {
|
|
1959
|
+
if (failedFile.retryCount >= maxRetries) {
|
|
1960
|
+
console.log(`[code-indexer] Skipping ${failedFile.path} (max retries reached: ${failedFile.retryCount})`);
|
|
1961
|
+
failed++;
|
|
1962
|
+
continue;
|
|
1963
|
+
}
|
|
1964
|
+
console.log(`[code-indexer] Retrying failed file: ${failedFile.path} (attempt ${failedFile.retryCount + 1})`);
|
|
1965
|
+
const result = await this.indexFile(failedFile.path);
|
|
1966
|
+
if (result.errors.length > 0) {
|
|
1967
|
+
errors.push(...result.errors);
|
|
1968
|
+
failed++;
|
|
1969
|
+
}
|
|
1970
|
+
else {
|
|
1971
|
+
retried++;
|
|
1972
|
+
}
|
|
1973
|
+
}
|
|
1974
|
+
// Handle incomplete batch
|
|
1975
|
+
if (plan.incompleteBatch && plan.incompleteBatchFiles.length > 0) {
|
|
1976
|
+
console.log(`[code-indexer] Recovering incomplete batch: ${plan.incompleteBatch}`);
|
|
1977
|
+
console.log(`[code-indexer] Files to recover: ${plan.incompleteBatchFiles.length}`);
|
|
1978
|
+
for (const filePath of plan.incompleteBatchFiles) {
|
|
1979
|
+
const result = await this.indexFile(filePath);
|
|
1980
|
+
if (result.errors.length > 0) {
|
|
1981
|
+
errors.push(...result.errors);
|
|
1982
|
+
failed++;
|
|
1983
|
+
}
|
|
1984
|
+
else {
|
|
1985
|
+
pending++;
|
|
1986
|
+
}
|
|
1987
|
+
}
|
|
1988
|
+
}
|
|
1989
|
+
// Compact WAL after recovery
|
|
1990
|
+
await this.wal.compact();
|
|
1991
|
+
const needsTreeReconcile = this.treeIndexer && (plan.pending.length > 0 ||
|
|
1992
|
+
plan.failed.length > 0 ||
|
|
1993
|
+
plan.incompleteBatch !== null);
|
|
1994
|
+
if (needsTreeReconcile && this.treeIndexer) {
|
|
1995
|
+
await this.treeIndexer.reconcileAfterWalRecovery(this.getTreeDeps());
|
|
1996
|
+
}
|
|
1997
|
+
return { pending, retried, failed, errors };
|
|
1998
|
+
}
|
|
1999
|
+
/**
|
|
2000
|
+
* Retry failed files from WAL.
|
|
2001
|
+
*
|
|
2002
|
+
* @param maxRetries - Max total retries per file
|
|
2003
|
+
* @returns Result with counts
|
|
2004
|
+
*/
|
|
2005
|
+
async retryFailed(maxRetries = 3) {
|
|
2006
|
+
await this.initialize();
|
|
2007
|
+
const plan = await this.wal.recover();
|
|
2008
|
+
let retried = 0;
|
|
2009
|
+
let skipped = 0;
|
|
2010
|
+
const errors = [];
|
|
2011
|
+
for (const failedFile of plan.failed) {
|
|
2012
|
+
if (failedFile.retryCount >= maxRetries) {
|
|
2013
|
+
console.log(`[code-indexer] Skipping ${failedFile.path} (max retries: ${failedFile.retryCount})`);
|
|
2014
|
+
skipped++;
|
|
2015
|
+
continue;
|
|
2016
|
+
}
|
|
2017
|
+
console.log(`[code-indexer] Retrying: ${failedFile.path} (attempt ${failedFile.retryCount + 1})`);
|
|
2018
|
+
const result = await this.indexFile(failedFile.path);
|
|
2019
|
+
if (result.errors.length > 0) {
|
|
2020
|
+
errors.push(...result.errors);
|
|
2021
|
+
}
|
|
2022
|
+
else {
|
|
2023
|
+
retried++;
|
|
2024
|
+
}
|
|
2025
|
+
}
|
|
2026
|
+
// Compact WAL after retry
|
|
2027
|
+
await this.wal.compact();
|
|
2028
|
+
if (this.treeIndexer && retried > 0) {
|
|
2029
|
+
await this.treeIndexer.reconcileAfterWalRecovery(this.getTreeDeps());
|
|
2030
|
+
}
|
|
2031
|
+
return { retried, skipped, errors };
|
|
2032
|
+
}
|
|
2033
|
+
/**
|
|
2034
|
+
* Compact the WAL file.
|
|
2035
|
+
*/
|
|
2036
|
+
async compactWal() {
|
|
2037
|
+
return this.wal.compact();
|
|
2038
|
+
}
|
|
2039
|
+
/**
|
|
2040
|
+
* Clear the WAL file.
|
|
2041
|
+
*/
|
|
2042
|
+
async clearWal() {
|
|
2043
|
+
return this.wal.clear();
|
|
2044
|
+
}
|
|
2045
|
+
/**
|
|
2046
|
+
* Check if WAL exists.
|
|
2047
|
+
*/
|
|
2048
|
+
walExists() {
|
|
2049
|
+
return this.wal.exists();
|
|
2050
|
+
}
|
|
2051
|
+
/**
|
|
2052
|
+
* Get the root path.
|
|
2053
|
+
*/
|
|
2054
|
+
getRootPath() {
|
|
2055
|
+
return this.rootPath;
|
|
2056
|
+
}
|
|
2057
|
+
/**
|
|
2058
|
+
* Get the database path.
|
|
2059
|
+
*/
|
|
2060
|
+
getDbPath() {
|
|
2061
|
+
return this.dbPath;
|
|
2062
|
+
}
|
|
2063
|
+
}
|
|
2064
|
+
// Export withIndexLock for testing
|
|
2065
|
+
export { withIndexLock };
|
|
2066
|
+
// Factory function
|
|
2067
|
+
export function createCodeIndexer(rootPath, dbPath, deps, options) {
    // Forwards all four arguments unchanged to the CodeIndexer constructor.
    const indexer = new CodeIndexer(rootPath, dbPath, deps, options);
    return indexer;
}
|
|
2070
|
+
//# sourceMappingURL=index.js.map
|