mdcontext 0.0.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.changeset/README.md +28 -0
- package/.changeset/config.json +11 -0
- package/.claude/settings.local.json +25 -0
- package/.github/workflows/ci.yml +83 -0
- package/.github/workflows/claude-code-review.yml +44 -0
- package/.github/workflows/claude.yml +85 -0
- package/.github/workflows/release.yml +113 -0
- package/.tldrignore +112 -0
- package/BACKLOG.md +338 -0
- package/CONTRIBUTING.md +186 -0
- package/NOTES/NOTES +44 -0
- package/README.md +434 -11
- package/biome.json +36 -0
- package/cspell.config.yaml +14 -0
- package/dist/chunk-23UPXDNL.js +3044 -0
- package/dist/chunk-2W7MO2DL.js +1366 -0
- package/dist/chunk-3NUAZGMA.js +1689 -0
- package/dist/chunk-7TOWB2XB.js +366 -0
- package/dist/chunk-7XOTOADQ.js +3065 -0
- package/dist/chunk-AH2PDM2K.js +3042 -0
- package/dist/chunk-BNXWSZ63.js +3742 -0
- package/dist/chunk-BTL5DJVU.js +3222 -0
- package/dist/chunk-HDHYG7E4.js +104 -0
- package/dist/chunk-HLR4KZBP.js +3234 -0
- package/dist/chunk-IP3FRFEB.js +1045 -0
- package/dist/chunk-KHU56VDO.js +3042 -0
- package/dist/chunk-KRYIFLQR.js +88 -0
- package/dist/chunk-LBSDNLEM.js +287 -0
- package/dist/chunk-MNTQ7HCP.js +2643 -0
- package/dist/chunk-MUJELQQ6.js +1387 -0
- package/dist/chunk-MXJGMSLV.js +2199 -0
- package/dist/chunk-N6QJGC3Z.js +2636 -0
- package/dist/chunk-OBELGBPM.js +1713 -0
- package/dist/chunk-OT7R5XTA.js +3192 -0
- package/dist/chunk-P7X4RA2T.js +106 -0
- package/dist/chunk-PIDUQNC2.js +3185 -0
- package/dist/chunk-POGCDIH4.js +3187 -0
- package/dist/chunk-PSIEOQGZ.js +3043 -0
- package/dist/chunk-PVRT3IHA.js +3238 -0
- package/dist/chunk-QNN4TT23.js +1430 -0
- package/dist/chunk-RE3R45RJ.js +3042 -0
- package/dist/chunk-S7E6TFX6.js +803 -0
- package/dist/chunk-SG6GLU4U.js +1378 -0
- package/dist/chunk-SJCDV2ST.js +274 -0
- package/dist/chunk-SYE5XLF3.js +104 -0
- package/dist/chunk-T5VLYBZD.js +103 -0
- package/dist/chunk-TOQB7VWU.js +3238 -0
- package/dist/chunk-VFNMZ4ZQ.js +3228 -0
- package/dist/chunk-VVTGZNBT.js +1629 -0
- package/dist/chunk-W7Q4RFEV.js +104 -0
- package/dist/chunk-XTYYVRLO.js +3190 -0
- package/dist/chunk-Y6MDYVJD.js +3063 -0
- package/dist/cli/main.d.ts +1 -0
- package/dist/cli/main.js +5458 -0
- package/dist/index.d.ts +653 -0
- package/dist/index.js +79 -0
- package/dist/mcp/server.d.ts +1 -0
- package/dist/mcp/server.js +472 -0
- package/dist/schema-BAWSG7KY.js +22 -0
- package/dist/schema-E3QUPL26.js +20 -0
- package/dist/schema-EHL7WUT6.js +20 -0
- package/docs/019-USAGE.md +625 -0
- package/docs/020-current-implementation.md +364 -0
- package/docs/021-DOGFOODING-FINDINGS.md +175 -0
- package/docs/BACKLOG.md +80 -0
- package/docs/CONFIG.md +1123 -0
- package/docs/DESIGN.md +439 -0
- package/docs/ERRORS.md +383 -0
- package/docs/PROJECT.md +88 -0
- package/docs/ROADMAP.md +407 -0
- package/docs/summarization.md +320 -0
- package/docs/test-links.md +9 -0
- package/justfile +40 -0
- package/package.json +74 -9
- package/pnpm-workspace.yaml +5 -0
- package/research/INDEX.md +315 -0
- package/research/code-review/README.md +90 -0
- package/research/code-review/cli-error-handling-review.md +979 -0
- package/research/code-review/code-review-validation-report.md +464 -0
- package/research/code-review/main-ts-review.md +1128 -0
- package/research/config-analysis/01-current-implementation.md +470 -0
- package/research/config-analysis/02-strategy-recommendation.md +428 -0
- package/research/config-analysis/03-task-candidates.md +715 -0
- package/research/config-analysis/033-research-configuration-management.md +828 -0
- package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
- package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
- package/research/config-docs/SUMMARY.md +357 -0
- package/research/config-docs/TEST-RESULTS.md +776 -0
- package/research/config-docs/TODO.md +542 -0
- package/research/config-docs/analysis.md +744 -0
- package/research/config-docs/fix-validation.md +502 -0
- package/research/config-docs/help-audit.md +264 -0
- package/research/config-docs/help-system-analysis.md +890 -0
- package/research/dogfood/consolidated-tool-evaluation.md +373 -0
- package/research/dogfood/strategy-a/a-synthesis.md +184 -0
- package/research/dogfood/strategy-a/a1-docs.md +226 -0
- package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
- package/research/dogfood/strategy-a/a3-llm.md +164 -0
- package/research/dogfood/strategy-b/b-synthesis.md +228 -0
- package/research/dogfood/strategy-b/b1-architecture.md +207 -0
- package/research/dogfood/strategy-b/b2-gaps.md +258 -0
- package/research/dogfood/strategy-b/b3-workflows.md +250 -0
- package/research/dogfood/strategy-c/c-synthesis.md +451 -0
- package/research/dogfood/strategy-c/c1-explorer.md +192 -0
- package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
- package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
- package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
- package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
- package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
- package/research/effect-cli-error-handling.md +845 -0
- package/research/effect-errors-as-values.md +943 -0
- package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
- package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
- package/research/errors-task-analysis/embeddings-analysis.md +709 -0
- package/research/errors-task-analysis/index-search-analysis.md +812 -0
- package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
- package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
- package/research/issue-review.md +603 -0
- package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
- package/research/llm-summarization/alternative-providers-2026.md +1428 -0
- package/research/llm-summarization/anthropic-2026.md +367 -0
- package/research/llm-summarization/claude-cli-integration.md +1706 -0
- package/research/llm-summarization/cli-integration-patterns.md +3155 -0
- package/research/llm-summarization/openai-2026.md +473 -0
- package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
- package/research/llm-summarization/opencode-cli-integration.md +1552 -0
- package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
- package/research/llm-summarization/prototype-results.md +56 -0
- package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
- package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
- package/research/mdcontext-error-analysis.md +521 -0
- package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
- package/research/mdcontext-pudding/01-index-embed.md +956 -0
- package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
- package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
- package/research/mdcontext-pudding/02-search.md +970 -0
- package/research/mdcontext-pudding/03-context.md +779 -0
- package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
- package/research/mdcontext-pudding/04-tree.md +704 -0
- package/research/mdcontext-pudding/05-config.md +1038 -0
- package/research/mdcontext-pudding/06-links-summary.txt +87 -0
- package/research/mdcontext-pudding/06-links.md +679 -0
- package/research/mdcontext-pudding/07-stats.md +693 -0
- package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
- package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
- package/research/mdcontext-pudding/README.md +168 -0
- package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
- package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
- package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
- package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
- package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
- package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
- package/research/research-quality-review.md +834 -0
- package/research/semantic-search/002-research-embedding-models.md +490 -0
- package/research/semantic-search/003-research-rag-alternatives.md +523 -0
- package/research/semantic-search/004-research-vector-search.md +841 -0
- package/research/semantic-search/032-research-semantic-search.md +427 -0
- package/research/semantic-search/embedding-text-analysis.md +156 -0
- package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
- package/research/semantic-search/query-processing-analysis.md +207 -0
- package/research/semantic-search/root-cause-and-solution.md +114 -0
- package/research/semantic-search/threshold-validation-report.md +69 -0
- package/research/semantic-search/vector-search-analysis.md +63 -0
- package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
- package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
- package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
- package/research/task-management-2026/03-lightweight-file-based.md +567 -0
- package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
- package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
- package/research/task-management-2026/linear/02-api-integrations.md +930 -0
- package/research/task-management-2026/linear/03-ai-features.md +368 -0
- package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
- package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
- package/research/test-path-issues.md +276 -0
- package/review/ALP-76/1-error-type-design.md +962 -0
- package/review/ALP-76/2-error-handling-patterns.md +906 -0
- package/review/ALP-76/3-error-presentation.md +624 -0
- package/review/ALP-76/4-test-coverage.md +625 -0
- package/review/ALP-76/5-migration-completeness.md +440 -0
- package/review/ALP-76/6-effect-best-practices.md +755 -0
- package/scripts/apply-branch-protection.sh +47 -0
- package/scripts/branch-protection-templates.json +79 -0
- package/scripts/prototype-summarization.ts +346 -0
- package/scripts/rebuild-hnswlib.js +58 -0
- package/scripts/setup-branch-protection.sh +64 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
- package/src/cli/argv-preprocessor.test.ts +210 -0
- package/src/cli/argv-preprocessor.ts +202 -0
- package/src/cli/cli.test.ts +627 -0
- package/src/cli/commands/backlinks.ts +54 -0
- package/src/cli/commands/config-cmd.ts +642 -0
- package/src/cli/commands/context.ts +285 -0
- package/src/cli/commands/duplicates.ts +122 -0
- package/src/cli/commands/embeddings.ts +529 -0
- package/src/cli/commands/index-cmd.ts +480 -0
- package/src/cli/commands/index.ts +16 -0
- package/src/cli/commands/links.ts +52 -0
- package/src/cli/commands/search.ts +1281 -0
- package/src/cli/commands/stats.ts +149 -0
- package/src/cli/commands/tree.ts +128 -0
- package/src/cli/config-layer.ts +176 -0
- package/src/cli/error-handler.test.ts +235 -0
- package/src/cli/error-handler.ts +655 -0
- package/src/cli/flag-schemas.ts +341 -0
- package/src/cli/help.ts +588 -0
- package/src/cli/index.ts +9 -0
- package/src/cli/main.ts +435 -0
- package/src/cli/options.ts +41 -0
- package/src/cli/shared-error-handling.ts +199 -0
- package/src/cli/typo-suggester.test.ts +105 -0
- package/src/cli/typo-suggester.ts +130 -0
- package/src/cli/utils.ts +259 -0
- package/src/config/file-provider.test.ts +320 -0
- package/src/config/file-provider.ts +273 -0
- package/src/config/index.ts +72 -0
- package/src/config/integration.test.ts +667 -0
- package/src/config/precedence.test.ts +277 -0
- package/src/config/precedence.ts +451 -0
- package/src/config/schema.test.ts +414 -0
- package/src/config/schema.ts +603 -0
- package/src/config/service.test.ts +320 -0
- package/src/config/service.ts +243 -0
- package/src/config/testing.test.ts +264 -0
- package/src/config/testing.ts +110 -0
- package/src/core/index.ts +1 -0
- package/src/core/types.ts +113 -0
- package/src/duplicates/detector.test.ts +183 -0
- package/src/duplicates/detector.ts +414 -0
- package/src/duplicates/index.ts +18 -0
- package/src/embeddings/embedding-namespace.test.ts +300 -0
- package/src/embeddings/embedding-namespace.ts +947 -0
- package/src/embeddings/heading-boost.test.ts +222 -0
- package/src/embeddings/hnsw-build-options.test.ts +198 -0
- package/src/embeddings/hyde.test.ts +272 -0
- package/src/embeddings/hyde.ts +264 -0
- package/src/embeddings/index.ts +10 -0
- package/src/embeddings/openai-provider.ts +414 -0
- package/src/embeddings/pricing.json +22 -0
- package/src/embeddings/provider-constants.ts +204 -0
- package/src/embeddings/provider-errors.test.ts +967 -0
- package/src/embeddings/provider-errors.ts +565 -0
- package/src/embeddings/provider-factory.test.ts +240 -0
- package/src/embeddings/provider-factory.ts +225 -0
- package/src/embeddings/provider-integration.test.ts +788 -0
- package/src/embeddings/query-preprocessing.test.ts +187 -0
- package/src/embeddings/semantic-search-threshold.test.ts +508 -0
- package/src/embeddings/semantic-search.ts +1270 -0
- package/src/embeddings/types.ts +359 -0
- package/src/embeddings/vector-store.ts +708 -0
- package/src/embeddings/voyage-provider.ts +313 -0
- package/src/errors/errors.test.ts +845 -0
- package/src/errors/index.ts +533 -0
- package/src/index/ignore-patterns.test.ts +354 -0
- package/src/index/ignore-patterns.ts +305 -0
- package/src/index/index.ts +4 -0
- package/src/index/indexer.ts +684 -0
- package/src/index/storage.ts +260 -0
- package/src/index/types.ts +147 -0
- package/src/index/watcher.ts +189 -0
- package/src/index.ts +30 -0
- package/src/integration/search-keyword.test.ts +678 -0
- package/src/mcp/server.ts +612 -0
- package/src/parser/index.ts +1 -0
- package/src/parser/parser.test.ts +291 -0
- package/src/parser/parser.ts +394 -0
- package/src/parser/section-filter.test.ts +277 -0
- package/src/parser/section-filter.ts +392 -0
- package/src/search/__tests__/hybrid-search.test.ts +650 -0
- package/src/search/bm25-store.ts +366 -0
- package/src/search/cross-encoder.test.ts +253 -0
- package/src/search/cross-encoder.ts +406 -0
- package/src/search/fuzzy-search.test.ts +419 -0
- package/src/search/fuzzy-search.ts +273 -0
- package/src/search/hybrid-search.ts +448 -0
- package/src/search/path-matcher.test.ts +276 -0
- package/src/search/path-matcher.ts +33 -0
- package/src/search/query-parser.test.ts +260 -0
- package/src/search/query-parser.ts +319 -0
- package/src/search/searcher.test.ts +280 -0
- package/src/search/searcher.ts +724 -0
- package/src/search/wink-bm25.d.ts +30 -0
- package/src/summarization/cli-providers/claude.ts +202 -0
- package/src/summarization/cli-providers/detection.test.ts +273 -0
- package/src/summarization/cli-providers/detection.ts +118 -0
- package/src/summarization/cli-providers/index.ts +8 -0
- package/src/summarization/cost.test.ts +139 -0
- package/src/summarization/cost.ts +102 -0
- package/src/summarization/error-handler.test.ts +127 -0
- package/src/summarization/error-handler.ts +111 -0
- package/src/summarization/index.ts +102 -0
- package/src/summarization/pipeline.test.ts +498 -0
- package/src/summarization/pipeline.ts +231 -0
- package/src/summarization/prompts.test.ts +269 -0
- package/src/summarization/prompts.ts +133 -0
- package/src/summarization/provider-factory.test.ts +396 -0
- package/src/summarization/provider-factory.ts +178 -0
- package/src/summarization/types.ts +184 -0
- package/src/summarize/budget-bugs.test.ts +620 -0
- package/src/summarize/formatters.ts +419 -0
- package/src/summarize/index.ts +20 -0
- package/src/summarize/summarizer.test.ts +275 -0
- package/src/summarize/summarizer.ts +597 -0
- package/src/summarize/verify-bugs.test.ts +238 -0
- package/src/types/huggingface-transformers.d.ts +66 -0
- package/src/utils/index.ts +1 -0
- package/src/utils/tokens.test.ts +142 -0
- package/src/utils/tokens.ts +186 -0
- package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
- package/tests/fixtures/cli/.mdcontext/config.json +8 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
- package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
- package/tests/fixtures/cli/.mdcontext/indexes/sections.json +247 -0
- package/tests/fixtures/cli/README.md +9 -0
- package/tests/fixtures/cli/api-reference.md +11 -0
- package/tests/fixtures/cli/getting-started.md +11 -0
- package/tests/integration/embed-index.test.ts +712 -0
- package/tests/integration/search-context.test.ts +469 -0
- package/tests/integration/search-semantic.test.ts +522 -0
- package/tsconfig.json +26 -0
- package/vitest.config.ts +16 -0
- package/vitest.setup.ts +12 -0
|
@@ -0,0 +1,708 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vector store using hnswlib-node
|
|
3
|
+
*
|
|
4
|
+
* Supports both legacy (flat) and namespaced storage layouts:
|
|
5
|
+
* - Legacy: .mdcontext/vectors.bin, .mdcontext/vectors.meta.bin
|
|
6
|
+
* - Namespaced: .mdcontext/embeddings/{namespace}/vectors.bin, vectors.meta.bin
|
|
7
|
+
*
|
|
8
|
+
* New indexes are written using namespaced storage. Existing legacy indexes
|
|
9
|
+
* continue to be loaded from their original flat locations; this module does
|
|
10
|
+
* not perform automatic migration between layouts.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import * as fs from 'node:fs/promises'
|
|
14
|
+
import * as path from 'node:path'
|
|
15
|
+
import * as msgpack from '@msgpack/msgpack'
|
|
16
|
+
import { Effect } from 'effect'
|
|
17
|
+
import HierarchicalNSW from 'hnswlib-node'
|
|
18
|
+
import { DimensionMismatchError, VectorStoreError } from '../errors/index.js'
|
|
19
|
+
import { INDEX_DIR } from '../index/types.js'
|
|
20
|
+
import {
|
|
21
|
+
generateNamespace,
|
|
22
|
+
getNamespaceDir,
|
|
23
|
+
getMetaPath as getNamespacedMetaPath,
|
|
24
|
+
getVectorPath as getNamespacedVectorPath,
|
|
25
|
+
} from './embedding-namespace.js'
|
|
26
|
+
import type { VectorEntry, VectorIndex } from './types.js'
|
|
27
|
+
|
|
28
|
+
// ============================================================================
|
|
29
|
+
// Constants
|
|
30
|
+
// ============================================================================
|
|
31
|
+
|
|
32
|
+
/** File name of the vector index inside the legacy (flat) index directory. */
const VECTOR_INDEX_FILE = 'vectors.bin'

/** File name of the metadata file stored in the legacy (flat) index directory. */
const VECTOR_META_FILE = 'vectors.meta.bin'

/** Format version number for the index; NOTE(review): its use is outside this excerpt. */
const INDEX_VERSION = 1
|
|
35
|
+
|
|
36
|
+
// ============================================================================
|
|
37
|
+
// Vector Store
|
|
38
|
+
// ============================================================================
|
|
39
|
+
|
|
40
|
+
/**
 * Per-query tuning options accepted by the vector store search methods.
 */
export interface VectorSearchOptions {
  /**
   * HNSW `efSearch` value (recall/speed tradeoff; documented default: 100).
   * The store only touches the index's ef setting when this is provided.
   */
  readonly efSearch?: number | undefined
}
|
|
44
|
+
|
|
45
|
+
/**
 * Contract for a vector index that can be filled, queried and persisted.
 * Every fallible operation is exposed as an Effect with a typed error channel.
 */
export interface VectorStore {
  /** Root path the store was created for. */
  readonly rootPath: string

  /** Number of dimensions the store was created with. */
  readonly dimensions: number

  /** Add embedding entries to the store. */
  add(entries: VectorEntry[]): Effect.Effect<void, VectorStoreError>

  /**
   * Nearest-neighbour query.
   *
   * @param vector - Query embedding
   * @param limit - Maximum number of neighbours to request
   * @param threshold - Minimum similarity a hit needs to be returned
   * @param options - Optional per-query tuning (see VectorSearchOptions)
   */
  search(
    vector: number[],
    limit: number,
    threshold?: number,
    options?: VectorSearchOptions,
  ): Effect.Effect<VectorSearchResult[], VectorStoreError>

  /**
   * Same query as `search`, but also reports how many hits fell below the
   * threshold and the best similarity among them. Lets callers give
   * feedback when nothing passes the threshold.
   */
  searchWithStats(
    vector: number[],
    limit: number,
    threshold?: number,
    options?: VectorSearchOptions,
  ): Effect.Effect<VectorSearchResultWithStats, VectorStoreError>

  /** Persist the store to disk. */
  save(): Effect.Effect<void, VectorStoreError>

  /**
   * Read the store back from disk.
   *
   * @returns VectorStoreLoadResult carrying the loaded flag and any warnings
   * @throws DimensionMismatchError (as an Effect failure) when the stored
   *   dimensions don't match the current provider
   */
  load(): Effect.Effect<
    VectorStoreLoadResult,
    VectorStoreError | DimensionMismatchError
  >

  /** Snapshot of counters and provider information for this store. */
  getStats(): VectorStoreStats
}
|
|
78
|
+
|
|
79
|
+
/**
 * One hit returned by a vector search. The identifying fields are copied
 * from the stored entry that matched.
 */
export interface VectorSearchResult {
  /** Id of the matched entry. */
  readonly id: string
  /** Section id recorded on the matched entry. */
  readonly sectionId: string
  /** Document path recorded on the matched entry. */
  readonly documentPath: string
  /** Heading recorded on the matched entry. */
  readonly heading: string
  /** Similarity score; the HNSW store in this file reports `1 - distance`. */
  readonly similarity: number
}
|
|
86
|
+
|
|
87
|
+
/**
 * Search outcome that also describes the hits rejected by the threshold,
 * so callers can explain an empty result list to the user.
 */
export interface VectorSearchResultWithStats {
  /** Hits whose similarity met the threshold. */
  readonly results: VectorSearchResult[]
  /** How many neighbours were found but scored below the threshold. */
  readonly belowThresholdCount: number
  /** Best similarity among the rejected neighbours, or null when none were rejected. */
  readonly belowThresholdHighest: number | null
}
|
|
98
|
+
|
|
99
|
+
/**
 * Summary figures reported by `VectorStore.getStats()`.
 * NOTE(review): the exact meaning and units of the counters are defined by
 * the implementation's getStats(), which is outside this excerpt.
 */
export interface VectorStoreStats {
  /** Entry count (presumably the number of stored vectors; confirm in getStats). */
  readonly count: number
  /** Vector dimensionality. */
  readonly dimensions: number
  /** Embedding provider name. */
  readonly provider: string
  /** Embedding model name, when known. */
  readonly providerModel?: string | undefined
  /** Accumulated cost figure. */
  readonly totalCost: number
  /** Accumulated token count. */
  readonly totalTokens: number
}
|
|
107
|
+
|
|
108
|
+
/**
 * Outcome of `VectorStore.load()`, including any warning about a
 * configuration mismatch detected while loading.
 */
export interface VectorStoreLoadResult {
  /** True when the index was loaded successfully. */
  readonly loaded: boolean
  /** Present when the HNSW parameters in config differ from those stored in the index. */
  readonly hnswMismatch?: HnswMismatchWarning | undefined
}
|
|
117
|
+
|
|
118
|
+
/**
 * Describes a disagreement between the configured HNSW build parameters and
 * the parameters the stored index was built with.
 */
export interface HnswMismatchWarning {
  /** Parameters taken from the current configuration. */
  readonly configParams: { m: number; efConstruction: number }
  /** Parameters recorded in the stored index. */
  readonly indexParams: { m: number; efConstruction: number }
}
|
|
128
|
+
|
|
129
|
+
// ============================================================================
|
|
130
|
+
// Implementation
|
|
131
|
+
// ============================================================================
|
|
132
|
+
|
|
133
|
+
class HnswVectorStore implements VectorStore {
|
|
134
|
+
readonly rootPath: string
|
|
135
|
+
readonly dimensions: number
|
|
136
|
+
|
|
137
|
+
private index: HierarchicalNSW.HierarchicalNSW | null = null
|
|
138
|
+
private entries: Map<number, VectorEntry> = new Map()
|
|
139
|
+
private idToIndex: Map<string, number> = new Map()
|
|
140
|
+
private nextIndex = 0
|
|
141
|
+
private provider = 'unknown'
|
|
142
|
+
private providerModel: string | undefined = undefined
|
|
143
|
+
private providerBaseURL: string | undefined = undefined
|
|
144
|
+
private totalCost = 0
|
|
145
|
+
private totalTokens = 0
|
|
146
|
+
|
|
147
|
+
// HNSW build parameters
|
|
148
|
+
private readonly hnswM: number
|
|
149
|
+
private readonly hnswEfConstruction: number
|
|
150
|
+
|
|
151
|
+
// Namespace support - when set, uses namespaced storage paths
|
|
152
|
+
private namespace: string | undefined = undefined
|
|
153
|
+
|
|
154
|
+
/**
 * Create an empty store; the HNSW index itself is built lazily.
 *
 * @param rootPath - Root directory; stored as an absolute path
 * @param dimensions - Vector dimensionality used when the index is created
 * @param hnswOptions - Optional build parameters; `m` falls back to 16 and
 *   `efConstruction` to 200 when not supplied
 */
constructor(
  rootPath: string,
  dimensions: number,
  hnswOptions?: HnswBuildOptions,
) {
  const requestedM = hnswOptions?.m
  const requestedEfConstruction = hnswOptions?.efConstruction

  this.rootPath = path.resolve(rootPath)
  this.dimensions = dimensions
  this.hnswM = requestedM ?? 16
  this.hnswEfConstruction = requestedEfConstruction ?? 200
}
|
|
164
|
+
|
|
165
|
+
/**
 * Select the storage namespace for this store.
 * Once a non-empty namespace is set, the path helpers of this class
 * resolve to the namespaced layout instead of the legacy flat one.
 *
 * @param namespace - Namespace identifier to use for storage paths
 */
setNamespace(namespace: string): void {
  this.namespace = namespace
}
|
|
172
|
+
|
|
173
|
+
/**
 * Report the namespace currently in use.
 *
 * @returns The namespace, or undefined when none has been set
 */
getNamespace(): string | undefined {
  return this.namespace
}
|
|
179
|
+
|
|
180
|
+
/**
 * Resolve the directory that holds the index files.
 *
 * @returns The namespace directory when a (non-empty) namespace is set,
 *   otherwise the legacy `INDEX_DIR` directory under the root path
 */
private getIndexDir(): string {
  return this.namespace
    ? getNamespaceDir(this.rootPath, this.namespace)
    : path.join(this.rootPath, INDEX_DIR)
}
|
|
190
|
+
|
|
191
|
+
/**
 * Resolve the path of the vector index file.
 *
 * @returns The namespaced vector path when a (non-empty) namespace is set,
 *   otherwise `VECTOR_INDEX_FILE` inside the legacy index directory
 */
private getVectorPath(): string {
  return this.namespace
    ? getNamespacedVectorPath(this.rootPath, this.namespace)
    : path.join(this.rootPath, INDEX_DIR, VECTOR_INDEX_FILE)
}
|
|
200
|
+
|
|
201
|
+
/**
 * Resolve the path of the metadata file.
 *
 * @returns The namespaced metadata path when a (non-empty) namespace is set,
 *   otherwise `VECTOR_META_FILE` inside the legacy index directory
 */
private getMetaPath(): string {
  return this.namespace
    ? getNamespacedMetaPath(this.rootPath, this.namespace)
    : path.join(this.rootPath, INDEX_DIR, VECTOR_META_FILE)
}
|
|
210
|
+
|
|
211
|
+
/**
 * Return the HNSW index, creating and initialising it on first use.
 *
 * The new index is cached on `this.index` only after `initIndex` has
 * succeeded. If initialisation throws (for example because of invalid
 * build parameters), nothing is cached and the next call retries instead
 * of handing out an uninitialised index.
 *
 * @returns The initialised cosine-space index for `this.dimensions`
 */
private ensureIndex(): HierarchicalNSW.HierarchicalNSW {
  if (this.index) {
    return this.index
  }

  const created = new HierarchicalNSW.HierarchicalNSW('cosine', this.dimensions)

  // Start with room for 10000 items (add() grows the index when needed) and
  // apply the configured HNSW parameters: M, efConstruction, randomSeed = 100.
  created.initIndex(10000, this.hnswM, this.hnswEfConstruction, 100)

  this.index = created
  return created
}
|
|
223
|
+
|
|
224
|
+
/**
 * Insert embedding entries into the HNSW index.
 *
 * Entries whose `id` is already registered are ignored. Each new entry
 * receives the next numeric label, which keys both the index point and
 * the in-memory entry maps.
 *
 * @param entries - Entries to insert
 * @returns Effect that fails with a VectorStoreError (operation 'add')
 *   when anything inside the insertion throws
 */
add(entries: VectorEntry[]): Effect.Effect<void, VectorStoreError> {
  const insertAll = (): void => {
    const hnsw = this.ensureIndex()

    for (const candidate of entries) {
      const alreadyStored = this.idToIndex.has(candidate.id)
      if (!alreadyStored) {
        const label = this.nextIndex
        this.nextIndex += 1

        // Double the capacity once the label no longer fits.
        if (label >= hnsw.getMaxElements()) {
          hnsw.resizeIndex(hnsw.getMaxElements() * 2)
        }

        hnsw.addPoint(candidate.embedding as number[], label)
        this.entries.set(label, candidate)
        this.idToIndex.set(candidate.id, label)
      }
    }
  }

  return Effect.try({
    try: insertAll,
    catch: (e) =>
      new VectorStoreError({
        operation: 'add',
        message: e instanceof Error ? e.message : String(e),
        cause: e,
      }),
  })
}
|
|
255
|
+
|
|
256
|
+
/**
 * Find the nearest stored entries to a query vector.
 *
 * Delegates to `searchWithStats` and drops the below-threshold
 * statistics, so the two methods cannot drift apart. The returned hits
 * are the same ones the previous stand-alone loop produced: at most
 * `min(limit, stored entries)` neighbours, keeping those whose similarity
 * (`1 - distance`) is not below `threshold`.
 *
 * @param vector - Query embedding
 * @param limit - Maximum number of neighbours to request
 * @param threshold - Minimum similarity for a hit to be returned (default 0)
 * @param options - Optional per-query tuning; `efSearch` is applied to the
 *   index before querying when provided
 * @returns Effect yielding the matching hits (empty when the store has no
 *   index or no entries); fails with a VectorStoreError (operation
 *   'search') when the underlying query throws
 */
search(
  vector: number[],
  limit: number,
  threshold = 0,
  options?: VectorSearchOptions,
): Effect.Effect<VectorSearchResult[], VectorStoreError> {
  return Effect.map(
    this.searchWithStats(vector, limit, threshold, options),
    (outcome) => outcome.results,
  )
}
|
|
317
|
+
|
|
318
|
+
/**
 * Nearest-neighbour query that also reports what the threshold rejected.
 *
 * @param vector - Query embedding
 * @param limit - Maximum number of neighbours to request (capped at the
 *   number of stored entries)
 * @param threshold - Minimum similarity for a hit to be returned (default 0)
 * @param options - Optional per-query tuning; `efSearch` is applied to the
 *   index before querying when provided
 * @returns Effect yielding the accepted hits plus the count and best
 *   similarity of rejected neighbours; fails with a VectorStoreError
 *   (operation 'search') when the underlying query throws
 */
searchWithStats(
  vector: number[],
  limit: number,
  threshold = 0,
  options?: VectorSearchOptions,
): Effect.Effect<VectorSearchResultWithStats, VectorStoreError> {
  const runQuery = (): VectorSearchResultWithStats => {
    const hnsw = this.index

    // Nothing to query: no index yet, or no entries registered.
    if (!hnsw || this.entries.size === 0) {
      return {
        results: [],
        belowThresholdCount: 0,
        belowThresholdHighest: null,
      }
    }

    // efSearch controls the recall/speed tradeoff; only set when requested.
    const efSearch = options?.efSearch
    if (efSearch !== undefined) {
      hnsw.setEf(efSearch)
    }

    const neighbourCount = Math.min(limit, this.entries.size)
    const knn = hnsw.searchKnn(vector, neighbourCount)

    const accepted: VectorSearchResult[] = []
    let rejectedCount = 0
    let bestRejected: number | null = null

    for (let pos = 0; pos < knn.neighbors.length; pos++) {
      const label = knn.neighbors[pos]
      const distance = knn.distances[pos]

      if (label !== undefined && distance !== undefined) {
        // hnswlib reports 1 - cosine_similarity in cosine space.
        const similarity = 1 - distance
        const stored = this.entries.get(label)

        if (stored) {
          if (similarity < threshold) {
            // Record the rejected neighbour for user feedback.
            rejectedCount += 1
            if (bestRejected === null || similarity > bestRejected) {
              bestRejected = similarity
            }
          } else {
            accepted.push({
              id: stored.id,
              sectionId: stored.sectionId,
              documentPath: stored.documentPath,
              heading: stored.heading,
              similarity,
            })
          }
        }
      }
    }

    return {
      results: accepted,
      belowThresholdCount: rejectedCount,
      belowThresholdHighest: bestRejected,
    }
  }

  return Effect.try({
    try: runQuery,
    catch: (e) =>
      new VectorStoreError({
        operation: 'search',
        message: e instanceof Error ? e.message : String(e),
        cause: e,
      }),
  })
}
|
|
397
|
+
|
|
398
|
+
/**
 * Persist the store to disk.
 *
 * Steps, in order: create the index directory, write the hnswlib index file,
 * then write the MessagePack-encoded metadata (entries, provider info, usage
 * totals, timestamps and the HNSW build parameters). Does nothing when no
 * index has been created yet.
 *
 * @returns An effect that fails with a `VectorStoreError` (operation `save`)
 *          when any of the three steps fails.
 */
save(): Effect.Effect<void, VectorStoreError> {
  // Every step reports failures the same way; only the message prefix differs.
  const saveFailure = (what: string) => (e: unknown) =>
    new VectorStoreError({
      operation: 'save',
      message: `${what}: ${e instanceof Error ? e.message : String(e)}`,
      cause: e,
    })

  return Effect.gen(
    function* (this: HnswVectorStore) {
      if (!this.index) {
        return
      }

      const targetDir = this.getIndexDir()
      yield* Effect.tryPromise({
        try: () => fs.mkdir(targetDir, { recursive: true }),
        catch: saveFailure('Failed to create directory'),
      })

      // Save the hnswlib index
      yield* Effect.tryPromise({
        try: () => this.index!.writeIndex(this.getVectorPath()),
        catch: saveFailure('Failed to write index'),
      })

      // Metadata keys are keyed by the numeric index label, stringified.
      const serialisedEntries = Object.fromEntries(
        [...this.entries].map(([label, entry]) => [String(label), entry]),
      )

      const meta: VectorIndex = {
        version: INDEX_VERSION,
        provider: this.provider,
        providerModel: this.providerModel,
        providerBaseURL: this.providerBaseURL,
        dimensions: this.dimensions,
        entries: serialisedEntries,
        totalCost: this.totalCost,
        totalTokens: this.totalTokens,
        createdAt: new Date().toISOString(),
        updatedAt: new Date().toISOString(),
        // Store HNSW build parameters for validation on load
        hnswParams: {
          m: this.hnswM,
          efConstruction: this.hnswEfConstruction,
        },
      }

      yield* Effect.tryPromise({
        try: async () => {
          // Rough size estimate (15000 bytes per entry); warn only, never block the save.
          const approxBytes = this.entries.size * 15000
          if (approxBytes > 100_000_000) {
            console.warn(
              `Large metadata detected: ~${(approxBytes / 1e6).toFixed(0)}MB. Consider indexing subdirectories separately.`,
            )
          }

          // Encode with MessagePack and write
          await fs.writeFile(this.getMetaPath(), msgpack.encode(meta))
        },
        catch: saveFailure('Failed to write metadata'),
      })
    }.bind(this),
  )
}
|
|
476
|
+
|
|
477
|
+
/**
 * Load a previously saved store from disk.
 *
 * Resolves to `{ loaded: false }` when the vector file or the metadata file
 * (binary, or legacy JSON) is missing. Otherwise reads the metadata, verifies
 * the stored dimensions match this store, reads the hnswlib index, and
 * replaces the in-memory entries, provider info and usage totals.
 *
 * Legacy JSON metadata is migrated to the binary format on a best-effort
 * basis; a failed migration does not fail the load.
 *
 * In-memory state is only replaced once the index file has been read
 * successfully, so a failed load leaves the previous state untouched.
 *
 * @returns `{ loaded: true, hnswMismatch }` on success, where `hnswMismatch`
 *          is set when the stored HNSW build parameters differ from the
 *          configured ones.
 *          Fails with `DimensionMismatchError` when stored and configured
 *          dimensions differ, or `VectorStoreError` (operation `load`) when
 *          metadata or index cannot be read.
 */
load(): Effect.Effect<
  VectorStoreLoadResult,
  VectorStoreError | DimensionMismatchError
> {
  return Effect.gen(
    function* (this: HnswVectorStore) {
      const vectorPath = this.getVectorPath()
      const metaPath = this.getMetaPath()
      // Legacy JSON metadata lives next to the binary file. Only swap the
      // trailing extension: a plain string replace would rewrite the first
      // ".bin" anywhere in the path (e.g. a ".bin" directory).
      // NOTE(review): assumes getMetaPath() ends with ".bin" - confirm.
      const jsonPath = metaPath.replace(/\.bin$/, '.json')

      // True when an error is a "file does not exist" failure from fs.
      const isMissing = (e: unknown): boolean =>
        typeof e === 'object' &&
        e !== null &&
        (e as { code?: unknown }).code === 'ENOENT'

      // Check if files exist - a missing file is not an error, just "not loaded".
      const filesExist = yield* Effect.tryPromise({
        try: async () => {
          await fs.access(vectorPath)
          // Either binary or JSON metadata is enough.
          try {
            await fs.access(metaPath)
          } catch {
            await fs.access(jsonPath)
          }
          return true
        },
        catch: () =>
          new VectorStoreError({
            operation: 'load',
            message: 'Files not found',
          }),
      }).pipe(
        Effect.catchTag('VectorStoreError', () => Effect.succeed(false)),
      )

      if (!filesExist) {
        return { loaded: false }
      }

      // Load metadata - try binary first, fall back to JSON for migration
      const loadedMeta = yield* Effect.tryPromise({
        try: async () => {
          // Try binary format first (new). Remember why it failed so the
          // real cause can be reported if the JSON fallback is unavailable.
          let binaryFailure: unknown
          try {
            const buffer = await fs.readFile(metaPath)
            return msgpack.decode(buffer) as VectorIndex
          } catch (e) {
            binaryFailure = e
          }

          // Fall back to JSON for migration (old)
          let json: string
          try {
            json = await fs.readFile(jsonPath, 'utf-8')
          } catch (jsonFailure) {
            // Prefer the most informative error: a corrupt/unreadable binary
            // file, then an unreadable JSON file, then plain "not found".
            if (!isMissing(binaryFailure)) throw binaryFailure
            if (!isMissing(jsonFailure)) throw jsonFailure
            throw new Error('Metadata file not found')
          }

          // A malformed JSON file surfaces its own parse error rather than
          // being reported as a missing file.
          const meta = JSON.parse(json) as VectorIndex

          // Auto-migrate to binary format (best effort)
          try {
            await fs.writeFile(metaPath, msgpack.encode(meta))
            // Remove old JSON file (ignore errors if already deleted by
            // another process). Never unlink the file we just wrote.
            if (jsonPath !== metaPath) {
              await fs.unlink(jsonPath).catch(() => {})
            }
          } catch {
            // Migration failed, but we have the data - continue
          }

          return meta
        },
        catch: (e) =>
          new VectorStoreError({
            operation: 'load',
            message: `Failed to read metadata: ${e instanceof Error ? e.message : String(e)}`,
            cause: e,
          }),
      })

      // Apply legacy index migration: default to 'openai' if provider is missing
      const meta: VectorIndex = {
        ...loadedMeta,
        provider: loadedMeta.provider || 'openai',
      }

      // Verify dimensions match - fail with clear error if mismatch
      if (meta.dimensions !== this.dimensions) {
        return yield* Effect.fail(
          new DimensionMismatchError({
            corpusDimensions: meta.dimensions,
            providerDimensions: this.dimensions,
            corpusProvider: meta.providerModel
              ? `${meta.provider}:${meta.providerModel}`
              : meta.provider,
            path: this.rootPath,
          }),
        )
      }

      // Read the hnswlib index into a local instance first; this.index is
      // only replaced after the read succeeds.
      const index = new HierarchicalNSW.HierarchicalNSW(
        'cosine',
        this.dimensions,
      )
      yield* Effect.tryPromise({
        try: () => index.readIndex(vectorPath),
        catch: (e) =>
          new VectorStoreError({
            operation: 'load',
            message: `Failed to read index: ${e instanceof Error ? e.message : String(e)}`,
            cause: e,
          }),
      })
      this.index = index

      // Restore entries
      this.entries.clear()
      this.idToIndex.clear()
      this.nextIndex = 0

      for (const [idxStr, entry] of Object.entries(meta.entries)) {
        const idx = parseInt(idxStr, 10)
        this.entries.set(idx, entry)
        this.idToIndex.set(entry.id, idx)
        // Next free label is one past the highest label seen.
        this.nextIndex = Math.max(this.nextIndex, idx + 1)
      }

      this.provider = meta.provider
      this.providerModel = meta.providerModel
      this.providerBaseURL = meta.providerBaseURL
      this.totalCost = meta.totalCost
      this.totalTokens = meta.totalTokens

      // Check for HNSW parameter mismatch (only possible when the index
      // recorded its build parameters).
      let hnswMismatch: HnswMismatchWarning | undefined
      if (meta.hnswParams) {
        const indexM = meta.hnswParams.m
        const indexEf = meta.hnswParams.efConstruction
        if (indexM !== this.hnswM || indexEf !== this.hnswEfConstruction) {
          hnswMismatch = {
            configParams: {
              m: this.hnswM,
              efConstruction: this.hnswEfConstruction,
            },
            indexParams: { m: indexM, efConstruction: indexEf },
          }
        }
      }

      return { loaded: true, hnswMismatch }
    }.bind(this),
  )
}
|
|
628
|
+
|
|
629
|
+
/**
 * Snapshot of the store's current state.
 *
 * @returns Entry count, embedding dimensions, provider name and model, and
 *          the accumulated cost and token totals.
 */
getStats(): VectorStoreStats {
  const { dimensions, provider, providerModel, totalCost, totalTokens } = this
  return {
    count: this.entries.size,
    dimensions,
    provider,
    providerModel,
    totalCost,
    totalTokens,
  }
}
|
|
639
|
+
|
|
640
|
+
/**
 * Record which embedding provider this store's vectors belong to.
 *
 * @param name - Provider name.
 * @param model - Provider model; the stored value becomes `undefined` when omitted.
 * @param baseURL - Provider base URL; the stored value becomes `undefined` when omitted.
 */
setProvider(name: string, model?: string, baseURL?: string): void {
  this.providerBaseURL = baseURL
  this.providerModel = model
  this.provider = name
}
|
|
645
|
+
|
|
646
|
+
/**
 * Add to the running usage totals kept by this store.
 *
 * @param cost - Amount added to the accumulated cost.
 * @param tokens - Amount added to the accumulated token count.
 */
addCost(cost: number, tokens: number): void {
  this.totalTokens = this.totalTokens + tokens
  this.totalCost = this.totalCost + cost
}
|
|
650
|
+
}
|
|
651
|
+
|
|
652
|
+
// ============================================================================
|
|
653
|
+
// Factory
|
|
654
|
+
// ============================================================================
|
|
655
|
+
|
|
656
|
+
/**
 * Build-time parameters for the HNSW index.
 *
 * Both settings influence index quality and build time, and changing either
 * one requires rebuilding the index.
 */
export interface HnswBuildOptions {
  /**
   * Maximum connections per node (default: 16).
   * Larger values improve recall at the cost of a larger index.
   */
  readonly m?: number | undefined
  /**
   * Search width used while constructing the index (default: 200).
   * Larger values improve quality at the cost of slower builds.
   */
  readonly efConstruction?: number | undefined
}
|
|
666
|
+
|
|
667
|
+
/**
 * Build a vector store rooted at `rootPath`.
 *
 * @param rootPath - Root directory containing the index
 * @param dimensions - Embedding dimensions
 * @param hnswOptions - Optional HNSW build parameters
 * @returns A freshly constructed VectorStore
 */
export const createVectorStore = (
  rootPath: string,
  dimensions: number,
  hnswOptions?: HnswBuildOptions,
): VectorStore => {
  return new HnswVectorStore(rootPath, dimensions, hnswOptions)
}
|
|
680
|
+
|
|
681
|
+
/**
 * Build a vector store scoped to one provider/model/dimensions combination.
 *
 * The namespace is derived from the three identifying values and applied to
 * the store, which uses the namespaced storage structure:
 *   .mdcontext/embeddings/{provider}_{model}_{dimensions}/vectors.bin
 *
 * @param rootPath - Root directory containing the index
 * @param provider - Provider name (e.g., "openai", "voyage")
 * @param model - Model name (e.g., "text-embedding-3-small")
 * @param dimensions - Embedding dimensions
 * @param hnswOptions - Optional HNSW build parameters
 * @returns A new VectorStore instance with its namespace already set
 */
export const createNamespacedVectorStore = (
  rootPath: string,
  provider: string,
  model: string,
  dimensions: number,
  hnswOptions?: HnswBuildOptions,
): VectorStore => {
  const ns = generateNamespace(provider, model, dimensions)
  const namespacedStore = new HnswVectorStore(rootPath, dimensions, hnswOptions)
  namespacedStore.setNamespace(ns)
  return namespacedStore
}
|
|
706
|
+
|
|
707
|
+
// Export the class for type access
|
|
708
|
+
export { HnswVectorStore }
|