mdcontext 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.changeset/config.json +9 -9
- package/.claude/settings.local.json +25 -0
- package/.github/workflows/claude-code-review.yml +44 -0
- package/.github/workflows/claude.yml +85 -0
- package/CONTRIBUTING.md +186 -0
- package/NOTES/NOTES +44 -0
- package/README.md +206 -3
- package/biome.json +1 -1
- package/dist/chunk-23UPXDNL.js +3044 -0
- package/dist/chunk-2W7MO2DL.js +1366 -0
- package/dist/chunk-3NUAZGMA.js +1689 -0
- package/dist/chunk-7TOWB2XB.js +366 -0
- package/dist/chunk-7XOTOADQ.js +3065 -0
- package/dist/chunk-AH2PDM2K.js +3042 -0
- package/dist/chunk-BNXWSZ63.js +3742 -0
- package/dist/chunk-BTL5DJVU.js +3222 -0
- package/dist/chunk-HDHYG7E4.js +104 -0
- package/dist/chunk-HLR4KZBP.js +3234 -0
- package/dist/chunk-IP3FRFEB.js +1045 -0
- package/dist/chunk-KHU56VDO.js +3042 -0
- package/dist/chunk-KRYIFLQR.js +85 -89
- package/dist/chunk-LBSDNLEM.js +287 -0
- package/dist/chunk-MNTQ7HCP.js +2643 -0
- package/dist/chunk-MUJELQQ6.js +1387 -0
- package/dist/chunk-MXJGMSLV.js +2199 -0
- package/dist/chunk-N6QJGC3Z.js +2636 -0
- package/dist/chunk-OBELGBPM.js +1713 -0
- package/dist/chunk-OT7R5XTA.js +3192 -0
- package/dist/chunk-P7X4RA2T.js +106 -0
- package/dist/chunk-PIDUQNC2.js +3185 -0
- package/dist/chunk-POGCDIH4.js +3187 -0
- package/dist/chunk-PSIEOQGZ.js +3043 -0
- package/dist/chunk-PVRT3IHA.js +3238 -0
- package/dist/chunk-QNN4TT23.js +1430 -0
- package/dist/chunk-RE3R45RJ.js +3042 -0
- package/dist/chunk-S7E6TFX6.js +718 -657
- package/dist/chunk-SG6GLU4U.js +1378 -0
- package/dist/chunk-SJCDV2ST.js +274 -0
- package/dist/chunk-SYE5XLF3.js +104 -0
- package/dist/chunk-T5VLYBZD.js +103 -0
- package/dist/chunk-TOQB7VWU.js +3238 -0
- package/dist/chunk-VFNMZ4ZQ.js +3228 -0
- package/dist/chunk-VVTGZNBT.js +1533 -1423
- package/dist/chunk-W7Q4RFEV.js +104 -0
- package/dist/chunk-XTYYVRLO.js +3190 -0
- package/dist/chunk-Y6MDYVJD.js +3063 -0
- package/dist/cli/main.js +4072 -629
- package/dist/index.d.ts +420 -33
- package/dist/index.js +8 -15
- package/dist/mcp/server.js +103 -7
- package/dist/schema-BAWSG7KY.js +22 -0
- package/dist/schema-E3QUPL26.js +20 -0
- package/dist/schema-EHL7WUT6.js +20 -0
- package/docs/019-USAGE.md +44 -5
- package/docs/020-current-implementation.md +8 -8
- package/docs/021-DOGFOODING-FINDINGS.md +1 -1
- package/docs/CONFIG.md +1123 -0
- package/docs/ERRORS.md +383 -0
- package/docs/summarization.md +320 -0
- package/justfile +40 -0
- package/package.json +39 -33
- package/research/INDEX.md +315 -0
- package/research/code-review/README.md +90 -0
- package/research/code-review/cli-error-handling-review.md +979 -0
- package/research/code-review/code-review-validation-report.md +464 -0
- package/research/code-review/main-ts-review.md +1128 -0
- package/research/config-docs/SUMMARY.md +357 -0
- package/research/config-docs/TEST-RESULTS.md +776 -0
- package/research/config-docs/TODO.md +542 -0
- package/research/config-docs/analysis.md +744 -0
- package/research/config-docs/fix-validation.md +502 -0
- package/research/config-docs/help-audit.md +264 -0
- package/research/config-docs/help-system-analysis.md +890 -0
- package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
- package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
- package/research/issue-review.md +603 -0
- package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
- package/research/llm-summarization/alternative-providers-2026.md +1428 -0
- package/research/llm-summarization/anthropic-2026.md +367 -0
- package/research/llm-summarization/claude-cli-integration.md +1706 -0
- package/research/llm-summarization/cli-integration-patterns.md +3155 -0
- package/research/llm-summarization/openai-2026.md +473 -0
- package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
- package/research/llm-summarization/opencode-cli-integration.md +1552 -0
- package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
- package/research/llm-summarization/prototype-results.md +56 -0
- package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
- package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
- package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
- package/research/mdcontext-pudding/01-index-embed.md +956 -0
- package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
- package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
- package/research/mdcontext-pudding/02-search.md +970 -0
- package/research/mdcontext-pudding/03-context.md +779 -0
- package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
- package/research/mdcontext-pudding/04-tree.md +704 -0
- package/research/mdcontext-pudding/05-config.md +1038 -0
- package/research/mdcontext-pudding/06-links-summary.txt +87 -0
- package/research/mdcontext-pudding/06-links.md +679 -0
- package/research/mdcontext-pudding/07-stats.md +693 -0
- package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
- package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
- package/research/mdcontext-pudding/README.md +168 -0
- package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
- package/research/research-quality-review.md +834 -0
- package/research/semantic-search/embedding-text-analysis.md +156 -0
- package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
- package/research/semantic-search/query-processing-analysis.md +207 -0
- package/research/semantic-search/root-cause-and-solution.md +114 -0
- package/research/semantic-search/threshold-validation-report.md +69 -0
- package/research/semantic-search/vector-search-analysis.md +63 -0
- package/research/test-path-issues.md +276 -0
- package/review/ALP-76/1-error-type-design.md +962 -0
- package/review/ALP-76/2-error-handling-patterns.md +906 -0
- package/review/ALP-76/3-error-presentation.md +624 -0
- package/review/ALP-76/4-test-coverage.md +625 -0
- package/review/ALP-76/5-migration-completeness.md +440 -0
- package/review/ALP-76/6-effect-best-practices.md +755 -0
- package/scripts/apply-branch-protection.sh +47 -0
- package/scripts/branch-protection-templates.json +79 -0
- package/scripts/prototype-summarization.ts +346 -0
- package/scripts/rebuild-hnswlib.js +32 -37
- package/scripts/setup-branch-protection.sh +64 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
- package/src/cli/argv-preprocessor.test.ts +2 -2
- package/src/cli/cli.test.ts +230 -33
- package/src/cli/commands/config-cmd.ts +642 -0
- package/src/cli/commands/context.ts +97 -9
- package/src/cli/commands/duplicates.ts +122 -0
- package/src/cli/commands/embeddings.ts +529 -0
- package/src/cli/commands/index-cmd.ts +210 -30
- package/src/cli/commands/index.ts +3 -0
- package/src/cli/commands/search.ts +894 -64
- package/src/cli/commands/stats.ts +3 -0
- package/src/cli/commands/tree.ts +26 -5
- package/src/cli/config-layer.ts +176 -0
- package/src/cli/error-handler.test.ts +235 -0
- package/src/cli/error-handler.ts +655 -0
- package/src/cli/flag-schemas.ts +66 -0
- package/src/cli/help.ts +209 -7
- package/src/cli/main.ts +348 -58
- package/src/cli/options.ts +10 -0
- package/src/cli/shared-error-handling.ts +199 -0
- package/src/cli/utils.ts +150 -17
- package/src/config/file-provider.test.ts +320 -0
- package/src/config/file-provider.ts +273 -0
- package/src/config/index.ts +72 -0
- package/src/config/integration.test.ts +667 -0
- package/src/config/precedence.test.ts +277 -0
- package/src/config/precedence.ts +451 -0
- package/src/config/schema.test.ts +414 -0
- package/src/config/schema.ts +603 -0
- package/src/config/service.test.ts +320 -0
- package/src/config/service.ts +243 -0
- package/src/config/testing.test.ts +264 -0
- package/src/config/testing.ts +110 -0
- package/src/core/types.ts +6 -33
- package/src/duplicates/detector.test.ts +183 -0
- package/src/duplicates/detector.ts +414 -0
- package/src/duplicates/index.ts +18 -0
- package/src/embeddings/embedding-namespace.test.ts +300 -0
- package/src/embeddings/embedding-namespace.ts +947 -0
- package/src/embeddings/heading-boost.test.ts +222 -0
- package/src/embeddings/hnsw-build-options.test.ts +198 -0
- package/src/embeddings/hyde.test.ts +272 -0
- package/src/embeddings/hyde.ts +264 -0
- package/src/embeddings/index.ts +2 -0
- package/src/embeddings/openai-provider.ts +332 -83
- package/src/embeddings/pricing.json +22 -0
- package/src/embeddings/provider-constants.ts +204 -0
- package/src/embeddings/provider-errors.test.ts +967 -0
- package/src/embeddings/provider-errors.ts +565 -0
- package/src/embeddings/provider-factory.test.ts +240 -0
- package/src/embeddings/provider-factory.ts +225 -0
- package/src/embeddings/provider-integration.test.ts +788 -0
- package/src/embeddings/query-preprocessing.test.ts +187 -0
- package/src/embeddings/semantic-search-threshold.test.ts +508 -0
- package/src/embeddings/semantic-search.ts +780 -93
- package/src/embeddings/types.ts +293 -16
- package/src/embeddings/vector-store.ts +486 -77
- package/src/embeddings/voyage-provider.ts +313 -0
- package/src/errors/errors.test.ts +845 -0
- package/src/errors/index.ts +533 -0
- package/src/index/ignore-patterns.test.ts +354 -0
- package/src/index/ignore-patterns.ts +305 -0
- package/src/index/indexer.ts +286 -48
- package/src/index/storage.ts +94 -30
- package/src/index/types.ts +40 -2
- package/src/index/watcher.ts +67 -9
- package/src/index.ts +22 -0
- package/src/integration/search-keyword.test.ts +678 -0
- package/src/mcp/server.ts +135 -6
- package/src/parser/parser.ts +18 -19
- package/src/parser/section-filter.test.ts +277 -0
- package/src/parser/section-filter.ts +125 -3
- package/src/search/__tests__/hybrid-search.test.ts +650 -0
- package/src/search/bm25-store.ts +366 -0
- package/src/search/cross-encoder.test.ts +253 -0
- package/src/search/cross-encoder.ts +406 -0
- package/src/search/fuzzy-search.test.ts +419 -0
- package/src/search/fuzzy-search.ts +273 -0
- package/src/search/hybrid-search.ts +448 -0
- package/src/search/path-matcher.test.ts +276 -0
- package/src/search/path-matcher.ts +33 -0
- package/src/search/searcher.test.ts +99 -1
- package/src/search/searcher.ts +189 -67
- package/src/search/wink-bm25.d.ts +30 -0
- package/src/summarization/cli-providers/claude.ts +202 -0
- package/src/summarization/cli-providers/detection.test.ts +273 -0
- package/src/summarization/cli-providers/detection.ts +118 -0
- package/src/summarization/cli-providers/index.ts +8 -0
- package/src/summarization/cost.test.ts +139 -0
- package/src/summarization/cost.ts +102 -0
- package/src/summarization/error-handler.test.ts +127 -0
- package/src/summarization/error-handler.ts +111 -0
- package/src/summarization/index.ts +102 -0
- package/src/summarization/pipeline.test.ts +498 -0
- package/src/summarization/pipeline.ts +231 -0
- package/src/summarization/prompts.test.ts +269 -0
- package/src/summarization/prompts.ts +133 -0
- package/src/summarization/provider-factory.test.ts +396 -0
- package/src/summarization/provider-factory.ts +178 -0
- package/src/summarization/types.ts +184 -0
- package/src/summarize/summarizer.ts +104 -35
- package/src/types/huggingface-transformers.d.ts +66 -0
- package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/indexes/documents.json +4 -4
- package/tests/fixtures/cli/.mdcontext/indexes/sections.json +14 -0
- package/tests/integration/embed-index.test.ts +712 -0
- package/tests/integration/search-context.test.ts +469 -0
- package/tests/integration/search-semantic.test.ts +522 -0
- package/vitest.config.ts +1 -6
- package/AGENTS.md +0 -46
- package/tests/fixtures/cli/.mdcontext/vectors.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/vectors.meta.json +0 -1264
|
@@ -1,12 +1,28 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Vector store using hnswlib-node
|
|
3
|
+
*
|
|
4
|
+
* Supports both legacy (flat) and namespaced storage layouts:
|
|
5
|
+
* - Legacy: .mdcontext/vectors.bin, .mdcontext/vectors.meta.bin
|
|
6
|
+
* - Namespaced: .mdcontext/embeddings/{namespace}/vectors.bin, vectors.meta.bin
|
|
7
|
+
*
|
|
8
|
+
* New indexes are written using namespaced storage. Existing legacy indexes
|
|
9
|
+
* continue to be loaded from their original flat locations; this module does
|
|
10
|
+
* not perform automatic migration between layouts.
|
|
3
11
|
*/
|
|
4
12
|
|
|
5
13
|
import * as fs from 'node:fs/promises'
|
|
6
14
|
import * as path from 'node:path'
|
|
15
|
+
import * as msgpack from '@msgpack/msgpack'
|
|
7
16
|
import { Effect } from 'effect'
|
|
8
17
|
import HierarchicalNSW from 'hnswlib-node'
|
|
18
|
+
import { DimensionMismatchError, VectorStoreError } from '../errors/index.js'
|
|
9
19
|
import { INDEX_DIR } from '../index/types.js'
|
|
20
|
+
import {
|
|
21
|
+
generateNamespace,
|
|
22
|
+
getNamespaceDir,
|
|
23
|
+
getMetaPath as getNamespacedMetaPath,
|
|
24
|
+
getVectorPath as getNamespacedVectorPath,
|
|
25
|
+
} from './embedding-namespace.js'
|
|
10
26
|
import type { VectorEntry, VectorIndex } from './types.js'
|
|
11
27
|
|
|
12
28
|
// ============================================================================
|
|
@@ -14,24 +30,49 @@ import type { VectorEntry, VectorIndex } from './types.js'
|
|
|
14
30
|
// ============================================================================
|
|
15
31
|
|
|
16
32
|
const VECTOR_INDEX_FILE = 'vectors.bin'
|
|
17
|
-
const VECTOR_META_FILE = 'vectors.meta.
|
|
33
|
+
const VECTOR_META_FILE = 'vectors.meta.bin'
|
|
18
34
|
const INDEX_VERSION = 1
|
|
19
35
|
|
|
20
36
|
// ============================================================================
|
|
21
37
|
// Vector Store
|
|
22
38
|
// ============================================================================
|
|
23
39
|
|
|
40
|
+
export interface VectorSearchOptions {
|
|
41
|
+
/** efSearch parameter for HNSW (controls recall/speed tradeoff, default: 100) */
|
|
42
|
+
readonly efSearch?: number | undefined
|
|
43
|
+
}
|
|
44
|
+
|
|
24
45
|
export interface VectorStore {
|
|
25
46
|
readonly rootPath: string
|
|
26
47
|
readonly dimensions: number
|
|
27
|
-
add(entries: VectorEntry[]): Effect.Effect<void,
|
|
48
|
+
add(entries: VectorEntry[]): Effect.Effect<void, VectorStoreError>
|
|
28
49
|
search(
|
|
29
50
|
vector: number[],
|
|
30
51
|
limit: number,
|
|
31
52
|
threshold?: number,
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
53
|
+
options?: VectorSearchOptions,
|
|
54
|
+
): Effect.Effect<VectorSearchResult[], VectorStoreError>
|
|
55
|
+
/**
|
|
56
|
+
* Search with additional stats about below-threshold results.
|
|
57
|
+
* Used to provide feedback when 0 results pass the threshold.
|
|
58
|
+
*/
|
|
59
|
+
searchWithStats(
|
|
60
|
+
vector: number[],
|
|
61
|
+
limit: number,
|
|
62
|
+
threshold?: number,
|
|
63
|
+
options?: VectorSearchOptions,
|
|
64
|
+
): Effect.Effect<VectorSearchResultWithStats, VectorStoreError>
|
|
65
|
+
save(): Effect.Effect<void, VectorStoreError>
|
|
66
|
+
/**
|
|
67
|
+
* Load the vector store from disk.
|
|
68
|
+
*
|
|
69
|
+
* @returns VectorStoreLoadResult with loaded status and any warnings
|
|
70
|
+
* @throws DimensionMismatchError if the stored dimensions don't match current provider
|
|
71
|
+
*/
|
|
72
|
+
load(): Effect.Effect<
|
|
73
|
+
VectorStoreLoadResult,
|
|
74
|
+
VectorStoreError | DimensionMismatchError
|
|
75
|
+
>
|
|
35
76
|
getStats(): VectorStoreStats
|
|
36
77
|
}
|
|
37
78
|
|
|
@@ -43,14 +84,48 @@ export interface VectorSearchResult {
|
|
|
43
84
|
readonly similarity: number
|
|
44
85
|
}
|
|
45
86
|
|
|
87
|
+
/**
|
|
88
|
+
* Extended search result with metadata about below-threshold results.
|
|
89
|
+
* Used to provide user feedback when 0 results pass the threshold.
|
|
90
|
+
*/
|
|
91
|
+
export interface VectorSearchResultWithStats {
|
|
92
|
+
readonly results: VectorSearchResult[]
|
|
93
|
+
/** Number of results that were found but below threshold */
|
|
94
|
+
readonly belowThresholdCount: number
|
|
95
|
+
/** Highest similarity score among below-threshold results (if any) */
|
|
96
|
+
readonly belowThresholdHighest: number | null
|
|
97
|
+
}
|
|
98
|
+
|
|
46
99
|
export interface VectorStoreStats {
|
|
47
100
|
readonly count: number
|
|
48
101
|
readonly dimensions: number
|
|
49
102
|
readonly provider: string
|
|
103
|
+
readonly providerModel?: string | undefined
|
|
50
104
|
readonly totalCost: number
|
|
51
105
|
readonly totalTokens: number
|
|
52
106
|
}
|
|
53
107
|
|
|
108
|
+
/**
|
|
109
|
+
* Result of loading a vector store, including any warnings about config mismatches.
|
|
110
|
+
*/
|
|
111
|
+
export interface VectorStoreLoadResult {
|
|
112
|
+
/** Whether the index was loaded successfully */
|
|
113
|
+
readonly loaded: boolean
|
|
114
|
+
/** Warning about HNSW parameter mismatch (if any) */
|
|
115
|
+
readonly hnswMismatch?: HnswMismatchWarning | undefined
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
/**
|
|
119
|
+
* Warning when HNSW parameters in config differ from stored index parameters.
|
|
120
|
+
* The index was built with different parameters than currently configured.
|
|
121
|
+
*/
|
|
122
|
+
export interface HnswMismatchWarning {
|
|
123
|
+
/** Current config values */
|
|
124
|
+
readonly configParams: { m: number; efConstruction: number }
|
|
125
|
+
/** Values stored in the index */
|
|
126
|
+
readonly indexParams: { m: number; efConstruction: number }
|
|
127
|
+
}
|
|
128
|
+
|
|
54
129
|
// ============================================================================
|
|
55
130
|
// Implementation
|
|
56
131
|
// ============================================================================
|
|
@@ -64,24 +139,73 @@ class HnswVectorStore implements VectorStore {
|
|
|
64
139
|
private idToIndex: Map<string, number> = new Map()
|
|
65
140
|
private nextIndex = 0
|
|
66
141
|
private provider = 'unknown'
|
|
142
|
+
private providerModel: string | undefined = undefined
|
|
143
|
+
private providerBaseURL: string | undefined = undefined
|
|
67
144
|
private totalCost = 0
|
|
68
145
|
private totalTokens = 0
|
|
69
146
|
|
|
70
|
-
|
|
147
|
+
// HNSW build parameters
|
|
148
|
+
private readonly hnswM: number
|
|
149
|
+
private readonly hnswEfConstruction: number
|
|
150
|
+
|
|
151
|
+
// Namespace support - when set, uses namespaced storage paths
|
|
152
|
+
private namespace: string | undefined = undefined
|
|
153
|
+
|
|
154
|
+
constructor(
|
|
155
|
+
rootPath: string,
|
|
156
|
+
dimensions: number,
|
|
157
|
+
hnswOptions?: HnswBuildOptions,
|
|
158
|
+
) {
|
|
71
159
|
this.rootPath = path.resolve(rootPath)
|
|
72
160
|
this.dimensions = dimensions
|
|
161
|
+
this.hnswM = hnswOptions?.m ?? 16
|
|
162
|
+
this.hnswEfConstruction = hnswOptions?.efConstruction ?? 200
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
/**
|
|
166
|
+
* Set the namespace for this vector store.
|
|
167
|
+
* When set, all storage operations use the namespaced path.
|
|
168
|
+
*/
|
|
169
|
+
setNamespace(namespace: string): void {
|
|
170
|
+
this.namespace = namespace
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
/**
|
|
174
|
+
* Get the current namespace (if any).
|
|
175
|
+
*/
|
|
176
|
+
getNamespace(): string | undefined {
|
|
177
|
+
return this.namespace
|
|
73
178
|
}
|
|
74
179
|
|
|
180
|
+
/**
|
|
181
|
+
* Get the index directory path.
|
|
182
|
+
* Returns namespaced path if namespace is set, otherwise legacy path.
|
|
183
|
+
*/
|
|
75
184
|
private getIndexDir(): string {
|
|
185
|
+
if (this.namespace) {
|
|
186
|
+
return getNamespaceDir(this.rootPath, this.namespace)
|
|
187
|
+
}
|
|
76
188
|
return path.join(this.rootPath, INDEX_DIR)
|
|
77
189
|
}
|
|
78
190
|
|
|
191
|
+
/**
|
|
192
|
+
* Get the vector index file path.
|
|
193
|
+
*/
|
|
79
194
|
private getVectorPath(): string {
|
|
80
|
-
|
|
195
|
+
if (this.namespace) {
|
|
196
|
+
return getNamespacedVectorPath(this.rootPath, this.namespace)
|
|
197
|
+
}
|
|
198
|
+
return path.join(this.rootPath, INDEX_DIR, VECTOR_INDEX_FILE)
|
|
81
199
|
}
|
|
82
200
|
|
|
201
|
+
/**
|
|
202
|
+
* Get the metadata file path.
|
|
203
|
+
*/
|
|
83
204
|
private getMetaPath(): string {
|
|
84
|
-
|
|
205
|
+
if (this.namespace) {
|
|
206
|
+
return getNamespacedMetaPath(this.rootPath, this.namespace)
|
|
207
|
+
}
|
|
208
|
+
return path.join(this.rootPath, INDEX_DIR, VECTOR_META_FILE)
|
|
85
209
|
}
|
|
86
210
|
|
|
87
211
|
private ensureIndex(): HierarchicalNSW.HierarchicalNSW {
|
|
@@ -91,32 +215,41 @@ class HnswVectorStore implements VectorStore {
|
|
|
91
215
|
'cosine',
|
|
92
216
|
this.dimensions,
|
|
93
217
|
)
|
|
94
|
-
|
|
218
|
+
// Use configured HNSW parameters (M, efConstruction, randomSeed)
|
|
219
|
+
this.index.initIndex(10000, this.hnswM, this.hnswEfConstruction, 100)
|
|
95
220
|
}
|
|
96
221
|
return this.index
|
|
97
222
|
}
|
|
98
223
|
|
|
99
|
-
add(entries: VectorEntry[]): Effect.Effect<void,
|
|
100
|
-
return Effect.
|
|
101
|
-
|
|
224
|
+
add(entries: VectorEntry[]): Effect.Effect<void, VectorStoreError> {
|
|
225
|
+
return Effect.try({
|
|
226
|
+
try: () => {
|
|
227
|
+
const index = this.ensureIndex()
|
|
102
228
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
229
|
+
for (const entry of entries) {
|
|
230
|
+
// Skip if already exists
|
|
231
|
+
if (this.idToIndex.has(entry.id)) {
|
|
232
|
+
continue
|
|
233
|
+
}
|
|
108
234
|
|
|
109
|
-
|
|
235
|
+
const idx = this.nextIndex++
|
|
110
236
|
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
237
|
+
// Resize if needed
|
|
238
|
+
if (idx >= index.getMaxElements()) {
|
|
239
|
+
index.resizeIndex(index.getMaxElements() * 2)
|
|
240
|
+
}
|
|
115
241
|
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
242
|
+
index.addPoint(entry.embedding as number[], idx)
|
|
243
|
+
this.entries.set(idx, entry)
|
|
244
|
+
this.idToIndex.set(entry.id, idx)
|
|
245
|
+
}
|
|
246
|
+
},
|
|
247
|
+
catch: (e) =>
|
|
248
|
+
new VectorStoreError({
|
|
249
|
+
operation: 'add',
|
|
250
|
+
message: e instanceof Error ? e.message : String(e),
|
|
251
|
+
cause: e,
|
|
252
|
+
}),
|
|
120
253
|
})
|
|
121
254
|
}
|
|
122
255
|
|
|
@@ -124,36 +257,120 @@ class HnswVectorStore implements VectorStore {
|
|
|
124
257
|
vector: number[],
|
|
125
258
|
limit: number,
|
|
126
259
|
threshold = 0,
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
vector,
|
|
135
|
-
Math.min(limit, this.entries.size),
|
|
136
|
-
)
|
|
137
|
-
const results: VectorSearchResult[] = []
|
|
260
|
+
options?: VectorSearchOptions,
|
|
261
|
+
): Effect.Effect<VectorSearchResult[], VectorStoreError> {
|
|
262
|
+
return Effect.try({
|
|
263
|
+
try: () => {
|
|
264
|
+
if (!this.index || this.entries.size === 0) {
|
|
265
|
+
return []
|
|
266
|
+
}
|
|
138
267
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
268
|
+
// Set efSearch if provided (controls recall/speed tradeoff)
|
|
269
|
+
if (options?.efSearch !== undefined) {
|
|
270
|
+
this.index.setEf(options.efSearch)
|
|
271
|
+
}
|
|
142
272
|
|
|
143
|
-
|
|
144
|
-
|
|
273
|
+
const result = this.index.searchKnn(
|
|
274
|
+
vector,
|
|
275
|
+
Math.min(limit, this.entries.size),
|
|
276
|
+
)
|
|
277
|
+
const results: VectorSearchResult[] = []
|
|
278
|
+
|
|
279
|
+
for (let i = 0; i < result.neighbors.length; i++) {
|
|
280
|
+
const idx = result.neighbors[i]
|
|
281
|
+
const distance = result.distances[i]
|
|
282
|
+
|
|
283
|
+
if (idx === undefined || distance === undefined) {
|
|
284
|
+
continue
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
// Convert distance to similarity (cosine distance to cosine similarity)
|
|
288
|
+
// hnswlib returns 1 - cosine_similarity for cosine space
|
|
289
|
+
const similarity = 1 - distance
|
|
290
|
+
|
|
291
|
+
if (similarity < threshold) {
|
|
292
|
+
continue
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
const entry = this.entries.get(idx)
|
|
296
|
+
if (entry) {
|
|
297
|
+
results.push({
|
|
298
|
+
id: entry.id,
|
|
299
|
+
sectionId: entry.sectionId,
|
|
300
|
+
documentPath: entry.documentPath,
|
|
301
|
+
heading: entry.heading,
|
|
302
|
+
similarity,
|
|
303
|
+
})
|
|
304
|
+
}
|
|
145
305
|
}
|
|
146
306
|
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
307
|
+
return results
|
|
308
|
+
},
|
|
309
|
+
catch: (e) =>
|
|
310
|
+
new VectorStoreError({
|
|
311
|
+
operation: 'search',
|
|
312
|
+
message: e instanceof Error ? e.message : String(e),
|
|
313
|
+
cause: e,
|
|
314
|
+
}),
|
|
315
|
+
})
|
|
316
|
+
}
|
|
150
317
|
|
|
151
|
-
|
|
152
|
-
|
|
318
|
+
searchWithStats(
|
|
319
|
+
vector: number[],
|
|
320
|
+
limit: number,
|
|
321
|
+
threshold = 0,
|
|
322
|
+
options?: VectorSearchOptions,
|
|
323
|
+
): Effect.Effect<VectorSearchResultWithStats, VectorStoreError> {
|
|
324
|
+
return Effect.try({
|
|
325
|
+
try: () => {
|
|
326
|
+
if (!this.index || this.entries.size === 0) {
|
|
327
|
+
return {
|
|
328
|
+
results: [],
|
|
329
|
+
belowThresholdCount: 0,
|
|
330
|
+
belowThresholdHighest: null,
|
|
331
|
+
}
|
|
153
332
|
}
|
|
154
333
|
|
|
155
|
-
|
|
156
|
-
if (
|
|
334
|
+
// Set efSearch if provided (controls recall/speed tradeoff)
|
|
335
|
+
if (options?.efSearch !== undefined) {
|
|
336
|
+
this.index.setEf(options.efSearch)
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
const result = this.index.searchKnn(
|
|
340
|
+
vector,
|
|
341
|
+
Math.min(limit, this.entries.size),
|
|
342
|
+
)
|
|
343
|
+
const results: VectorSearchResult[] = []
|
|
344
|
+
let belowThresholdCount = 0
|
|
345
|
+
let belowThresholdHighest: number | null = null
|
|
346
|
+
|
|
347
|
+
for (let i = 0; i < result.neighbors.length; i++) {
|
|
348
|
+
const idx = result.neighbors[i]
|
|
349
|
+
const distance = result.distances[i]
|
|
350
|
+
|
|
351
|
+
if (idx === undefined || distance === undefined) {
|
|
352
|
+
continue
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
// Convert distance to similarity (cosine distance to cosine similarity)
|
|
356
|
+
// hnswlib returns 1 - cosine_similarity for cosine space
|
|
357
|
+
const similarity = 1 - distance
|
|
358
|
+
|
|
359
|
+
const entry = this.entries.get(idx)
|
|
360
|
+
if (!entry) continue
|
|
361
|
+
|
|
362
|
+
if (similarity < threshold) {
|
|
363
|
+
// Track below-threshold stats
|
|
364
|
+
belowThresholdCount++
|
|
365
|
+
if (
|
|
366
|
+
belowThresholdHighest === null ||
|
|
367
|
+
similarity > belowThresholdHighest
|
|
368
|
+
) {
|
|
369
|
+
belowThresholdHighest = similarity
|
|
370
|
+
}
|
|
371
|
+
continue
|
|
372
|
+
}
|
|
373
|
+
|
|
157
374
|
results.push({
|
|
158
375
|
id: entry.id,
|
|
159
376
|
sectionId: entry.sectionId,
|
|
@@ -162,13 +379,23 @@ class HnswVectorStore implements VectorStore {
|
|
|
162
379
|
similarity,
|
|
163
380
|
})
|
|
164
381
|
}
|
|
165
|
-
}
|
|
166
382
|
|
|
167
|
-
|
|
383
|
+
return {
|
|
384
|
+
results,
|
|
385
|
+
belowThresholdCount,
|
|
386
|
+
belowThresholdHighest,
|
|
387
|
+
}
|
|
388
|
+
},
|
|
389
|
+
catch: (e) =>
|
|
390
|
+
new VectorStoreError({
|
|
391
|
+
operation: 'search',
|
|
392
|
+
message: e instanceof Error ? e.message : String(e),
|
|
393
|
+
cause: e,
|
|
394
|
+
}),
|
|
168
395
|
})
|
|
169
396
|
}
|
|
170
397
|
|
|
171
|
-
save(): Effect.Effect<void,
|
|
398
|
+
save(): Effect.Effect<void, VectorStoreError> {
|
|
172
399
|
return Effect.gen(
|
|
173
400
|
function* (this: HnswVectorStore) {
|
|
174
401
|
if (!this.index) {
|
|
@@ -176,17 +403,33 @@ class HnswVectorStore implements VectorStore {
|
|
|
176
403
|
}
|
|
177
404
|
|
|
178
405
|
const indexDir = this.getIndexDir()
|
|
179
|
-
yield* Effect.
|
|
406
|
+
yield* Effect.tryPromise({
|
|
407
|
+
try: () => fs.mkdir(indexDir, { recursive: true }),
|
|
408
|
+
catch: (e) =>
|
|
409
|
+
new VectorStoreError({
|
|
410
|
+
operation: 'save',
|
|
411
|
+
message: `Failed to create directory: ${e instanceof Error ? e.message : String(e)}`,
|
|
412
|
+
cause: e,
|
|
413
|
+
}),
|
|
414
|
+
})
|
|
180
415
|
|
|
181
416
|
// Save the hnswlib index
|
|
182
|
-
yield* Effect.
|
|
183
|
-
this.index!.writeIndex(this.getVectorPath()),
|
|
184
|
-
|
|
417
|
+
yield* Effect.tryPromise({
|
|
418
|
+
try: () => this.index!.writeIndex(this.getVectorPath()),
|
|
419
|
+
catch: (e) =>
|
|
420
|
+
new VectorStoreError({
|
|
421
|
+
operation: 'save',
|
|
422
|
+
message: `Failed to write index: ${e instanceof Error ? e.message : String(e)}`,
|
|
423
|
+
cause: e,
|
|
424
|
+
}),
|
|
425
|
+
})
|
|
185
426
|
|
|
186
427
|
// Save metadata
|
|
187
428
|
const meta: VectorIndex = {
|
|
188
429
|
version: INDEX_VERSION,
|
|
189
430
|
provider: this.provider,
|
|
431
|
+
providerModel: this.providerModel,
|
|
432
|
+
providerBaseURL: this.providerBaseURL,
|
|
190
433
|
dimensions: this.dimensions,
|
|
191
434
|
entries: Object.fromEntries(
|
|
192
435
|
Array.from(this.entries.entries()).map(([idx, entry]) => [
|
|
@@ -198,44 +441,135 @@ class HnswVectorStore implements VectorStore {
|
|
|
198
441
|
totalTokens: this.totalTokens,
|
|
199
442
|
createdAt: new Date().toISOString(),
|
|
200
443
|
updatedAt: new Date().toISOString(),
|
|
444
|
+
// Store HNSW build parameters for validation on load
|
|
445
|
+
hnswParams: {
|
|
446
|
+
m: this.hnswM,
|
|
447
|
+
efConstruction: this.hnswEfConstruction,
|
|
448
|
+
},
|
|
201
449
|
}
|
|
202
450
|
|
|
203
|
-
yield* Effect.
|
|
204
|
-
|
|
205
|
-
|
|
451
|
+
yield* Effect.tryPromise({
|
|
452
|
+
try: async () => {
|
|
453
|
+
// Size validation
|
|
454
|
+
const estimatedSize = this.entries.size * 15000
|
|
455
|
+
if (estimatedSize > 100_000_000) {
|
|
456
|
+
console.warn(
|
|
457
|
+
`Large metadata detected: ~${(estimatedSize / 1e6).toFixed(0)}MB. ` +
|
|
458
|
+
`Consider indexing subdirectories separately.`,
|
|
459
|
+
)
|
|
460
|
+
}
|
|
461
|
+
|
|
462
|
+
// Encode with MessagePack and write
|
|
463
|
+
const encoded = msgpack.encode(meta)
|
|
464
|
+
await fs.writeFile(this.getMetaPath(), encoded)
|
|
465
|
+
},
|
|
466
|
+
catch: (e) =>
|
|
467
|
+
new VectorStoreError({
|
|
468
|
+
operation: 'save',
|
|
469
|
+
message: `Failed to write metadata: ${e instanceof Error ? e.message : String(e)}`,
|
|
470
|
+
cause: e,
|
|
471
|
+
}),
|
|
472
|
+
})
|
|
206
473
|
}.bind(this),
|
|
207
474
|
)
|
|
208
475
|
}
|
|
209
476
|
|
|
210
|
-
load(): Effect.Effect<
|
|
477
|
+
load(): Effect.Effect<
|
|
478
|
+
VectorStoreLoadResult,
|
|
479
|
+
VectorStoreError | DimensionMismatchError
|
|
480
|
+
> {
|
|
211
481
|
return Effect.gen(
|
|
212
482
|
function* (this: HnswVectorStore) {
|
|
213
483
|
const vectorPath = this.getVectorPath()
|
|
214
484
|
const metaPath = this.getMetaPath()
|
|
215
485
|
|
|
216
|
-
// Check if files exist
|
|
486
|
+
// Check if files exist - catch file not found gracefully
|
|
487
|
+
// For metadata, check both binary (.bin) and JSON (.json) for migration
|
|
217
488
|
const filesExist = yield* Effect.tryPromise({
|
|
218
489
|
try: async () => {
|
|
219
490
|
await fs.access(vectorPath)
|
|
220
|
-
|
|
221
|
-
|
|
491
|
+
// Check if either binary or JSON metadata exists
|
|
492
|
+
try {
|
|
493
|
+
await fs.access(metaPath)
|
|
494
|
+
return true
|
|
495
|
+
} catch {
|
|
496
|
+
const jsonPath = metaPath.replace('.bin', '.json')
|
|
497
|
+
await fs.access(jsonPath)
|
|
498
|
+
return true
|
|
499
|
+
}
|
|
222
500
|
},
|
|
223
|
-
catch: () =>
|
|
224
|
-
|
|
501
|
+
catch: () =>
|
|
502
|
+
new VectorStoreError({
|
|
503
|
+
operation: 'load',
|
|
504
|
+
message: 'Files not found',
|
|
505
|
+
}),
|
|
506
|
+
}).pipe(
|
|
507
|
+
Effect.catchTag('VectorStoreError', () => Effect.succeed(false)),
|
|
508
|
+
)
|
|
225
509
|
|
|
226
510
|
if (!filesExist) {
|
|
227
|
-
return false
|
|
511
|
+
return { loaded: false }
|
|
228
512
|
}
|
|
229
513
|
|
|
230
|
-
// Load metadata first
|
|
231
|
-
const
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
514
|
+
// Load metadata - try binary first, fall back to JSON for migration
|
|
515
|
+
const loadedMeta = yield* Effect.tryPromise({
|
|
516
|
+
try: async () => {
|
|
517
|
+
// Try binary format first (new)
|
|
518
|
+
try {
|
|
519
|
+
await fs.access(metaPath)
|
|
520
|
+
const buffer = await fs.readFile(metaPath)
|
|
521
|
+
return msgpack.decode(buffer) as VectorIndex
|
|
522
|
+
} catch {
|
|
523
|
+
// Fall back to JSON for migration (old)
|
|
524
|
+
const jsonPath = metaPath.replace('.bin', '.json')
|
|
525
|
+
try {
|
|
526
|
+
await fs.access(jsonPath)
|
|
527
|
+
const json = await fs.readFile(jsonPath, 'utf-8')
|
|
528
|
+
const meta = JSON.parse(json) as VectorIndex
|
|
529
|
+
|
|
530
|
+
// Auto-migrate to binary format (safe for concurrent access)
|
|
531
|
+
try {
|
|
532
|
+
const encoded = msgpack.encode(meta)
|
|
533
|
+
await fs.writeFile(metaPath, encoded)
|
|
534
|
+
|
|
535
|
+
// Remove old JSON file (ignore errors if already deleted by another process)
|
|
536
|
+
await fs.unlink(jsonPath).catch(() => {})
|
|
537
|
+
} catch {
|
|
538
|
+
// Migration failed, but we have the data - continue
|
|
539
|
+
}
|
|
540
|
+
|
|
541
|
+
return meta
|
|
542
|
+
} catch {
|
|
543
|
+
throw new Error('Metadata file not found')
|
|
544
|
+
}
|
|
545
|
+
}
|
|
546
|
+
},
|
|
547
|
+
catch: (e) =>
|
|
548
|
+
new VectorStoreError({
|
|
549
|
+
operation: 'load',
|
|
550
|
+
message: `Failed to read metadata: ${e instanceof Error ? e.message : String(e)}`,
|
|
551
|
+
cause: e,
|
|
552
|
+
}),
|
|
553
|
+
})
|
|
554
|
+
|
|
555
|
+
// Apply legacy index migration: default to 'openai' if provider is missing
|
|
556
|
+
const meta: VectorIndex = {
|
|
557
|
+
...loadedMeta,
|
|
558
|
+
provider: loadedMeta.provider || 'openai',
|
|
559
|
+
}
|
|
235
560
|
|
|
236
|
-
// Verify dimensions match
|
|
561
|
+
// Verify dimensions match - fail with clear error if mismatch
|
|
237
562
|
if (meta.dimensions !== this.dimensions) {
|
|
238
|
-
return
|
|
563
|
+
return yield* Effect.fail(
|
|
564
|
+
new DimensionMismatchError({
|
|
565
|
+
corpusDimensions: meta.dimensions,
|
|
566
|
+
providerDimensions: this.dimensions,
|
|
567
|
+
corpusProvider: meta.providerModel
|
|
568
|
+
? `${meta.provider}:${meta.providerModel}`
|
|
569
|
+
: meta.provider,
|
|
570
|
+
path: this.rootPath,
|
|
571
|
+
}),
|
|
572
|
+
)
|
|
239
573
|
}
|
|
240
574
|
|
|
241
575
|
// Load the hnswlib index
|
|
@@ -243,7 +577,15 @@ class HnswVectorStore implements VectorStore {
|
|
|
243
577
|
'cosine',
|
|
244
578
|
this.dimensions,
|
|
245
579
|
)
|
|
246
|
-
yield* Effect.
|
|
580
|
+
yield* Effect.tryPromise({
|
|
581
|
+
try: () => this.index!.readIndex(vectorPath),
|
|
582
|
+
catch: (e) =>
|
|
583
|
+
new VectorStoreError({
|
|
584
|
+
operation: 'load',
|
|
585
|
+
message: `Failed to read index: ${e instanceof Error ? e.message : String(e)}`,
|
|
586
|
+
cause: e,
|
|
587
|
+
}),
|
|
588
|
+
})
|
|
247
589
|
|
|
248
590
|
// Restore entries
|
|
249
591
|
this.entries.clear()
|
|
@@ -258,10 +600,28 @@ class HnswVectorStore implements VectorStore {
|
|
|
258
600
|
}
|
|
259
601
|
|
|
260
602
|
this.provider = meta.provider
|
|
603
|
+
this.providerModel = meta.providerModel
|
|
604
|
+
this.providerBaseURL = meta.providerBaseURL
|
|
261
605
|
this.totalCost = meta.totalCost
|
|
262
606
|
this.totalTokens = meta.totalTokens
|
|
263
607
|
|
|
264
|
-
|
|
608
|
+
// Check for HNSW parameter mismatch
|
|
609
|
+
let hnswMismatch: HnswMismatchWarning | undefined
|
|
610
|
+
if (meta.hnswParams) {
|
|
611
|
+
const indexM = meta.hnswParams.m
|
|
612
|
+
const indexEf = meta.hnswParams.efConstruction
|
|
613
|
+
if (indexM !== this.hnswM || indexEf !== this.hnswEfConstruction) {
|
|
614
|
+
hnswMismatch = {
|
|
615
|
+
configParams: {
|
|
616
|
+
m: this.hnswM,
|
|
617
|
+
efConstruction: this.hnswEfConstruction,
|
|
618
|
+
},
|
|
619
|
+
indexParams: { m: indexM, efConstruction: indexEf },
|
|
620
|
+
}
|
|
621
|
+
}
|
|
622
|
+
}
|
|
623
|
+
|
|
624
|
+
return { loaded: true, hnswMismatch }
|
|
265
625
|
}.bind(this),
|
|
266
626
|
)
|
|
267
627
|
}
|
|
@@ -271,13 +631,16 @@ class HnswVectorStore implements VectorStore {
|
|
|
271
631
|
count: this.entries.size,
|
|
272
632
|
dimensions: this.dimensions,
|
|
273
633
|
provider: this.provider,
|
|
634
|
+
providerModel: this.providerModel,
|
|
274
635
|
totalCost: this.totalCost,
|
|
275
636
|
totalTokens: this.totalTokens,
|
|
276
637
|
}
|
|
277
638
|
}
|
|
278
639
|
|
|
279
|
-
setProvider(name: string): void {
|
|
640
|
+
setProvider(name: string, model?: string, baseURL?: string): void {
|
|
280
641
|
this.provider = name
|
|
642
|
+
this.providerModel = model
|
|
643
|
+
this.providerBaseURL = baseURL
|
|
281
644
|
}
|
|
282
645
|
|
|
283
646
|
addCost(cost: number, tokens: number): void {
|
|
@@ -290,10 +653,56 @@ class HnswVectorStore implements VectorStore {
|
|
|
290
653
|
// Factory
|
|
291
654
|
// ============================================================================
|
|
292
655
|
|
|
656
|
+
/**
|
|
657
|
+
* HNSW build parameters for index construction.
|
|
658
|
+
* These affect index quality and build time - changes require index rebuild.
|
|
659
|
+
*/
|
|
660
|
+
export interface HnswBuildOptions {
|
|
661
|
+
/** Max connections per node (default: 16). Higher = better recall, larger index. */
|
|
662
|
+
readonly m?: number | undefined
|
|
663
|
+
/** Construction-time search width (default: 200). Higher = better quality, slower builds. */
|
|
664
|
+
readonly efConstruction?: number | undefined
|
|
665
|
+
}
|
|
666
|
+
|
|
667
|
+
/**
|
|
668
|
+
* Create a vector store for the given root path.
|
|
669
|
+
*
|
|
670
|
+
* @param rootPath - Root directory containing the index
|
|
671
|
+
* @param dimensions - Embedding dimensions
|
|
672
|
+
* @param hnswOptions - Optional HNSW build parameters
|
|
673
|
+
* @returns A new VectorStore instance
|
|
674
|
+
*/
|
|
293
675
|
export const createVectorStore = (
|
|
294
676
|
rootPath: string,
|
|
295
677
|
dimensions: number,
|
|
296
|
-
|
|
678
|
+
hnswOptions?: HnswBuildOptions,
|
|
679
|
+
): VectorStore => new HnswVectorStore(rootPath, dimensions, hnswOptions)
|
|
680
|
+
|
|
681
|
+
/**
|
|
682
|
+
* Create a namespaced vector store for a specific provider/model.
|
|
683
|
+
*
|
|
684
|
+
* Uses the new namespaced storage structure:
|
|
685
|
+
* .mdcontext/embeddings/{provider}_{model}_{dimensions}/vectors.bin
|
|
686
|
+
*
|
|
687
|
+
* @param rootPath - Root directory containing the index
|
|
688
|
+
* @param provider - Provider name (e.g., "openai", "voyage")
|
|
689
|
+
* @param model - Model name (e.g., "text-embedding-3-small")
|
|
690
|
+
* @param dimensions - Embedding dimensions
|
|
691
|
+
* @param hnswOptions - Optional HNSW build parameters
|
|
692
|
+
* @returns A new VectorStore instance with namespace set
|
|
693
|
+
*/
|
|
694
|
+
export const createNamespacedVectorStore = (
|
|
695
|
+
rootPath: string,
|
|
696
|
+
provider: string,
|
|
697
|
+
model: string,
|
|
698
|
+
dimensions: number,
|
|
699
|
+
hnswOptions?: HnswBuildOptions,
|
|
700
|
+
): VectorStore => {
|
|
701
|
+
const namespace = generateNamespace(provider, model, dimensions)
|
|
702
|
+
const store = new HnswVectorStore(rootPath, dimensions, hnswOptions)
|
|
703
|
+
store.setNamespace(namespace)
|
|
704
|
+
return store
|
|
705
|
+
}
|
|
297
706
|
|
|
298
707
|
// Export the class for type access
|
|
299
708
|
export { HnswVectorStore }
|