mdcontext 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.changeset/config.json +9 -9
- package/.claude/settings.local.json +25 -0
- package/.github/workflows/claude-code-review.yml +44 -0
- package/.github/workflows/claude.yml +85 -0
- package/CONTRIBUTING.md +186 -0
- package/NOTES/NOTES +44 -0
- package/README.md +206 -3
- package/biome.json +1 -1
- package/dist/chunk-23UPXDNL.js +3044 -0
- package/dist/chunk-2W7MO2DL.js +1366 -0
- package/dist/chunk-3NUAZGMA.js +1689 -0
- package/dist/chunk-7TOWB2XB.js +366 -0
- package/dist/chunk-7XOTOADQ.js +3065 -0
- package/dist/chunk-AH2PDM2K.js +3042 -0
- package/dist/chunk-BNXWSZ63.js +3742 -0
- package/dist/chunk-BTL5DJVU.js +3222 -0
- package/dist/chunk-HDHYG7E4.js +104 -0
- package/dist/chunk-HLR4KZBP.js +3234 -0
- package/dist/chunk-IP3FRFEB.js +1045 -0
- package/dist/chunk-KHU56VDO.js +3042 -0
- package/dist/chunk-KRYIFLQR.js +85 -89
- package/dist/chunk-LBSDNLEM.js +287 -0
- package/dist/chunk-MNTQ7HCP.js +2643 -0
- package/dist/chunk-MUJELQQ6.js +1387 -0
- package/dist/chunk-MXJGMSLV.js +2199 -0
- package/dist/chunk-N6QJGC3Z.js +2636 -0
- package/dist/chunk-OBELGBPM.js +1713 -0
- package/dist/chunk-OT7R5XTA.js +3192 -0
- package/dist/chunk-P7X4RA2T.js +106 -0
- package/dist/chunk-PIDUQNC2.js +3185 -0
- package/dist/chunk-POGCDIH4.js +3187 -0
- package/dist/chunk-PSIEOQGZ.js +3043 -0
- package/dist/chunk-PVRT3IHA.js +3238 -0
- package/dist/chunk-QNN4TT23.js +1430 -0
- package/dist/chunk-RE3R45RJ.js +3042 -0
- package/dist/chunk-S7E6TFX6.js +718 -657
- package/dist/chunk-SG6GLU4U.js +1378 -0
- package/dist/chunk-SJCDV2ST.js +274 -0
- package/dist/chunk-SYE5XLF3.js +104 -0
- package/dist/chunk-T5VLYBZD.js +103 -0
- package/dist/chunk-TOQB7VWU.js +3238 -0
- package/dist/chunk-VFNMZ4ZQ.js +3228 -0
- package/dist/chunk-VVTGZNBT.js +1533 -1423
- package/dist/chunk-W7Q4RFEV.js +104 -0
- package/dist/chunk-XTYYVRLO.js +3190 -0
- package/dist/chunk-Y6MDYVJD.js +3063 -0
- package/dist/cli/main.js +4072 -629
- package/dist/index.d.ts +420 -33
- package/dist/index.js +8 -15
- package/dist/mcp/server.js +103 -7
- package/dist/schema-BAWSG7KY.js +22 -0
- package/dist/schema-E3QUPL26.js +20 -0
- package/dist/schema-EHL7WUT6.js +20 -0
- package/docs/019-USAGE.md +44 -5
- package/docs/020-current-implementation.md +8 -8
- package/docs/021-DOGFOODING-FINDINGS.md +1 -1
- package/docs/CONFIG.md +1123 -0
- package/docs/ERRORS.md +383 -0
- package/docs/summarization.md +320 -0
- package/justfile +40 -0
- package/package.json +39 -33
- package/research/INDEX.md +315 -0
- package/research/code-review/README.md +90 -0
- package/research/code-review/cli-error-handling-review.md +979 -0
- package/research/code-review/code-review-validation-report.md +464 -0
- package/research/code-review/main-ts-review.md +1128 -0
- package/research/config-docs/SUMMARY.md +357 -0
- package/research/config-docs/TEST-RESULTS.md +776 -0
- package/research/config-docs/TODO.md +542 -0
- package/research/config-docs/analysis.md +744 -0
- package/research/config-docs/fix-validation.md +502 -0
- package/research/config-docs/help-audit.md +264 -0
- package/research/config-docs/help-system-analysis.md +890 -0
- package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
- package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
- package/research/issue-review.md +603 -0
- package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
- package/research/llm-summarization/alternative-providers-2026.md +1428 -0
- package/research/llm-summarization/anthropic-2026.md +367 -0
- package/research/llm-summarization/claude-cli-integration.md +1706 -0
- package/research/llm-summarization/cli-integration-patterns.md +3155 -0
- package/research/llm-summarization/openai-2026.md +473 -0
- package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
- package/research/llm-summarization/opencode-cli-integration.md +1552 -0
- package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
- package/research/llm-summarization/prototype-results.md +56 -0
- package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
- package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
- package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
- package/research/mdcontext-pudding/01-index-embed.md +956 -0
- package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
- package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
- package/research/mdcontext-pudding/02-search.md +970 -0
- package/research/mdcontext-pudding/03-context.md +779 -0
- package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
- package/research/mdcontext-pudding/04-tree.md +704 -0
- package/research/mdcontext-pudding/05-config.md +1038 -0
- package/research/mdcontext-pudding/06-links-summary.txt +87 -0
- package/research/mdcontext-pudding/06-links.md +679 -0
- package/research/mdcontext-pudding/07-stats.md +693 -0
- package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
- package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
- package/research/mdcontext-pudding/README.md +168 -0
- package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
- package/research/research-quality-review.md +834 -0
- package/research/semantic-search/embedding-text-analysis.md +156 -0
- package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
- package/research/semantic-search/query-processing-analysis.md +207 -0
- package/research/semantic-search/root-cause-and-solution.md +114 -0
- package/research/semantic-search/threshold-validation-report.md +69 -0
- package/research/semantic-search/vector-search-analysis.md +63 -0
- package/research/test-path-issues.md +276 -0
- package/review/ALP-76/1-error-type-design.md +962 -0
- package/review/ALP-76/2-error-handling-patterns.md +906 -0
- package/review/ALP-76/3-error-presentation.md +624 -0
- package/review/ALP-76/4-test-coverage.md +625 -0
- package/review/ALP-76/5-migration-completeness.md +440 -0
- package/review/ALP-76/6-effect-best-practices.md +755 -0
- package/scripts/apply-branch-protection.sh +47 -0
- package/scripts/branch-protection-templates.json +79 -0
- package/scripts/prototype-summarization.ts +346 -0
- package/scripts/rebuild-hnswlib.js +32 -37
- package/scripts/setup-branch-protection.sh +64 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
- package/src/cli/argv-preprocessor.test.ts +2 -2
- package/src/cli/cli.test.ts +230 -33
- package/src/cli/commands/config-cmd.ts +642 -0
- package/src/cli/commands/context.ts +97 -9
- package/src/cli/commands/duplicates.ts +122 -0
- package/src/cli/commands/embeddings.ts +529 -0
- package/src/cli/commands/index-cmd.ts +210 -30
- package/src/cli/commands/index.ts +3 -0
- package/src/cli/commands/search.ts +894 -64
- package/src/cli/commands/stats.ts +3 -0
- package/src/cli/commands/tree.ts +26 -5
- package/src/cli/config-layer.ts +176 -0
- package/src/cli/error-handler.test.ts +235 -0
- package/src/cli/error-handler.ts +655 -0
- package/src/cli/flag-schemas.ts +66 -0
- package/src/cli/help.ts +209 -7
- package/src/cli/main.ts +348 -58
- package/src/cli/options.ts +10 -0
- package/src/cli/shared-error-handling.ts +199 -0
- package/src/cli/utils.ts +150 -17
- package/src/config/file-provider.test.ts +320 -0
- package/src/config/file-provider.ts +273 -0
- package/src/config/index.ts +72 -0
- package/src/config/integration.test.ts +667 -0
- package/src/config/precedence.test.ts +277 -0
- package/src/config/precedence.ts +451 -0
- package/src/config/schema.test.ts +414 -0
- package/src/config/schema.ts +603 -0
- package/src/config/service.test.ts +320 -0
- package/src/config/service.ts +243 -0
- package/src/config/testing.test.ts +264 -0
- package/src/config/testing.ts +110 -0
- package/src/core/types.ts +6 -33
- package/src/duplicates/detector.test.ts +183 -0
- package/src/duplicates/detector.ts +414 -0
- package/src/duplicates/index.ts +18 -0
- package/src/embeddings/embedding-namespace.test.ts +300 -0
- package/src/embeddings/embedding-namespace.ts +947 -0
- package/src/embeddings/heading-boost.test.ts +222 -0
- package/src/embeddings/hnsw-build-options.test.ts +198 -0
- package/src/embeddings/hyde.test.ts +272 -0
- package/src/embeddings/hyde.ts +264 -0
- package/src/embeddings/index.ts +2 -0
- package/src/embeddings/openai-provider.ts +332 -83
- package/src/embeddings/pricing.json +22 -0
- package/src/embeddings/provider-constants.ts +204 -0
- package/src/embeddings/provider-errors.test.ts +967 -0
- package/src/embeddings/provider-errors.ts +565 -0
- package/src/embeddings/provider-factory.test.ts +240 -0
- package/src/embeddings/provider-factory.ts +225 -0
- package/src/embeddings/provider-integration.test.ts +788 -0
- package/src/embeddings/query-preprocessing.test.ts +187 -0
- package/src/embeddings/semantic-search-threshold.test.ts +508 -0
- package/src/embeddings/semantic-search.ts +780 -93
- package/src/embeddings/types.ts +293 -16
- package/src/embeddings/vector-store.ts +486 -77
- package/src/embeddings/voyage-provider.ts +313 -0
- package/src/errors/errors.test.ts +845 -0
- package/src/errors/index.ts +533 -0
- package/src/index/ignore-patterns.test.ts +354 -0
- package/src/index/ignore-patterns.ts +305 -0
- package/src/index/indexer.ts +286 -48
- package/src/index/storage.ts +94 -30
- package/src/index/types.ts +40 -2
- package/src/index/watcher.ts +67 -9
- package/src/index.ts +22 -0
- package/src/integration/search-keyword.test.ts +678 -0
- package/src/mcp/server.ts +135 -6
- package/src/parser/parser.ts +18 -19
- package/src/parser/section-filter.test.ts +277 -0
- package/src/parser/section-filter.ts +125 -3
- package/src/search/__tests__/hybrid-search.test.ts +650 -0
- package/src/search/bm25-store.ts +366 -0
- package/src/search/cross-encoder.test.ts +253 -0
- package/src/search/cross-encoder.ts +406 -0
- package/src/search/fuzzy-search.test.ts +419 -0
- package/src/search/fuzzy-search.ts +273 -0
- package/src/search/hybrid-search.ts +448 -0
- package/src/search/path-matcher.test.ts +276 -0
- package/src/search/path-matcher.ts +33 -0
- package/src/search/searcher.test.ts +99 -1
- package/src/search/searcher.ts +189 -67
- package/src/search/wink-bm25.d.ts +30 -0
- package/src/summarization/cli-providers/claude.ts +202 -0
- package/src/summarization/cli-providers/detection.test.ts +273 -0
- package/src/summarization/cli-providers/detection.ts +118 -0
- package/src/summarization/cli-providers/index.ts +8 -0
- package/src/summarization/cost.test.ts +139 -0
- package/src/summarization/cost.ts +102 -0
- package/src/summarization/error-handler.test.ts +127 -0
- package/src/summarization/error-handler.ts +111 -0
- package/src/summarization/index.ts +102 -0
- package/src/summarization/pipeline.test.ts +498 -0
- package/src/summarization/pipeline.ts +231 -0
- package/src/summarization/prompts.test.ts +269 -0
- package/src/summarization/prompts.ts +133 -0
- package/src/summarization/provider-factory.test.ts +396 -0
- package/src/summarization/provider-factory.ts +178 -0
- package/src/summarization/types.ts +184 -0
- package/src/summarize/summarizer.ts +104 -35
- package/src/types/huggingface-transformers.d.ts +66 -0
- package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/indexes/documents.json +4 -4
- package/tests/fixtures/cli/.mdcontext/indexes/sections.json +14 -0
- package/tests/integration/embed-index.test.ts +712 -0
- package/tests/integration/search-context.test.ts +469 -0
- package/tests/integration/search-semantic.test.ts +522 -0
- package/vitest.config.ts +1 -6
- package/AGENTS.md +0 -46
- package/tests/fixtures/cli/.mdcontext/vectors.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/vectors.meta.json +0 -1264
|
@@ -0,0 +1,947 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedding Namespace Management
|
|
3
|
+
*
|
|
4
|
+
* Provides namespaced storage for multiple embedding providers/models.
|
|
5
|
+
* Each provider/model combination gets its own directory with isolated
|
|
6
|
+
* vector index and metadata.
|
|
7
|
+
*
|
|
8
|
+
* Directory structure:
|
|
9
|
+
* .mdcontext/
|
|
10
|
+
* embeddings/
|
|
11
|
+
* openai_text-embedding-3-small_512/
|
|
12
|
+
* vectors.bin
|
|
13
|
+
* vectors.meta.bin
|
|
14
|
+
* voyage_voyage-3_5-lite_1024/
|
|
15
|
+
* vectors.bin
|
|
16
|
+
* vectors.meta.bin
|
|
17
|
+
* active-provider.json (points to current active namespace)
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
import * as fs from 'node:fs/promises'
|
|
21
|
+
import * as path from 'node:path'
|
|
22
|
+
import * as msgpack from '@msgpack/msgpack'
|
|
23
|
+
import { Effect } from 'effect'
|
|
24
|
+
import { INDEX_DIR } from '../index/types.js'
|
|
25
|
+
import type { VectorIndex } from './types.js'
|
|
26
|
+
|
|
27
|
+
// ============================================================================
|
|
28
|
+
// Constants
|
|
29
|
+
// ============================================================================
|
|
30
|
+
|
|
31
|
+
// Sub-directory of the index directory that holds one folder per namespace.
const EMBEDDINGS_DIR = 'embeddings'

// File in the index directory that records the active namespace.
const ACTIVE_PROVIDER_FILE = 'active-provider.json'

// File names used inside each namespace folder.
const VECTOR_INDEX_FILE = 'vectors.bin'
const VECTOR_META_FILE = 'vectors.meta.bin'

// File names looked up directly in the index directory by the legacy
// (pre-namespace) path helpers, which exist for migration.
const LEGACY_VECTOR_INDEX_FILE = 'vectors.bin'
const LEGACY_VECTOR_META_FILE = 'vectors.meta.bin'
const LEGACY_VECTOR_META_JSON = 'vectors.meta.json'
|
|
38
|
+
|
|
39
|
+
// ============================================================================
|
|
40
|
+
// Types
|
|
41
|
+
// ============================================================================
|
|
42
|
+
|
|
43
|
+
/**
 * Shape of the record persisted in active-provider.json, identifying which
 * embedding namespace is currently in use.
 */
export interface ActiveProvider {
  /** Directory name of the namespace, e.g. "openai_text-embedding-3-small_512". */
  readonly namespace: string
  /** Embedding provider, e.g. "openai" or "voyage". */
  readonly provider: string
  /** Embedding model, e.g. "text-embedding-3-small". */
  readonly model: string
  /** Number of dimensions of the embeddings. */
  readonly dimensions: number
  /** Timestamp string recording when this namespace was made active. */
  readonly activatedAt: string
}
|
|
58
|
+
|
|
59
|
+
/**
 * Summary of one embedding namespace found on disk, as reported by
 * listNamespaces.
 */
export interface EmbeddingNamespace {
  /** Directory name of the namespace. */
  readonly namespace: string
  /** Embedding provider. */
  readonly provider: string
  /** Embedding model. */
  readonly model: string
  /** Number of dimensions of the embeddings. */
  readonly dimensions: number
  /** How many vectors the index holds. */
  readonly vectorCount: number
  /** Accumulated cost recorded for this index. */
  readonly totalCost: number
  /** Accumulated token usage recorded for this index. */
  readonly totalTokens: number
  /** Creation timestamp of the index. */
  readonly createdAt: string
  /** Timestamp of the most recent update to the index. */
  readonly updatedAt: string
  /** True when this namespace is the one named in active-provider.json. */
  readonly isActive: boolean
  /** Combined size, in bytes, of the index files. */
  readonly sizeBytes: number
}
|
|
86
|
+
|
|
87
|
+
/**
 * Error raised by the namespace helpers in this module.
 *
 * Carries a `_tag` discriminator, the name of the operation that failed and,
 * optionally, the underlying cause.
 */
export class EmbeddingNamespaceError extends Error {
  readonly _tag = 'EmbeddingNamespaceError'
  readonly operation: string
  readonly cause?: unknown

  constructor(params: { operation: string; message: string; cause?: unknown }) {
    super(params.message)
    const { operation, cause } = params
    this.name = 'EmbeddingNamespaceError'
    this.operation = operation
    // Always assigned, so `cause` is present (possibly undefined) on every instance.
    this.cause = cause
  }
}
|
|
99
|
+
|
|
100
|
+
// ============================================================================
|
|
101
|
+
// Namespace Path Utilities
|
|
102
|
+
// ============================================================================
|
|
103
|
+
|
|
104
|
+
/**
 * Generate a namespace directory name from provider info.
 * Format: provider_model_dimensions
 *
 * Provider and model are sanitized for filesystem use: every character other
 * than ASCII letters, digits and '-' is replaced by '_', and the result is
 * lower-cased. Note that a '.' in a model name therefore becomes '_'.
 *
 * @example
 * generateNamespace("openai", "text-embedding-3-small", 512)
 * // Returns: "openai_text-embedding-3-small_512"
 *
 * @param provider - Provider name (must be non-empty)
 * @param model - Model name (must be non-empty)
 * @param dimensions - Embedding dimensions; must be a positive safe integer
 * @returns The namespace directory name
 * @throws Error if provider or model is empty, or if dimensions is not a
 *   positive whole number
 */
export const generateNamespace = (
  provider: string,
  model: string,
  dimensions: number,
): string => {
  // Sanitize for filesystem: replace non-alphanumeric (except -) with _
  const sanitize = (s: string): string =>
    s.replace(/[^a-zA-Z0-9-]/g, '_').toLowerCase()

  const sanitizedProvider = sanitize(provider)
  const sanitizedModel = sanitize(model)

  // Sanitizing never removes characters, so these only trigger for empty input.
  if (sanitizedProvider.length === 0) {
    throw new Error('Provider name cannot be empty')
  }
  if (sanitizedModel.length === 0) {
    throw new Error('Model name cannot be empty')
  }
  if (dimensions <= 0 || !Number.isFinite(dimensions)) {
    throw new Error('Dimensions must be a positive number')
  }
  // Fractions (512.5) or huge values (1e21 -> "1e+21") would yield a directory
  // name whose trailing segment is not plain digits and cannot be parsed back.
  if (!Number.isSafeInteger(dimensions)) {
    throw new Error('Dimensions must be a whole number')
  }

  return `${sanitizedProvider}_${sanitizedModel}_${dimensions}`
}
|
|
140
|
+
|
|
141
|
+
/**
 * Parse a namespace directory name back into its components.
 *
 * The expected layout is `provider_model_dimensions`. The text after the last
 * underscore is the dimensions, the text before the first underscore is the
 * provider, and everything in between is the model (which may itself contain
 * underscores).
 *
 * @param namespace - Directory name to parse
 * @returns Parsed components, or null if the name does not follow the layout,
 *   has an empty provider or model, or has dimensions that are not a positive
 *   safe integer written in digits only
 */
export const parseNamespace = (
  namespace: string,
): { provider: string; model: string; dimensions: number } | null => {
  if (!namespace) return null

  const lastUnderscoreIdx = namespace.lastIndexOf('_')
  if (lastUnderscoreIdx === -1) return null

  // Strict validation: dimensions must be digits only
  const dimensionsStr = namespace.slice(lastUnderscoreIdx + 1)
  if (!/^\d+$/.test(dimensionsStr)) return null

  // A long digit run parses to an imprecise number or Infinity; reject those
  // instead of reporting a dimension count that was never written.
  const dimensions = parseInt(dimensionsStr, 10)
  if (!Number.isSafeInteger(dimensions) || dimensions <= 0) return null

  const providerModel = namespace.slice(0, lastUnderscoreIdx)
  const firstUnderscoreIdx = providerModel.indexOf('_')
  if (firstUnderscoreIdx === -1) return null

  const provider = providerModel.slice(0, firstUnderscoreIdx)
  const model = providerModel.slice(firstUnderscoreIdx + 1)

  // Reject empty provider or model
  if (provider.length === 0 || model.length === 0) return null

  return { provider, model, dimensions }
}
|
|
176
|
+
|
|
177
|
+
/**
 * Resolve the directory, under the index directory of `rootPath`, that holds
 * all embedding namespaces.
 */
export const getEmbeddingsDir = (rootPath: string): string => {
  const segments = [rootPath, INDEX_DIR, EMBEDDINGS_DIR]
  return path.join(...segments)
}
|
|
182
|
+
|
|
183
|
+
/**
 * Guard against namespaces that could escape their parent directory.
 *
 * A namespace is rejected when it contains a forward slash, a backslash,
 * the sequence "..", or a NUL character.
 *
 * @throws Error if any of those tokens is present
 */
const validateNamespace = (namespace: string): void => {
  const forbiddenTokens = ['/', '\\', '..', '\0']
  const isUnsafe = forbiddenTokens.some((token) => namespace.includes(token))
  if (!isUnsafe) return

  throw new Error(
    'Invalid namespace: contains path separators or traversal sequences',
  )
}
|
|
200
|
+
|
|
201
|
+
/**
 * Resolve the directory of a single namespace inside the embeddings directory.
 *
 * @param rootPath - Root directory
 * @param namespace - Namespace directory name
 * @returns The joined path `<embeddings dir>/<namespace>`
 * @throws Error if the namespace fails validation or would resolve to a
 *   location that is not strictly inside the embeddings directory
 */
export const getNamespaceDir = (
  rootPath: string,
  namespace: string,
): string => {
  validateNamespace(namespace)

  const baseDir = getEmbeddingsDir(rootPath)
  const candidate = path.join(baseDir, namespace)

  // Second line of defence: the absolute candidate must sit below the absolute
  // embeddings directory (the trailing separator excludes the directory itself).
  const requiredPrefix = path.resolve(baseDir) + path.sep
  if (path.resolve(candidate).startsWith(requiredPrefix)) {
    return candidate
  }

  throw new Error('Invalid namespace: resolves outside embeddings directory')
}
|
|
222
|
+
|
|
223
|
+
/**
 * Resolve the path of the vector index file that belongs to a namespace.
 * Propagates any error thrown by getNamespaceDir for an invalid namespace.
 */
export const getVectorPath = (rootPath: string, namespace: string): string => {
  const namespaceDir = getNamespaceDir(rootPath, namespace)
  return path.join(namespaceDir, VECTOR_INDEX_FILE)
}
|
|
228
|
+
|
|
229
|
+
/**
 * Resolve the path of the metadata file that belongs to a namespace.
 * Propagates any error thrown by getNamespaceDir for an invalid namespace.
 */
export const getMetaPath = (rootPath: string, namespace: string): string => {
  const namespaceDir = getNamespaceDir(rootPath, namespace)
  return path.join(namespaceDir, VECTOR_META_FILE)
}
|
|
234
|
+
|
|
235
|
+
/**
 * Resolve the path of the active provider file inside the index directory.
 */
export const getActiveProviderPath = (rootPath: string): string => {
  const segments = [rootPath, INDEX_DIR, ACTIVE_PROVIDER_FILE]
  return path.join(...segments)
}
|
|
240
|
+
|
|
241
|
+
/**
 * Resolve the legacy (pre-namespace) vector index path, located directly in
 * the index directory. Used for migration.
 */
export const getLegacyVectorPath = (rootPath: string): string => {
  const segments = [rootPath, INDEX_DIR, LEGACY_VECTOR_INDEX_FILE]
  return path.join(...segments)
}
|
|
246
|
+
|
|
247
|
+
/**
 * Resolve the legacy (pre-namespace) binary metadata path, located directly in
 * the index directory.
 */
export const getLegacyMetaPath = (rootPath: string): string => {
  const segments = [rootPath, INDEX_DIR, LEGACY_VECTOR_META_FILE]
  return path.join(...segments)
}
|
|
249
|
+
|
|
250
|
+
/**
 * Resolve the legacy (pre-namespace) JSON metadata path, located directly in
 * the index directory.
 */
export const getLegacyMetaJsonPath = (rootPath: string): string => {
  const segments = [rootPath, INDEX_DIR, LEGACY_VECTOR_META_JSON]
  return path.join(...segments)
}
|
|
252
|
+
|
|
253
|
+
// ============================================================================
|
|
254
|
+
// Active Provider Management
|
|
255
|
+
// ============================================================================
|
|
256
|
+
|
|
257
|
+
/**
 * Read the currently active provider configuration.
 *
 * If the active provider file cannot be accessed, the result is null ("not
 * set"). If it can be accessed but cannot be read, is not valid JSON, or does
 * not have the ActiveProvider shape, the effect fails.
 *
 * @param rootPath - Root directory
 * @returns Active provider info or null if not set
 */
export const readActiveProvider = (
  rootPath: string,
): Effect.Effect<ActiveProvider | null, EmbeddingNamespaceError> =>
  Effect.gen(function* () {
    const filePath = getActiveProviderPath(rootPath)

    // Existence probe: any access failure is deliberately folded into "not set".
    const exists = yield* Effect.tryPromise({
      try: async () => {
        await fs.access(filePath)
        return true
      },
      catch: () =>
        new EmbeddingNamespaceError({
          operation: 'readActiveProvider',
          message: 'File not found',
        }),
    }).pipe(Effect.catchAll(() => Effect.succeed(false)))

    if (!exists) {
      return null
    }

    const content = yield* Effect.tryPromise({
      try: () => fs.readFile(filePath, 'utf-8'),
      catch: (e) =>
        new EmbeddingNamespaceError({
          operation: 'readActiveProvider',
          message: `Failed to read active provider: ${e}`,
          cause: e,
        }),
    })

    // JSON.parse returns arbitrary data; check every field before trusting it.
    const hasActiveProviderShape = (value: unknown): value is ActiveProvider => {
      if (typeof value !== 'object' || value === null) return false
      const record = value as Record<string, unknown>
      return (
        typeof record.namespace === 'string' &&
        typeof record.provider === 'string' &&
        typeof record.model === 'string' &&
        typeof record.dimensions === 'number' &&
        typeof record.activatedAt === 'string'
      )
    }

    return yield* Effect.try({
      try: (): ActiveProvider => {
        const parsed: unknown = JSON.parse(content)
        if (!hasActiveProviderShape(parsed)) {
          throw new Error('content does not match the expected shape')
        }
        return parsed
      },
      catch: (e) =>
        new EmbeddingNamespaceError({
          operation: 'readActiveProvider',
          message: `Failed to parse active provider: ${e}`,
          cause: e,
        }),
    })
  })
|
|
304
|
+
|
|
305
|
+
/**
 * Persist the active provider configuration as pretty-printed JSON.
 *
 * Creates the containing directory (recursively) before writing. Either step
 * failing yields an EmbeddingNamespaceError carrying the underlying cause.
 *
 * @param rootPath - Root directory
 * @param activeProvider - Record to store
 */
export const writeActiveProvider = (
  rootPath: string,
  activeProvider: ActiveProvider,
): Effect.Effect<void, EmbeddingNamespaceError> =>
  Effect.gen(function* () {
    const target = getActiveProviderPath(rootPath)

    // Builds the error mapper for one step; only the message prefix differs.
    const failWith = (prefix: string) => (e: unknown) =>
      new EmbeddingNamespaceError({
        operation: 'writeActiveProvider',
        message: `${prefix}: ${e}`,
        cause: e,
      })

    yield* Effect.tryPromise({
      try: () => fs.mkdir(path.dirname(target), { recursive: true }),
      catch: failWith('Failed to create directory'),
    })

    yield* Effect.tryPromise({
      try: () => fs.writeFile(target, JSON.stringify(activeProvider, null, 2)),
      catch: failWith('Failed to write active provider'),
    })
  })
|
|
337
|
+
|
|
338
|
+
// ============================================================================
|
|
339
|
+
// Namespace Discovery
|
|
340
|
+
// ============================================================================
|
|
341
|
+
|
|
342
|
+
/**
 * List all available embedding namespaces.
 *
 * Scans the embeddings directory and returns one entry per sub-directory that
 * (a) has a name parseNamespace accepts, (b) yields valid file paths, and
 * (c) contains a metadata file that can be read and decoded. Directories that
 * fail any of these checks are skipped rather than failing the whole listing.
 * If the embeddings directory is not accessible, the result is an empty list.
 *
 * @param rootPath - Root directory
 * @returns Namespaces sorted by `updatedAt`, most recent first
 */
export const listNamespaces = (
  rootPath: string,
): Effect.Effect<EmbeddingNamespace[], EmbeddingNamespaceError> =>
  Effect.gen(function* () {
    const embeddingsDir = getEmbeddingsDir(rootPath)

    // Check if embeddings directory exists
    const exists = yield* Effect.tryPromise({
      try: async () => {
        await fs.access(embeddingsDir)
        return true
      },
      catch: () =>
        new EmbeddingNamespaceError({
          operation: 'listNamespaces',
          message: 'Directory not found',
        }),
    }).pipe(Effect.catchAll(() => Effect.succeed(false)))

    if (!exists) {
      return []
    }

    // Get active provider for comparison; a broken file just means "none active"
    const activeProvider = yield* readActiveProvider(rootPath).pipe(
      Effect.catchAll(() => Effect.succeed(null)),
    )

    // Read directory entries
    const entries = yield* Effect.tryPromise({
      try: () => fs.readdir(embeddingsDir, { withFileTypes: true }),
      catch: (e) =>
        new EmbeddingNamespaceError({
          operation: 'listNamespaces',
          message: `Failed to read embeddings directory: ${e}`,
          cause: e,
        }),
    })

    // The path helpers throw synchronously for names that fail validation
    // (e.g. "a..b_c_1" parses but contains ".."). Convert that into "skip".
    const resolvePaths = (
      namespace: string,
    ): { metaPath: string; vectorPath: string } | null => {
      try {
        return {
          metaPath: getMetaPath(rootPath, namespace),
          vectorPath: getVectorPath(rootPath, namespace),
        }
      } catch {
        return null
      }
    }

    // Best-effort stat: a missing or unreadable file contributes no size.
    const statOrNull = (filePath: string, message: string) =>
      Effect.tryPromise({
        try: () => fs.stat(filePath),
        catch: () =>
          new EmbeddingNamespaceError({
            operation: 'listNamespaces',
            message,
          }),
      }).pipe(Effect.catchAll(() => Effect.succeed(null)))

    const namespaces: EmbeddingNamespace[] = []

    for (const entry of entries) {
      if (!entry.isDirectory()) continue

      const namespace = entry.name
      // Use parseNamespace just for initial validation that this looks like a valid namespace directory
      const parsed = parseNamespace(namespace)
      if (!parsed) continue

      const paths = resolvePaths(namespace)
      if (!paths) continue
      const { metaPath, vectorPath } = paths

      // Read metadata; a missing, unreadable or undecodable file skips the entry
      const meta = yield* Effect.tryPromise({
        try: async () => {
          const buffer = await fs.readFile(metaPath)
          return msgpack.decode(buffer) as VectorIndex
        },
        catch: (e) =>
          new EmbeddingNamespaceError({
            operation: 'listNamespaces',
            message: `Failed to read metadata: ${e}`,
            cause: e,
          }),
      }).pipe(Effect.catchAll(() => Effect.succeed(null)))

      // The decoded value is unvalidated; anything that is not an object is unusable.
      if (!meta || typeof meta !== 'object') continue

      // Get file sizes
      const [metaStats, vectorStats] = yield* Effect.all([
        statOrNull(metaPath, 'Failed to stat meta'),
        statOrNull(vectorPath, 'Failed to stat vector'),
      ])

      const sizeBytes = (metaStats?.size ?? 0) + (vectorStats?.size ?? 0)

      // Use VectorIndex metadata as the source of truth for provider/model/dimensions
      // Fall back to parseNamespace only if metadata fields are missing (legacy indexes)
      const provider = meta.provider || parsed.provider
      const model = meta.providerModel || parsed.model
      const dimensions = meta.dimensions || parsed.dimensions

      namespaces.push({
        namespace,
        provider,
        model,
        dimensions,
        // Tolerate metadata without an entries map instead of throwing.
        vectorCount: Object.keys(meta.entries ?? {}).length,
        totalCost: meta.totalCost ?? 0,
        totalTokens: meta.totalTokens ?? 0,
        createdAt: meta.createdAt,
        updatedAt: meta.updatedAt,
        isActive: activeProvider?.namespace === namespace,
        sizeBytes,
      })
    }

    // Sort by most recently updated; unparseable timestamps sort as epoch 0 so
    // the comparator never returns NaN.
    const toTime = (timestamp: string): number => {
      const ms = new Date(timestamp).getTime()
      return Number.isNaN(ms) ? 0 : ms
    }
    namespaces.sort((a, b) => toTime(b.updatedAt) - toTime(a.updatedAt))

    return namespaces
  })
|
|
479
|
+
|
|
480
|
+
// ============================================================================
|
|
481
|
+
// Namespace Operations
|
|
482
|
+
// ============================================================================
|
|
483
|
+
|
|
484
|
+
/**
 * Switch to a different embedding namespace.
 * Persists the selection through `writeActiveProvider`.
 *
 * Matching is case-insensitive and ignores surrounding whitespace in the
 * query. A namespace is a candidate when the query equals its namespace name
 * or provider, or is a substring of its namespace name or model. When several
 * candidates remain, an exact namespace-name match wins, then the first exact
 * provider match (in `listNamespaces` order); otherwise the call fails as
 * ambiguous.
 *
 * @param rootPath - Root directory
 * @param namespaceQuery - Full namespace name or partial match
 * @returns The activated namespace info, with `isActive` set to true
 *
 * Fails with `EmbeddingNamespaceError` when no namespaces exist, the query is
 * blank, nothing matches, or the match is ambiguous.
 */
export const switchNamespace = (
  rootPath: string,
  namespaceQuery: string,
): Effect.Effect<EmbeddingNamespace, EmbeddingNamespaceError> =>
  Effect.gen(function* () {
    const namespaces = yield* listNamespaces(rootPath)

    if (namespaces.length === 0) {
      return yield* Effect.fail(
        new EmbeddingNamespaceError({
          operation: 'switchNamespace',
          message:
            'No embedding namespaces found. Run "mdcontext index --embed" first.',
        }),
      )
    }

    const queryLower = namespaceQuery.trim().toLowerCase()

    // A blank query would match every namespace through `includes('')`;
    // reject it instead of activating whatever happens to be listed.
    if (queryLower === '') {
      return yield* Effect.fail(
        new EmbeddingNamespaceError({
          operation: 'switchNamespace',
          message: `Namespace query must not be empty. Available: ${namespaces.map((n) => n.namespace).join(', ')}`,
        }),
      )
    }

    // Find matching namespaces (exact or fuzzy)
    const matches = namespaces.filter(
      (ns) =>
        ns.provider.toLowerCase() === queryLower ||
        ns.model.toLowerCase().includes(queryLower) ||
        ns.namespace.toLowerCase().includes(queryLower),
    )

    if (matches.length === 0) {
      return yield* Effect.fail(
        new EmbeddingNamespaceError({
          operation: 'switchNamespace',
          message: `No namespace matching "${namespaceQuery}". Available: ${namespaces.map((n) => n.namespace).join(', ')}`,
        }),
      )
    }

    let target = matches[0]!

    if (matches.length > 1) {
      // Prefer the most specific interpretation of the query: an exact
      // namespace name beats an exact provider name.
      const exact =
        matches.find((ns) => ns.namespace.toLowerCase() === queryLower) ??
        matches.find((ns) => ns.provider.toLowerCase() === queryLower)

      if (!exact) {
        return yield* Effect.fail(
          new EmbeddingNamespaceError({
            operation: 'switchNamespace',
            message: `Multiple namespaces match "${namespaceQuery}": ${matches.map((n) => n.namespace).join(', ')}. Be more specific.`,
          }),
        )
      }
      target = exact
    }

    // Update active provider
    yield* writeActiveProvider(rootPath, {
      namespace: target.namespace,
      provider: target.provider,
      model: target.model,
      dimensions: target.dimensions,
      activatedAt: new Date().toISOString(),
    })

    return { ...target, isActive: true }
  })
|
|
560
|
+
|
|
561
|
+
/**
 * Remove an embedding namespace and its directory.
 *
 * Matching is case-insensitive, ignores surrounding whitespace in the query,
 * and considers the namespace name only (exact or substring). When several
 * namespaces match, an exact name match wins; otherwise the call fails as
 * ambiguous.
 *
 * @param rootPath - Root directory
 * @param namespaceQuery - Full namespace name or partial match
 * @param options - `force`: allow removing the currently active namespace
 * @returns The removed namespace name and whether it was the active one
 *
 * Fails with `EmbeddingNamespaceError` when no namespaces exist, the query is
 * blank, nothing matches, the match is ambiguous, the target is active and
 * `force` is not set, or the directory cannot be removed.
 */
export const removeNamespace = (
  rootPath: string,
  namespaceQuery: string,
  options: { force?: boolean } = {},
): Effect.Effect<
  { removed: string; wasActive: boolean },
  EmbeddingNamespaceError
> =>
  Effect.gen(function* () {
    const namespaces = yield* listNamespaces(rootPath)

    if (namespaces.length === 0) {
      return yield* Effect.fail(
        new EmbeddingNamespaceError({
          operation: 'removeNamespace',
          message: 'No embedding namespaces found.',
        }),
      )
    }

    const queryLower = namespaceQuery.trim().toLowerCase()

    // A blank query would match every namespace through `includes('')`, and
    // with a single namespace present that would delete it. Refuse outright.
    if (queryLower === '') {
      return yield* Effect.fail(
        new EmbeddingNamespaceError({
          operation: 'removeNamespace',
          message: `Namespace query must not be empty. Available: ${namespaces.map((n) => n.namespace).join(', ')}`,
        }),
      )
    }

    // Find matching namespaces (substring match also covers exact equality)
    const matches = namespaces.filter((ns) =>
      ns.namespace.toLowerCase().includes(queryLower),
    )

    if (matches.length === 0) {
      return yield* Effect.fail(
        new EmbeddingNamespaceError({
          operation: 'removeNamespace',
          message: `No namespace matching "${namespaceQuery}". Available: ${namespaces.map((n) => n.namespace).join(', ')}`,
        }),
      )
    }

    let target = matches[0]!

    if (matches.length > 1) {
      const exact = matches.find(
        (ns) => ns.namespace.toLowerCase() === queryLower,
      )
      if (!exact) {
        return yield* Effect.fail(
          new EmbeddingNamespaceError({
            operation: 'removeNamespace',
            message: `Multiple namespaces match "${namespaceQuery}": ${matches.map((n) => n.namespace).join(', ')}. Be more specific.`,
          }),
        )
      }
      target = exact
    }

    // Refuse to remove the active namespace unless explicitly forced
    if (target.isActive && !options.force) {
      return yield* Effect.fail(
        new EmbeddingNamespaceError({
          operation: 'removeNamespace',
          message: `Cannot remove active namespace "${target.namespace}". Use --force to override or switch to another namespace first.`,
        }),
      )
    }

    // Remove the namespace directory
    const namespaceDir = getNamespaceDir(rootPath, target.namespace)
    yield* Effect.tryPromise({
      try: () => fs.rm(namespaceDir, { recursive: true, force: true }),
      catch: (e) =>
        new EmbeddingNamespaceError({
          operation: 'removeNamespace',
          message: `Failed to remove namespace directory: ${e}`,
          cause: e,
        }),
    })

    // If this was the active namespace, clear the active provider file.
    // Best-effort: a failure here is deliberately ignored because the
    // namespace itself is already gone.
    if (target.isActive) {
      const activeProviderPath = getActiveProviderPath(rootPath)
      yield* Effect.tryPromise({
        try: () => fs.unlink(activeProviderPath),
        catch: (e) =>
          new EmbeddingNamespaceError({
            operation: 'removeNamespace',
            message: `Failed to clear active provider: ${e}`,
            cause: e,
          }),
      }).pipe(Effect.catchAll(() => Effect.succeed(undefined)))
    }

    return { removed: target.namespace, wasActive: target.isActive }
  })
|
|
661
|
+
|
|
662
|
+
// ============================================================================
|
|
663
|
+
// Migration
|
|
664
|
+
// ============================================================================
|
|
665
|
+
|
|
666
|
+
/**
 * Report whether legacy (non-namespaced) embeddings are present.
 *
 * Resolves to true only when the embeddings directory is not accessible,
 * the legacy vector file is accessible, and at least one legacy metadata
 * file (binary or JSON) is accessible. Access failures are treated as
 * "not present", so this effect does not fail in practice.
 */
export const hasLegacyEmbeddings = (
  rootPath: string,
): Effect.Effect<boolean, EmbeddingNamespaceError> =>
  Effect.gen(function* () {
    const vectorFile = getLegacyVectorPath(rootPath)
    const binaryMetaFile = getLegacyMetaPath(rootPath)
    const jsonMetaFile = getLegacyMetaJsonPath(rootPath)
    const namespacedRoot = getEmbeddingsDir(rootPath)

    // Resolves to true when `target` is accessible, false on any failure.
    const isAccessible = (target: string, failureMessage: string) =>
      Effect.tryPromise({
        try: () => fs.access(target).then(() => true),
        catch: () =>
          new EmbeddingNamespaceError({
            operation: 'hasLegacyEmbeddings',
            message: failureMessage,
          }),
      }).pipe(Effect.catchAll(() => Effect.succeed(false)))

    // An existing embeddings directory means migration already happened
    const alreadyNamespaced = yield* isAccessible(
      namespacedRoot,
      'Directory check failed',
    )
    if (alreadyNamespaced) return false

    // Without the legacy vector file there is nothing to migrate
    const vectorsPresent = yield* isAccessible(vectorFile, 'File check failed')
    if (!vectorsPresent) return false

    // Either metadata flavour is enough; the JSON file is only probed when
    // the binary one is missing
    const binaryMetaPresent = yield* isAccessible(
      binaryMetaFile,
      'Meta check failed',
    )
    if (binaryMetaPresent) return true

    return yield* isAccessible(jsonMetaFile, 'Meta check failed')
  })
|
|
732
|
+
|
|
733
|
+
/**
 * Migrate legacy embeddings to the new namespaced format.
 *
 * The legacy vector file is moved (renamed) into the namespace directory.
 * The metadata is re-encoded with `msgpack.encode` at the new location, after
 * which the legacy metadata files (binary and JSON) are deleted on a
 * best-effort basis. The migrated namespace is then written as the active
 * provider.
 *
 * @param rootPath - Root directory
 * @returns The new namespace and its vector count, or null when
 *   `hasLegacyEmbeddings` reports nothing to migrate
 *
 * Fails with `EmbeddingNamespaceError` when the legacy metadata cannot be
 * read, or when creating the directory, moving the vector file, or writing
 * the new metadata fails.
 */
export const migrateLegacyEmbeddings = (
  rootPath: string,
): Effect.Effect<
  { namespace: string; vectorCount: number } | null,
  EmbeddingNamespaceError
> =>
  Effect.gen(function* () {
    const hasLegacy = yield* hasLegacyEmbeddings(rootPath)
    if (!hasLegacy) {
      return null
    }

    // Read legacy metadata to determine provider/model/dimensions
    const legacyMetaBinPath = getLegacyMetaPath(rootPath)
    const legacyMetaJsonPath = getLegacyMetaJsonPath(rootPath)

    // Try binary format first
    let meta: VectorIndex | null = yield* Effect.tryPromise({
      try: async () => {
        const buffer = await fs.readFile(legacyMetaBinPath)
        return msgpack.decode(buffer) as VectorIndex
      },
      catch: () =>
        new EmbeddingNamespaceError({
          operation: 'migrateLegacyEmbeddings',
          message: 'Failed to read binary meta',
        }),
    }).pipe(Effect.catchAll(() => Effect.succeed(null)))

    // Fall back to JSON
    if (!meta) {
      meta = yield* Effect.tryPromise({
        try: async () => {
          const content = await fs.readFile(legacyMetaJsonPath, 'utf-8')
          return JSON.parse(content) as VectorIndex
        },
        catch: () =>
          new EmbeddingNamespaceError({
            operation: 'migrateLegacyEmbeddings',
            message: 'Failed to read JSON meta',
          }),
      }).pipe(Effect.catchAll(() => Effect.succeed(null)))
    }

    if (!meta) {
      return yield* Effect.fail(
        new EmbeddingNamespaceError({
          operation: 'migrateLegacyEmbeddings',
          message:
            'Could not read legacy metadata. Embeddings may be corrupted.',
        }),
      )
    }

    // Determine provider info from metadata
    // Legacy format may have provider as "openai:text-embedding-3-small" or just "openai"
    let provider = meta.provider || 'openai'
    let model = meta.providerModel || 'text-embedding-3-small'

    // Handle legacy "provider:model" format. Split on the FIRST colon only so
    // model names that themselves contain a colon (e.g. "name:tag") survive
    // intact instead of being truncated.
    const separatorIndex = provider.indexOf(':')
    if (separatorIndex !== -1 && !meta.providerModel) {
      const embeddedModel = provider.slice(separatorIndex + 1)
      provider = provider.slice(0, separatorIndex)
      model = embeddedModel || model
    }

    const dimensions = meta.dimensions

    // Generate namespace
    const namespace = generateNamespace(provider, model, dimensions)
    const namespaceDir = getNamespaceDir(rootPath, namespace)

    // Create namespace directory
    yield* Effect.tryPromise({
      try: () => fs.mkdir(namespaceDir, { recursive: true }),
      catch: (e) =>
        new EmbeddingNamespaceError({
          operation: 'migrateLegacyEmbeddings',
          message: `Failed to create namespace directory: ${e}`,
          cause: e,
        }),
    })

    // Move vector file
    // NOTE(review): if a later step fails, the vector file has already been
    // moved and is not moved back — confirm whether a rollback is wanted.
    const legacyBinPath = getLegacyVectorPath(rootPath)
    const newVectorPath = getVectorPath(rootPath, namespace)
    yield* Effect.tryPromise({
      try: () => fs.rename(legacyBinPath, newVectorPath),
      catch: (e) =>
        new EmbeddingNamespaceError({
          operation: 'migrateLegacyEmbeddings',
          message: `Failed to move vector file: ${e}`,
          cause: e,
        }),
    })

    // Write metadata (always binary in the new location) with the
    // normalised provider/model split applied
    const newMetaPath = getMetaPath(rootPath, namespace)
    const updatedMeta: VectorIndex = {
      ...meta,
      provider,
      providerModel: model,
    }

    yield* Effect.tryPromise({
      try: async () => {
        const encoded = msgpack.encode(updatedMeta)
        await fs.writeFile(newMetaPath, encoded)
      },
      catch: (e) =>
        new EmbeddingNamespaceError({
          operation: 'migrateLegacyEmbeddings',
          message: `Failed to write metadata: ${e}`,
          cause: e,
        }),
    })

    // Remove old metadata files. Best-effort: either file may be absent, and
    // a leftover legacy file does not invalidate the migration.
    yield* Effect.tryPromise({
      try: () => fs.unlink(legacyMetaBinPath).catch(() => {}),
      catch: () =>
        new EmbeddingNamespaceError({
          operation: 'migrateLegacyEmbeddings',
          message: 'Failed to remove legacy bin meta',
        }),
    }).pipe(Effect.catchAll(() => Effect.succeed(undefined)))

    yield* Effect.tryPromise({
      try: () => fs.unlink(legacyMetaJsonPath).catch(() => {}),
      catch: () =>
        new EmbeddingNamespaceError({
          operation: 'migrateLegacyEmbeddings',
          message: 'Failed to remove legacy json meta',
        }),
    }).pipe(Effect.catchAll(() => Effect.succeed(undefined)))

    // Set as active provider
    yield* writeActiveProvider(rootPath, {
      namespace,
      provider,
      model,
      dimensions,
      activatedAt: new Date().toISOString(),
    })

    return {
      namespace,
      vectorCount: Object.keys(meta.entries).length,
    }
  })
|
|
891
|
+
|
|
892
|
+
/**
 * Get or determine the current active namespace.
 *
 * If an active provider is recorded and its namespace directory is still
 * accessible, it is returned as-is. Otherwise the first entry returned by
 * `listNamespaces` is written as the new active provider and returned.
 *
 * @param rootPath - Root directory
 * @returns The active provider record, or null when no namespaces exist
 */
export const getActiveNamespace = (
  rootPath: string,
): Effect.Effect<ActiveProvider | null, EmbeddingNamespaceError> =>
  Effect.gen(function* () {
    // Try to read active provider
    const active = yield* readActiveProvider(rootPath)
    if (active) {
      // Validate that the namespace directory still exists
      const namespaceDir = getNamespaceDir(rootPath, active.namespace)
      const dirExists = yield* Effect.tryPromise({
        try: async () => {
          await fs.access(namespaceDir)
          return true
        },
        catch: () =>
          new EmbeddingNamespaceError({
            operation: 'getActiveNamespace',
            message: 'Directory check failed',
          }),
      }).pipe(Effect.catchAll(() => Effect.succeed(false)))

      if (dirExists) {
        return active
      }
      // Active namespace was deleted - fall through to auto-detect
    }

    // No active provider set or it was deleted - check available namespaces
    const namespaces = yield* listNamespaces(rootPath)
    if (namespaces.length === 0) {
      return null
    }

    // Auto-activate the first listed namespace. Build the record once so the
    // value returned to the caller is exactly what was persisted, including
    // the `activatedAt` timestamp.
    const mostRecent = namespaces[0]!
    const activated: ActiveProvider = {
      namespace: mostRecent.namespace,
      provider: mostRecent.provider,
      model: mostRecent.model,
      dimensions: mostRecent.dimensions,
      activatedAt: new Date().toISOString(),
    }

    yield* writeActiveProvider(rootPath, activated)

    return activated
  })
|