mdcontext 0.0.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.changeset/README.md +28 -0
- package/.changeset/config.json +11 -0
- package/.claude/settings.local.json +25 -0
- package/.github/workflows/ci.yml +83 -0
- package/.github/workflows/claude-code-review.yml +44 -0
- package/.github/workflows/claude.yml +85 -0
- package/.github/workflows/release.yml +113 -0
- package/.tldrignore +112 -0
- package/BACKLOG.md +338 -0
- package/CONTRIBUTING.md +186 -0
- package/NOTES/NOTES +44 -0
- package/README.md +434 -11
- package/biome.json +36 -0
- package/cspell.config.yaml +14 -0
- package/dist/chunk-23UPXDNL.js +3044 -0
- package/dist/chunk-2W7MO2DL.js +1366 -0
- package/dist/chunk-3NUAZGMA.js +1689 -0
- package/dist/chunk-7TOWB2XB.js +366 -0
- package/dist/chunk-7XOTOADQ.js +3065 -0
- package/dist/chunk-AH2PDM2K.js +3042 -0
- package/dist/chunk-BNXWSZ63.js +3742 -0
- package/dist/chunk-BTL5DJVU.js +3222 -0
- package/dist/chunk-HDHYG7E4.js +104 -0
- package/dist/chunk-HLR4KZBP.js +3234 -0
- package/dist/chunk-IP3FRFEB.js +1045 -0
- package/dist/chunk-KHU56VDO.js +3042 -0
- package/dist/chunk-KRYIFLQR.js +88 -0
- package/dist/chunk-LBSDNLEM.js +287 -0
- package/dist/chunk-MNTQ7HCP.js +2643 -0
- package/dist/chunk-MUJELQQ6.js +1387 -0
- package/dist/chunk-MXJGMSLV.js +2199 -0
- package/dist/chunk-N6QJGC3Z.js +2636 -0
- package/dist/chunk-OBELGBPM.js +1713 -0
- package/dist/chunk-OT7R5XTA.js +3192 -0
- package/dist/chunk-P7X4RA2T.js +106 -0
- package/dist/chunk-PIDUQNC2.js +3185 -0
- package/dist/chunk-POGCDIH4.js +3187 -0
- package/dist/chunk-PSIEOQGZ.js +3043 -0
- package/dist/chunk-PVRT3IHA.js +3238 -0
- package/dist/chunk-QNN4TT23.js +1430 -0
- package/dist/chunk-RE3R45RJ.js +3042 -0
- package/dist/chunk-S7E6TFX6.js +803 -0
- package/dist/chunk-SG6GLU4U.js +1378 -0
- package/dist/chunk-SJCDV2ST.js +274 -0
- package/dist/chunk-SYE5XLF3.js +104 -0
- package/dist/chunk-T5VLYBZD.js +103 -0
- package/dist/chunk-TOQB7VWU.js +3238 -0
- package/dist/chunk-VFNMZ4ZQ.js +3228 -0
- package/dist/chunk-VVTGZNBT.js +1629 -0
- package/dist/chunk-W7Q4RFEV.js +104 -0
- package/dist/chunk-XTYYVRLO.js +3190 -0
- package/dist/chunk-Y6MDYVJD.js +3063 -0
- package/dist/cli/main.d.ts +1 -0
- package/dist/cli/main.js +5458 -0
- package/dist/index.d.ts +653 -0
- package/dist/index.js +79 -0
- package/dist/mcp/server.d.ts +1 -0
- package/dist/mcp/server.js +472 -0
- package/dist/schema-BAWSG7KY.js +22 -0
- package/dist/schema-E3QUPL26.js +20 -0
- package/dist/schema-EHL7WUT6.js +20 -0
- package/docs/019-USAGE.md +625 -0
- package/docs/020-current-implementation.md +364 -0
- package/docs/021-DOGFOODING-FINDINGS.md +175 -0
- package/docs/BACKLOG.md +80 -0
- package/docs/CONFIG.md +1123 -0
- package/docs/DESIGN.md +439 -0
- package/docs/ERRORS.md +383 -0
- package/docs/PROJECT.md +88 -0
- package/docs/ROADMAP.md +407 -0
- package/docs/summarization.md +320 -0
- package/docs/test-links.md +9 -0
- package/justfile +40 -0
- package/package.json +74 -9
- package/pnpm-workspace.yaml +5 -0
- package/research/INDEX.md +315 -0
- package/research/code-review/README.md +90 -0
- package/research/code-review/cli-error-handling-review.md +979 -0
- package/research/code-review/code-review-validation-report.md +464 -0
- package/research/code-review/main-ts-review.md +1128 -0
- package/research/config-analysis/01-current-implementation.md +470 -0
- package/research/config-analysis/02-strategy-recommendation.md +428 -0
- package/research/config-analysis/03-task-candidates.md +715 -0
- package/research/config-analysis/033-research-configuration-management.md +828 -0
- package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
- package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
- package/research/config-docs/SUMMARY.md +357 -0
- package/research/config-docs/TEST-RESULTS.md +776 -0
- package/research/config-docs/TODO.md +542 -0
- package/research/config-docs/analysis.md +744 -0
- package/research/config-docs/fix-validation.md +502 -0
- package/research/config-docs/help-audit.md +264 -0
- package/research/config-docs/help-system-analysis.md +890 -0
- package/research/dogfood/consolidated-tool-evaluation.md +373 -0
- package/research/dogfood/strategy-a/a-synthesis.md +184 -0
- package/research/dogfood/strategy-a/a1-docs.md +226 -0
- package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
- package/research/dogfood/strategy-a/a3-llm.md +164 -0
- package/research/dogfood/strategy-b/b-synthesis.md +228 -0
- package/research/dogfood/strategy-b/b1-architecture.md +207 -0
- package/research/dogfood/strategy-b/b2-gaps.md +258 -0
- package/research/dogfood/strategy-b/b3-workflows.md +250 -0
- package/research/dogfood/strategy-c/c-synthesis.md +451 -0
- package/research/dogfood/strategy-c/c1-explorer.md +192 -0
- package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
- package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
- package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
- package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
- package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
- package/research/effect-cli-error-handling.md +845 -0
- package/research/effect-errors-as-values.md +943 -0
- package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
- package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
- package/research/errors-task-analysis/embeddings-analysis.md +709 -0
- package/research/errors-task-analysis/index-search-analysis.md +812 -0
- package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
- package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
- package/research/issue-review.md +603 -0
- package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
- package/research/llm-summarization/alternative-providers-2026.md +1428 -0
- package/research/llm-summarization/anthropic-2026.md +367 -0
- package/research/llm-summarization/claude-cli-integration.md +1706 -0
- package/research/llm-summarization/cli-integration-patterns.md +3155 -0
- package/research/llm-summarization/openai-2026.md +473 -0
- package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
- package/research/llm-summarization/opencode-cli-integration.md +1552 -0
- package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
- package/research/llm-summarization/prototype-results.md +56 -0
- package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
- package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
- package/research/mdcontext-error-analysis.md +521 -0
- package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
- package/research/mdcontext-pudding/01-index-embed.md +956 -0
- package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
- package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
- package/research/mdcontext-pudding/02-search.md +970 -0
- package/research/mdcontext-pudding/03-context.md +779 -0
- package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
- package/research/mdcontext-pudding/04-tree.md +704 -0
- package/research/mdcontext-pudding/05-config.md +1038 -0
- package/research/mdcontext-pudding/06-links-summary.txt +87 -0
- package/research/mdcontext-pudding/06-links.md +679 -0
- package/research/mdcontext-pudding/07-stats.md +693 -0
- package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
- package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
- package/research/mdcontext-pudding/README.md +168 -0
- package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
- package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
- package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
- package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
- package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
- package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
- package/research/research-quality-review.md +834 -0
- package/research/semantic-search/002-research-embedding-models.md +490 -0
- package/research/semantic-search/003-research-rag-alternatives.md +523 -0
- package/research/semantic-search/004-research-vector-search.md +841 -0
- package/research/semantic-search/032-research-semantic-search.md +427 -0
- package/research/semantic-search/embedding-text-analysis.md +156 -0
- package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
- package/research/semantic-search/query-processing-analysis.md +207 -0
- package/research/semantic-search/root-cause-and-solution.md +114 -0
- package/research/semantic-search/threshold-validation-report.md +69 -0
- package/research/semantic-search/vector-search-analysis.md +63 -0
- package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
- package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
- package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
- package/research/task-management-2026/03-lightweight-file-based.md +567 -0
- package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
- package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
- package/research/task-management-2026/linear/02-api-integrations.md +930 -0
- package/research/task-management-2026/linear/03-ai-features.md +368 -0
- package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
- package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
- package/research/test-path-issues.md +276 -0
- package/review/ALP-76/1-error-type-design.md +962 -0
- package/review/ALP-76/2-error-handling-patterns.md +906 -0
- package/review/ALP-76/3-error-presentation.md +624 -0
- package/review/ALP-76/4-test-coverage.md +625 -0
- package/review/ALP-76/5-migration-completeness.md +440 -0
- package/review/ALP-76/6-effect-best-practices.md +755 -0
- package/scripts/apply-branch-protection.sh +47 -0
- package/scripts/branch-protection-templates.json +79 -0
- package/scripts/prototype-summarization.ts +346 -0
- package/scripts/rebuild-hnswlib.js +58 -0
- package/scripts/setup-branch-protection.sh +64 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
- package/src/cli/argv-preprocessor.test.ts +210 -0
- package/src/cli/argv-preprocessor.ts +202 -0
- package/src/cli/cli.test.ts +627 -0
- package/src/cli/commands/backlinks.ts +54 -0
- package/src/cli/commands/config-cmd.ts +642 -0
- package/src/cli/commands/context.ts +285 -0
- package/src/cli/commands/duplicates.ts +122 -0
- package/src/cli/commands/embeddings.ts +529 -0
- package/src/cli/commands/index-cmd.ts +480 -0
- package/src/cli/commands/index.ts +16 -0
- package/src/cli/commands/links.ts +52 -0
- package/src/cli/commands/search.ts +1281 -0
- package/src/cli/commands/stats.ts +149 -0
- package/src/cli/commands/tree.ts +128 -0
- package/src/cli/config-layer.ts +176 -0
- package/src/cli/error-handler.test.ts +235 -0
- package/src/cli/error-handler.ts +655 -0
- package/src/cli/flag-schemas.ts +341 -0
- package/src/cli/help.ts +588 -0
- package/src/cli/index.ts +9 -0
- package/src/cli/main.ts +435 -0
- package/src/cli/options.ts +41 -0
- package/src/cli/shared-error-handling.ts +199 -0
- package/src/cli/typo-suggester.test.ts +105 -0
- package/src/cli/typo-suggester.ts +130 -0
- package/src/cli/utils.ts +259 -0
- package/src/config/file-provider.test.ts +320 -0
- package/src/config/file-provider.ts +273 -0
- package/src/config/index.ts +72 -0
- package/src/config/integration.test.ts +667 -0
- package/src/config/precedence.test.ts +277 -0
- package/src/config/precedence.ts +451 -0
- package/src/config/schema.test.ts +414 -0
- package/src/config/schema.ts +603 -0
- package/src/config/service.test.ts +320 -0
- package/src/config/service.ts +243 -0
- package/src/config/testing.test.ts +264 -0
- package/src/config/testing.ts +110 -0
- package/src/core/index.ts +1 -0
- package/src/core/types.ts +113 -0
- package/src/duplicates/detector.test.ts +183 -0
- package/src/duplicates/detector.ts +414 -0
- package/src/duplicates/index.ts +18 -0
- package/src/embeddings/embedding-namespace.test.ts +300 -0
- package/src/embeddings/embedding-namespace.ts +947 -0
- package/src/embeddings/heading-boost.test.ts +222 -0
- package/src/embeddings/hnsw-build-options.test.ts +198 -0
- package/src/embeddings/hyde.test.ts +272 -0
- package/src/embeddings/hyde.ts +264 -0
- package/src/embeddings/index.ts +10 -0
- package/src/embeddings/openai-provider.ts +414 -0
- package/src/embeddings/pricing.json +22 -0
- package/src/embeddings/provider-constants.ts +204 -0
- package/src/embeddings/provider-errors.test.ts +967 -0
- package/src/embeddings/provider-errors.ts +565 -0
- package/src/embeddings/provider-factory.test.ts +240 -0
- package/src/embeddings/provider-factory.ts +225 -0
- package/src/embeddings/provider-integration.test.ts +788 -0
- package/src/embeddings/query-preprocessing.test.ts +187 -0
- package/src/embeddings/semantic-search-threshold.test.ts +508 -0
- package/src/embeddings/semantic-search.ts +1270 -0
- package/src/embeddings/types.ts +359 -0
- package/src/embeddings/vector-store.ts +708 -0
- package/src/embeddings/voyage-provider.ts +313 -0
- package/src/errors/errors.test.ts +845 -0
- package/src/errors/index.ts +533 -0
- package/src/index/ignore-patterns.test.ts +354 -0
- package/src/index/ignore-patterns.ts +305 -0
- package/src/index/index.ts +4 -0
- package/src/index/indexer.ts +684 -0
- package/src/index/storage.ts +260 -0
- package/src/index/types.ts +147 -0
- package/src/index/watcher.ts +189 -0
- package/src/index.ts +30 -0
- package/src/integration/search-keyword.test.ts +678 -0
- package/src/mcp/server.ts +612 -0
- package/src/parser/index.ts +1 -0
- package/src/parser/parser.test.ts +291 -0
- package/src/parser/parser.ts +394 -0
- package/src/parser/section-filter.test.ts +277 -0
- package/src/parser/section-filter.ts +392 -0
- package/src/search/__tests__/hybrid-search.test.ts +650 -0
- package/src/search/bm25-store.ts +366 -0
- package/src/search/cross-encoder.test.ts +253 -0
- package/src/search/cross-encoder.ts +406 -0
- package/src/search/fuzzy-search.test.ts +419 -0
- package/src/search/fuzzy-search.ts +273 -0
- package/src/search/hybrid-search.ts +448 -0
- package/src/search/path-matcher.test.ts +276 -0
- package/src/search/path-matcher.ts +33 -0
- package/src/search/query-parser.test.ts +260 -0
- package/src/search/query-parser.ts +319 -0
- package/src/search/searcher.test.ts +280 -0
- package/src/search/searcher.ts +724 -0
- package/src/search/wink-bm25.d.ts +30 -0
- package/src/summarization/cli-providers/claude.ts +202 -0
- package/src/summarization/cli-providers/detection.test.ts +273 -0
- package/src/summarization/cli-providers/detection.ts +118 -0
- package/src/summarization/cli-providers/index.ts +8 -0
- package/src/summarization/cost.test.ts +139 -0
- package/src/summarization/cost.ts +102 -0
- package/src/summarization/error-handler.test.ts +127 -0
- package/src/summarization/error-handler.ts +111 -0
- package/src/summarization/index.ts +102 -0
- package/src/summarization/pipeline.test.ts +498 -0
- package/src/summarization/pipeline.ts +231 -0
- package/src/summarization/prompts.test.ts +269 -0
- package/src/summarization/prompts.ts +133 -0
- package/src/summarization/provider-factory.test.ts +396 -0
- package/src/summarization/provider-factory.ts +178 -0
- package/src/summarization/types.ts +184 -0
- package/src/summarize/budget-bugs.test.ts +620 -0
- package/src/summarize/formatters.ts +419 -0
- package/src/summarize/index.ts +20 -0
- package/src/summarize/summarizer.test.ts +275 -0
- package/src/summarize/summarizer.ts +597 -0
- package/src/summarize/verify-bugs.test.ts +238 -0
- package/src/types/huggingface-transformers.d.ts +66 -0
- package/src/utils/index.ts +1 -0
- package/src/utils/tokens.test.ts +142 -0
- package/src/utils/tokens.ts +186 -0
- package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
- package/tests/fixtures/cli/.mdcontext/config.json +8 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
- package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
- package/tests/fixtures/cli/.mdcontext/indexes/sections.json +247 -0
- package/tests/fixtures/cli/README.md +9 -0
- package/tests/fixtures/cli/api-reference.md +11 -0
- package/tests/fixtures/cli/getting-started.md +11 -0
- package/tests/integration/embed-index.test.ts +712 -0
- package/tests/integration/search-context.test.ts +469 -0
- package/tests/integration/search-semantic.test.ts +522 -0
- package/tsconfig.json +26 -0
- package/vitest.config.ts +16 -0
- package/vitest.setup.ts +12 -0
|
@@ -0,0 +1,712 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embed + Index Integration Tests
|
|
3
|
+
*
|
|
4
|
+
* Tests the full indexing and embedding pipeline on small and large corpora.
|
|
5
|
+
* Verifies binary format usage, MessagePack handling, metadata creation,
|
|
6
|
+
* and proper index loading after creation.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import * as fs from 'node:fs'
|
|
10
|
+
import * as os from 'node:os'
|
|
11
|
+
import * as path from 'node:path'
|
|
12
|
+
import { Effect } from 'effect'
|
|
13
|
+
import { afterEach, beforeEach, describe, expect, it } from 'vitest'
|
|
14
|
+
import {
|
|
15
|
+
createVectorStore,
|
|
16
|
+
type HnswVectorStore,
|
|
17
|
+
} from '../../src/embeddings/vector-store.js'
|
|
18
|
+
import { buildIndex } from '../../src/index/indexer.js'
|
|
19
|
+
import {
|
|
20
|
+
createStorage,
|
|
21
|
+
loadDocumentIndex,
|
|
22
|
+
loadSectionIndex,
|
|
23
|
+
} from '../../src/index/storage.js'
|
|
24
|
+
|
|
25
|
+
// ============================================================================
|
|
26
|
+
// Test Setup
|
|
27
|
+
// ============================================================================
|
|
28
|
+
|
|
29
|
+
describe('Embed + Index Integration Tests', () => {
|
|
30
|
+
let tempDir: string
|
|
31
|
+
const savedEnv: Record<string, string | undefined> = {}
|
|
32
|
+
|
|
33
|
+
// Per-test setup: a fresh scratch directory and a placeholder OpenAI key.
beforeEach(() => {
  const scratchPrefix = path.join(os.tmpdir(), 'mdcontext-embed-int-')
  tempDir = fs.mkdtempSync(scratchPrefix)

  // Record the key that was in the environment (possibly undefined) before
  // overwriting it with a dummy value for the duration of the test.
  const { OPENAI_API_KEY: previousKey } = process.env
  savedEnv.OPENAI_API_KEY = previousKey
  process.env.OPENAI_API_KEY = 'sk-test-mock-key-for-testing'
})
|
|
40
|
+
|
|
41
|
+
// Per-test teardown: remove the scratch directory and restore the environment.
afterEach(() => {
  fs.rmSync(tempDir, { recursive: true, force: true })

  // Put every recorded variable back. A variable that had no value before the
  // test is deleted again instead of being assigned.
  Object.entries(savedEnv).forEach(([name, previous]) => {
    if (previous === undefined) {
      delete process.env[name]
      return
    }
    process.env[name] = previous
  })
})
|
|
53
|
+
|
|
54
|
+
// ==========================================================================
|
|
55
|
+
// Helper Functions
|
|
56
|
+
// ==========================================================================
|
|
57
|
+
|
|
58
|
+
/**
 * Write `content` to `filePath` as UTF-8, creating any missing parent
 * directories first.
 */
const createMarkdownFile = (filePath: string, content: string): void => {
  fs.mkdirSync(path.dirname(filePath), { recursive: true })
  fs.writeFileSync(filePath, content, 'utf-8')
}
|
|
66
|
+
|
|
67
|
+
/**
 * Populate `baseDir` with the small corpus: 60 markdown files, 20 each under
 * `docs/`, `guides/` and `api/`. Each file holds a title, a short intro and
 * two second-level sections.
 */
const createSmallCorpus = (baseDir: string): void => {
  const FILES_PER_DIR = 20

  // One entry per directory; files are written in this order.
  const groups: Array<{
    dir: string
    fileName: (n: number) => string
    body: (n: number) => string
  }> = [
    {
      dir: 'docs',
      fileName: (n) => `file${n}.md`,
      body: (n) =>
        `# Document ${n}\n\nThis is a test document with some content.\n\n## Section 1\n\nContent for section 1.\n\n## Section 2\n\nContent for section 2.`,
    },
    {
      dir: 'guides',
      fileName: (n) => `guide${n}.md`,
      body: (n) =>
        `# Guide ${n}\n\nThis is a guide document.\n\n## Getting Started\n\nHow to get started.\n\n## Advanced Topics\n\nAdvanced usage patterns.`,
    },
    {
      dir: 'api',
      fileName: (n) => `api${n}.md`,
      body: (n) =>
        `# API Reference ${n}\n\nAPI documentation.\n\n## Methods\n\nAvailable methods.\n\n## Examples\n\nUsage examples.`,
    },
  ]

  for (const { dir, fileName, body } of groups) {
    for (let n = 1; n <= FILES_PER_DIR; n++) {
      createMarkdownFile(path.join(baseDir, dir, fileName(n)), body(n))
    }
  }
}
|
|
93
|
+
|
|
94
|
+
/**
 * Populate `baseDir` with the large corpus: 1050 markdown files spread over
 * ten directories (`section1` .. `section10`, 105 files each). Each file holds
 * a title and three second-level sections.
 */
const createLargeCorpus = (baseDir: string): void => {
  const DIR_COUNT = 10
  const FILES_PER_DIR = 105

  for (let d = 1; d <= DIR_COUNT; d++) {
    const sectionDir = path.join(baseDir, `section${d}`)

    for (let f = 1; f <= FILES_PER_DIR; f++) {
      // The same "<dir>-<file>" tag appears in the title and the overview text.
      const tag = `${d}-${f}`
      const markdown = `# Document ${tag}\n\n## Overview\n\nContent for document ${tag}.\n\n## Details\n\nDetailed information about this topic.\n\n## Examples\n\nCode examples and usage patterns.`

      createMarkdownFile(path.join(sectionDir, `doc${f}.md`), markdown)
    }
  }
}
|
|
108
|
+
|
|
109
|
+
/**
 * Size of the file at `filePath` in bytes, or 0 when `fs.statSync` throws
 * (for example because the path does not exist).
 */
const getFileSize = (filePath: string): number => {
  let bytes = 0
  try {
    bytes = fs.statSync(filePath).size
  } catch {
    // stat failed: keep the 0 default
  }
  return bytes
}
|
|
119
|
+
|
|
120
|
+
/**
 * True when `fs.accessSync(filePath)` succeeds, false when it throws.
 */
const fileExists = (filePath: string): boolean => {
  let reachable = true
  try {
    fs.accessSync(filePath)
  } catch {
    reachable = false
  }
  return reachable
}
|
|
131
|
+
|
|
132
|
+
// ==========================================================================
|
|
133
|
+
// Small Corpus Tests (50-100 docs)
|
|
134
|
+
// ==========================================================================
|
|
135
|
+
|
|
136
|
+
// Indexing behaviour on the 60-file corpus written by createSmallCorpus.
describe('Small Corpus Tests', () => {
  it('indexes small corpus without errors', async () => {
    createSmallCorpus(tempDir)

    const { totalDocuments, totalSections, errors } = await Effect.runPromise(
      buildIndex(tempDir),
    )

    expect(totalDocuments).toBeGreaterThanOrEqual(60)
    expect(totalSections).toBeGreaterThan(0)
    expect(errors).toHaveLength(0)
  })

  it('creates .mdcontext directory structure', async () => {
    createSmallCorpus(tempDir)

    await Effect.runPromise(buildIndex(tempDir))

    const root = path.join(tempDir, '.mdcontext')
    expect(fileExists(root)).toBe(true)
    expect(fileExists(path.join(root, 'config.json'))).toBe(true)

    // The three index files are expected under .mdcontext/indexes
    for (const indexFile of ['documents.json', 'sections.json', 'links.json']) {
      expect(fileExists(path.join(root, 'indexes', indexFile))).toBe(true)
    }
  })

  it('verifies binary format is used for vector metadata', async () => {
    createSmallCorpus(tempDir)

    // Build the document index first
    await Effect.runPromise(buildIndex(tempDir))

    // Store a single mock vector so that save() has something to write
    const store = createVectorStore(tempDir, 512)
    await Effect.runPromise(
      store.add([
        {
          id: 'test-1',
          sectionId: 'sec-1',
          documentPath: 'test.md',
          heading: 'Test',
          embedding: new Array(512).fill(0.1),
        },
      ]),
    )
    await Effect.runPromise(store.save())

    // Metadata must land in the .bin file; no .json counterpart may appear
    const storeDir = path.join(tempDir, '.mdcontext')
    expect(fileExists(path.join(storeDir, 'vectors.meta.bin'))).toBe(true)
    expect(fileExists(path.join(storeDir, 'vectors.meta.json'))).toBe(false)
  })

  it('loads index successfully after creation', async () => {
    createSmallCorpus(tempDir)

    await Effect.runPromise(buildIndex(tempDir))

    // Read back what buildIndex just wrote
    const storage = createStorage(tempDir)
    const loadedDocs = await Effect.runPromise(loadDocumentIndex(storage))
    const loadedSections = await Effect.runPromise(loadSectionIndex(storage))

    expect(loadedDocs).not.toBeNull()
    expect(loadedSections).not.toBeNull()

    const docCount = Object.keys(loadedDocs!.documents).length
    expect(docCount).toBeGreaterThanOrEqual(60)

    const sectionCount = Object.keys(loadedSections!.sections).length
    expect(sectionCount).toBeGreaterThan(0)
  })

  it('incremental index skips unchanged files', async () => {
    createSmallCorpus(tempDir)

    // Initial run indexes everything
    const { documentsIndexed: initiallyIndexed } = await Effect.runPromise(
      buildIndex(tempDir),
    )

    // Second run with no file changes in between
    const rerun = await Effect.runPromise(buildIndex(tempDir))

    expect(rerun.documentsIndexed).toBe(0)
    expect(rerun.skipped.unchanged).toBe(initiallyIndexed)
  })

  it('force flag rebuilds entire index', async () => {
    createSmallCorpus(tempDir)

    // Initial run establishes the document total
    const { totalDocuments: expectedTotal } = await Effect.runPromise(
      buildIndex(tempDir),
    )

    // Second run with force: nothing may be skipped as unchanged
    const forced = await Effect.runPromise(buildIndex(tempDir, { force: true }))

    expect(forced.documentsIndexed).toBe(expectedTotal)
    expect(forced.skipped.unchanged).toBe(0)
  })
})
|
|
236
|
+
|
|
237
|
+
// ==========================================================================
|
|
238
|
+
// Large Corpus Tests (1000+ docs)
|
|
239
|
+
// ==========================================================================
|
|
240
|
+
|
|
241
|
+
// Indexing behaviour on the 1050-file corpus written by createLargeCorpus.
// The whole suite runs with a 120 s timeout because each test writes and
// indexes the full corpus.
describe('Large Corpus Tests', () => {
  it('indexes large corpus without errors', async () => {
    createLargeCorpus(tempDir)

    const result = await Effect.runPromise(buildIndex(tempDir))

    expect(result.totalDocuments).toBeGreaterThanOrEqual(1000)
    expect(result.totalSections).toBeGreaterThan(3000)
    expect(result.errors).toHaveLength(0)
    expect(result.duration).toBeGreaterThan(0)
  })

  it('verifies MessagePack handles large metadata efficiently', async () => {
    createLargeCorpus(tempDir)

    await Effect.runPromise(buildIndex(tempDir))

    const metaPath = path.join(tempDir, '.mdcontext', 'vectors.meta.bin')

    // NOTE(review): only a single entry is stored, so this checks that the
    // binary metadata file is written and non-empty rather than how it scales.
    const vectorStore = createVectorStore(tempDir, 512)
    await Effect.runPromise(
      vectorStore.add([
        {
          id: 'test-1',
          sectionId: 'sec-1',
          documentPath: 'test.md',
          heading: 'Test',
          embedding: Array(512).fill(0.1),
        },
      ]),
    )
    await Effect.runPromise(vectorStore.save())

    // Binary file should exist
    expect(fileExists(metaPath)).toBe(true)

    // getFileSize returns 0 for a missing file, so this also guards existence
    const size = getFileSize(metaPath)
    expect(size).toBeGreaterThan(0)
  })

  it('checks file sizes are reasonable for large corpus', async () => {
    createLargeCorpus(tempDir)

    await Effect.runPromise(buildIndex(tempDir))

    // Check document index size
    const docPath = path.join(
      tempDir,
      '.mdcontext',
      'indexes',
      'documents.json',
    )
    const docSize = getFileSize(docPath)
    expect(docSize).toBeGreaterThan(0)
    expect(docSize).toBeLessThan(50_000_000) // < 50MB reasonable for 1000+ docs

    // Check section index size
    const sectionPath = path.join(
      tempDir,
      '.mdcontext',
      'indexes',
      'sections.json',
    )
    const sectionSize = getFileSize(sectionPath)
    expect(sectionSize).toBeGreaterThan(0)
    expect(sectionSize).toBeLessThan(100_000_000) // < 100MB reasonable
  })

  it('large corpus can be loaded after indexing', async () => {
    createLargeCorpus(tempDir)

    await Effect.runPromise(buildIndex(tempDir))

    const storage = createStorage(tempDir)
    const docIndex = await Effect.runPromise(loadDocumentIndex(storage))
    const sectionIndex = await Effect.runPromise(loadSectionIndex(storage))

    expect(docIndex).not.toBeNull()
    expect(sectionIndex).not.toBeNull()
    expect(Object.keys(docIndex!.documents).length).toBeGreaterThanOrEqual(
      1000,
    )
    expect(Object.keys(sectionIndex!.sections).length).toBeGreaterThanOrEqual(
      3000,
    )
  })

  it('processes large corpus in reasonable time', async () => {
    createLargeCorpus(tempDir)

    // Only the indexing step is timed; writing the corpus is excluded.
    const startTime = Date.now()
    const result = await Effect.runPromise(buildIndex(tempDir))
    const duration = Date.now() - startTime

    // Should complete within reasonable time (adjust based on CI performance)
    expect(duration).toBeLessThan(60_000) // < 60 seconds
    expect(result.duration).toBeGreaterThan(0)
    // The test timeout is larger than the 60 s budget asserted above and also
    // has to cover corpus creation. With a 60 s timeout the runner would abort
    // a slow run before the assertion could report the measured duration.
  }, 120000)
}, 120000)
|
|
342
|
+
|
|
343
|
+
// ==========================================================================
|
|
344
|
+
// Metadata and Binary Format Tests
|
|
345
|
+
// ==========================================================================
|
|
346
|
+
|
|
347
|
+
describe('Metadata and Binary Format Tests', () => {
  // Embedding width used by every vector store created in this suite.
  const DIMENSIONS = 512

  // Builds the single placeholder entry each test adds before saving.
  const makeEntry = () => ({
    id: 'test-1',
    sectionId: 'sec-1',
    documentPath: 'test.md',
    heading: 'Test',
    embedding: Array(DIMENSIONS).fill(0.1),
  })

  // Resolves a file name inside <tempDir>/.mdcontext. tempDir is read at call
  // time, so the value assigned for the current test is the one used.
  const metaFile = (name: string) => path.join(tempDir, '.mdcontext', name)

  it('verifies vectors.meta.bin is created not vectors.meta.json', async () => {
    createSmallCorpus(tempDir)

    await Effect.runPromise(buildIndex(tempDir))

    const vectorStore = createVectorStore(tempDir, DIMENSIONS)
    await Effect.runPromise(vectorStore.add([makeEntry()]))
    await Effect.runPromise(vectorStore.save())

    // Saving must produce the binary metadata file and no JSON counterpart.
    expect(fileExists(metaFile('vectors.meta.bin'))).toBe(true)
    expect(fileExists(metaFile('vectors.meta.json'))).toBe(false)
  })

  it('binary metadata can be loaded after saving', async () => {
    createSmallCorpus(tempDir)

    await Effect.runPromise(buildIndex(tempDir))

    // Save vector store with data
    const vectorStore1 = createVectorStore(tempDir, DIMENSIONS)
    await Effect.runPromise(vectorStore1.add([makeEntry()]))
    await Effect.runPromise(vectorStore1.save())

    // Load through a second store instance pointed at the same directory
    const vectorStore2 = createVectorStore(tempDir, DIMENSIONS)
    const loadResult = await Effect.runPromise(vectorStore2.load())

    expect(loadResult.loaded).toBe(true)
  })

  it('handles metadata size warnings for large corpora', async () => {
    createLargeCorpus(tempDir)

    await Effect.runPromise(buildIndex(tempDir))

    const vectorStore = createVectorStore(tempDir, DIMENSIONS)
    await Effect.runPromise(vectorStore.add([makeEntry()]))

    // Capture console.warn calls. Every argument is stringified and joined so
    // multi-argument or non-string warnings are recorded too and can never
    // make the `includes` check below throw.
    const originalWarn = console.warn
    const warnings: string[] = []
    console.warn = (...args: unknown[]) => {
      warnings.push(args.map((arg) => String(arg)).join(' '))
    }

    try {
      await Effect.runPromise(vectorStore.save())

      // For very large metadata files (>100MB), a warning should appear.
      // NOTE(review): only one entry is added above, so this branch is
      // presumably not reached in practice - confirm whether the threshold
      // should be exercised explicitly.
      const size = getFileSize(metaFile('vectors.meta.bin'))

      if (size > 100_000_000) {
        expect(warnings.some((w) => w.includes('Large metadata'))).toBe(true)
      }
    } finally {
      // Always restore the real console.warn, even if save() rejects.
      console.warn = originalWarn
    }
    // Extended timeout: this test indexes the large corpus, like the
    // large-corpus tests that already run with an explicit timeout.
  }, 120_000)
})
|
|
440
|
+
|
|
441
|
+
// ==========================================================================
|
|
442
|
+
// Vector Store Loading Tests
|
|
443
|
+
// ==========================================================================
|
|
444
|
+
|
|
445
|
+
describe('Vector Store Loading Tests', () => {
  // Embedding width used when saving; one test deliberately loads with another.
  const DIMS = 512

  // Fresh copy of the one placeholder entry that gets persisted in each test.
  const placeholderEntry = () => ({
    id: 'test-1',
    sectionId: 'sec-1',
    documentPath: 'test.md',
    heading: 'Test',
    embedding: Array(DIMS).fill(0.1),
  })

  it('vector store loads successfully after index creation', async () => {
    createSmallCorpus(tempDir)
    await Effect.runPromise(buildIndex(tempDir))

    // The same store instance is used for add, save and load.
    const store = createVectorStore(tempDir, DIMS)
    await Effect.runPromise(store.add([placeholderEntry()]))
    await Effect.runPromise(store.save())

    const outcome = await Effect.runPromise(store.load())

    expect(outcome.loaded).toBe(true)
    expect(outcome.hnswMismatch).toBeUndefined()
  })

  it('detects dimension mismatch on load', async () => {
    createSmallCorpus(tempDir)
    await Effect.runPromise(buildIndex(tempDir))

    // Persist with 512 dimensions.
    const writer = createVectorStore(tempDir, DIMS)
    await Effect.runPromise(writer.add([placeholderEntry()]))
    await Effect.runPromise(writer.save())

    // A store configured for 768 dimensions must refuse to load that data.
    const reader = createVectorStore(tempDir, 768)
    const pendingLoad = Effect.runPromise(reader.load())

    await expect(pendingLoad).rejects.toThrowError()
  })

  it('returns false loaded status when files do not exist', async () => {
    // Nothing has been saved to tempDir in this test.
    const store = createVectorStore(tempDir, DIMS)
    const outcome = await Effect.runPromise(store.load())

    expect(outcome.loaded).toBe(false)
  })

  it('preserves provider metadata across save/load', async () => {
    createSmallCorpus(tempDir)
    await Effect.runPromise(buildIndex(tempDir))

    // Tag the store with provider details before persisting it.
    const writer = createVectorStore(tempDir, DIMS) as HnswVectorStore
    writer.setProvider('openai', 'text-embedding-3-small', undefined)
    await Effect.runPromise(writer.add([placeholderEntry()]))
    await Effect.runPromise(writer.save())

    // A second instance should report the same provider details after load.
    const reader = createVectorStore(tempDir, DIMS)
    await Effect.runPromise(reader.load())
    const stats = reader.getStats()

    expect(stats.provider).toBe('openai')
    expect(stats.providerModel).toBe('text-embedding-3-small')
    expect(stats.dimensions).toBe(512)
  })

  it('handles HNSW parameter mismatch detection', async () => {
    createSmallCorpus(tempDir)
    await Effect.runPromise(buildIndex(tempDir))

    // Parameters the index is written with.
    const savedParams = { m: 16, efConstruction: 200 }
    const writer = createVectorStore(tempDir, DIMS, savedParams)
    await Effect.runPromise(writer.add([placeholderEntry()]))
    await Effect.runPromise(writer.save())

    // Parameters the loading store is configured with.
    const requestedParams = { m: 24, efConstruction: 256 }
    const reader = createVectorStore(tempDir, DIMS, requestedParams)
    const outcome = await Effect.runPromise(reader.load())

    // The load succeeds and reports both sides of the mismatch.
    expect(outcome.loaded).toBe(true)
    expect(outcome.hnswMismatch).toBeDefined()
    expect(outcome.hnswMismatch?.configParams.m).toBe(24)
    expect(outcome.hnswMismatch?.indexParams.m).toBe(16)
  })
})
|
|
573
|
+
|
|
574
|
+
// ==========================================================================
|
|
575
|
+
// Edge Cases and Error Handling
|
|
576
|
+
// ==========================================================================
|
|
577
|
+
|
|
578
|
+
describe('Edge Cases and Error Handling', () => {
  it('handles empty corpus gracefully', async () => {
    // A directory tree with no files in it.
    const emptyDir = path.join(tempDir, 'empty')
    fs.mkdirSync(emptyDir, { recursive: true })

    const { totalDocuments, totalSections, errors } = await Effect.runPromise(
      buildIndex(tempDir),
    )

    expect(totalDocuments).toBe(0)
    expect(totalSections).toBe(0)
    expect(errors).toHaveLength(0)
  })

  it('handles corpus with only hidden files', async () => {
    // The only markdown file sits inside a dot-directory.
    const hiddenFile = path.join(tempDir, '.hidden', 'file.md')
    createMarkdownFile(hiddenFile, '# Hidden\n\nHidden file.')

    const { totalDocuments, skipped } = await Effect.runPromise(
      buildIndex(tempDir),
    )

    expect(totalDocuments).toBe(0)
    expect(skipped.hidden).toBeGreaterThan(0)
  })

  it('handles corpus with excluded patterns', async () => {
    createSmallCorpus(tempDir)

    const options = { exclude: ['docs/**'] }
    const { totalDocuments, skipped } = await Effect.runPromise(
      buildIndex(tempDir, options),
    )

    // With docs/** excluded, fewer than 60 documents should be indexed.
    expect(totalDocuments).toBeLessThan(60)
    expect(skipped.excluded).toBeGreaterThan(0)
  })

  it('handles files with parsing errors', async () => {
    // NOTE(review): this content looks like well-formed markdown and the
    // assertion below holds for any non-negative count, so the test only
    // shows that buildIndex resolves - confirm what input should trigger a
    // parsing error.
    const target = path.join(tempDir, 'invalid.md')
    createMarkdownFile(target, '# Test\n\nInvalid content')

    const { totalDocuments } = await Effect.runPromise(buildIndex(tempDir))

    // The build is expected to complete rather than reject.
    expect(totalDocuments).toBeGreaterThanOrEqual(0)
  })

  it('handles .gitignore patterns correctly', async () => {
    createSmallCorpus(tempDir)

    // Ignore the docs directory and any *.tmp file.
    const ignoreFile = path.join(tempDir, '.gitignore')
    fs.writeFileSync(ignoreFile, 'docs/\n*.tmp\n', 'utf-8')

    const { skipped } = await Effect.runPromise(buildIndex(tempDir))

    // At least one file must have been skipped as excluded.
    expect(skipped.excluded).toBeGreaterThan(0)
  })

  it('handles .mdcontextignore patterns correctly', async () => {
    createSmallCorpus(tempDir)

    // Ignore the guides directory via the tool-specific ignore file.
    const ignoreFile = path.join(tempDir, '.mdcontextignore')
    fs.writeFileSync(ignoreFile, 'guides/\n', 'utf-8')

    const { skipped } = await Effect.runPromise(buildIndex(tempDir))

    // At least one file must have been skipped as excluded.
    expect(skipped.excluded).toBeGreaterThan(0)
  })
})
|
|
660
|
+
|
|
661
|
+
// ==========================================================================
|
|
662
|
+
// Performance and Scalability Tests
|
|
663
|
+
// ==========================================================================
|
|
664
|
+
|
|
665
|
+
describe('Performance and Scalability', () => {
  it('indexes scale linearly with corpus size', async () => {
    // Small corpus baseline
    createSmallCorpus(tempDir)
    const smallResult = await Effect.runPromise(buildIndex(tempDir))
    // Guard the division: an empty index would yield NaN/Infinity and turn
    // the final comparison into a misleading failure.
    expect(smallResult.totalDocuments).toBeGreaterThan(0)
    const smallTimePerDoc = smallResult.duration / smallResult.totalDocuments

    // Clean and create larger corpus in a fresh temp directory. tempDir is
    // reassigned so later code refers to the new location.
    fs.rmSync(tempDir, { recursive: true, force: true })
    tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'mdcontext-embed-int-'))
    createLargeCorpus(tempDir)

    const largeResult = await Effect.runPromise(buildIndex(tempDir))
    expect(largeResult.totalDocuments).toBeGreaterThan(0)
    const largeTimePerDoc = largeResult.duration / largeResult.totalDocuments

    // Time per document should be roughly similar (within 3x)
    expect(largeTimePerDoc).toBeLessThan(smallTimePerDoc * 3)
    // Extended timeout: this test indexes the large corpus, like the
    // large-corpus tests that already run with an explicit timeout.
  }, 120_000)

  it('section index grows proportionally to documents', async () => {
    createSmallCorpus(tempDir)

    const result = await Effect.runPromise(buildIndex(tempDir))

    // Guard the division so an empty index fails with a clear message.
    expect(result.totalDocuments).toBeGreaterThan(0)

    // Each document has ~3 sections, ratio should be reasonable
    const ratio = result.totalSections / result.totalDocuments
    expect(ratio).toBeGreaterThan(2)
    expect(ratio).toBeLessThan(10)
  })

  it('handles repeated index/rebuild cycles', async () => {
    createSmallCorpus(tempDir)

    // Run multiple forced index cycles sequentially over the same corpus;
    // each one must still report the full small corpus.
    for (let i = 0; i < 5; i++) {
      const result = await Effect.runPromise(
        buildIndex(tempDir, { force: true }),
      )
      expect(result.totalDocuments).toBeGreaterThanOrEqual(60)
    }

    // Final verification: the document index is still loadable afterwards.
    const storage = createStorage(tempDir)
    const docIndex = await Effect.runPromise(loadDocumentIndex(storage))
    expect(docIndex).not.toBeNull()
  })
})
|
|
712
|
+
})
|