mdcontext 0.0.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.changeset/README.md +28 -0
- package/.changeset/config.json +11 -0
- package/.claude/settings.local.json +25 -0
- package/.github/workflows/ci.yml +83 -0
- package/.github/workflows/claude-code-review.yml +44 -0
- package/.github/workflows/claude.yml +85 -0
- package/.github/workflows/release.yml +113 -0
- package/.tldrignore +112 -0
- package/BACKLOG.md +338 -0
- package/CONTRIBUTING.md +186 -0
- package/NOTES/NOTES +44 -0
- package/README.md +434 -11
- package/biome.json +36 -0
- package/cspell.config.yaml +14 -0
- package/dist/chunk-23UPXDNL.js +3044 -0
- package/dist/chunk-2W7MO2DL.js +1366 -0
- package/dist/chunk-3NUAZGMA.js +1689 -0
- package/dist/chunk-7TOWB2XB.js +366 -0
- package/dist/chunk-7XOTOADQ.js +3065 -0
- package/dist/chunk-AH2PDM2K.js +3042 -0
- package/dist/chunk-BNXWSZ63.js +3742 -0
- package/dist/chunk-BTL5DJVU.js +3222 -0
- package/dist/chunk-HDHYG7E4.js +104 -0
- package/dist/chunk-HLR4KZBP.js +3234 -0
- package/dist/chunk-IP3FRFEB.js +1045 -0
- package/dist/chunk-KHU56VDO.js +3042 -0
- package/dist/chunk-KRYIFLQR.js +88 -0
- package/dist/chunk-LBSDNLEM.js +287 -0
- package/dist/chunk-MNTQ7HCP.js +2643 -0
- package/dist/chunk-MUJELQQ6.js +1387 -0
- package/dist/chunk-MXJGMSLV.js +2199 -0
- package/dist/chunk-N6QJGC3Z.js +2636 -0
- package/dist/chunk-OBELGBPM.js +1713 -0
- package/dist/chunk-OT7R5XTA.js +3192 -0
- package/dist/chunk-P7X4RA2T.js +106 -0
- package/dist/chunk-PIDUQNC2.js +3185 -0
- package/dist/chunk-POGCDIH4.js +3187 -0
- package/dist/chunk-PSIEOQGZ.js +3043 -0
- package/dist/chunk-PVRT3IHA.js +3238 -0
- package/dist/chunk-QNN4TT23.js +1430 -0
- package/dist/chunk-RE3R45RJ.js +3042 -0
- package/dist/chunk-S7E6TFX6.js +803 -0
- package/dist/chunk-SG6GLU4U.js +1378 -0
- package/dist/chunk-SJCDV2ST.js +274 -0
- package/dist/chunk-SYE5XLF3.js +104 -0
- package/dist/chunk-T5VLYBZD.js +103 -0
- package/dist/chunk-TOQB7VWU.js +3238 -0
- package/dist/chunk-VFNMZ4ZQ.js +3228 -0
- package/dist/chunk-VVTGZNBT.js +1629 -0
- package/dist/chunk-W7Q4RFEV.js +104 -0
- package/dist/chunk-XTYYVRLO.js +3190 -0
- package/dist/chunk-Y6MDYVJD.js +3063 -0
- package/dist/cli/main.d.ts +1 -0
- package/dist/cli/main.js +5458 -0
- package/dist/index.d.ts +653 -0
- package/dist/index.js +79 -0
- package/dist/mcp/server.d.ts +1 -0
- package/dist/mcp/server.js +472 -0
- package/dist/schema-BAWSG7KY.js +22 -0
- package/dist/schema-E3QUPL26.js +20 -0
- package/dist/schema-EHL7WUT6.js +20 -0
- package/docs/019-USAGE.md +625 -0
- package/docs/020-current-implementation.md +364 -0
- package/docs/021-DOGFOODING-FINDINGS.md +175 -0
- package/docs/BACKLOG.md +80 -0
- package/docs/CONFIG.md +1123 -0
- package/docs/DESIGN.md +439 -0
- package/docs/ERRORS.md +383 -0
- package/docs/PROJECT.md +88 -0
- package/docs/ROADMAP.md +407 -0
- package/docs/summarization.md +320 -0
- package/docs/test-links.md +9 -0
- package/justfile +40 -0
- package/package.json +74 -9
- package/pnpm-workspace.yaml +5 -0
- package/research/INDEX.md +315 -0
- package/research/code-review/README.md +90 -0
- package/research/code-review/cli-error-handling-review.md +979 -0
- package/research/code-review/code-review-validation-report.md +464 -0
- package/research/code-review/main-ts-review.md +1128 -0
- package/research/config-analysis/01-current-implementation.md +470 -0
- package/research/config-analysis/02-strategy-recommendation.md +428 -0
- package/research/config-analysis/03-task-candidates.md +715 -0
- package/research/config-analysis/033-research-configuration-management.md +828 -0
- package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
- package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
- package/research/config-docs/SUMMARY.md +357 -0
- package/research/config-docs/TEST-RESULTS.md +776 -0
- package/research/config-docs/TODO.md +542 -0
- package/research/config-docs/analysis.md +744 -0
- package/research/config-docs/fix-validation.md +502 -0
- package/research/config-docs/help-audit.md +264 -0
- package/research/config-docs/help-system-analysis.md +890 -0
- package/research/dogfood/consolidated-tool-evaluation.md +373 -0
- package/research/dogfood/strategy-a/a-synthesis.md +184 -0
- package/research/dogfood/strategy-a/a1-docs.md +226 -0
- package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
- package/research/dogfood/strategy-a/a3-llm.md +164 -0
- package/research/dogfood/strategy-b/b-synthesis.md +228 -0
- package/research/dogfood/strategy-b/b1-architecture.md +207 -0
- package/research/dogfood/strategy-b/b2-gaps.md +258 -0
- package/research/dogfood/strategy-b/b3-workflows.md +250 -0
- package/research/dogfood/strategy-c/c-synthesis.md +451 -0
- package/research/dogfood/strategy-c/c1-explorer.md +192 -0
- package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
- package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
- package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
- package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
- package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
- package/research/effect-cli-error-handling.md +845 -0
- package/research/effect-errors-as-values.md +943 -0
- package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
- package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
- package/research/errors-task-analysis/embeddings-analysis.md +709 -0
- package/research/errors-task-analysis/index-search-analysis.md +812 -0
- package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
- package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
- package/research/issue-review.md +603 -0
- package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
- package/research/llm-summarization/alternative-providers-2026.md +1428 -0
- package/research/llm-summarization/anthropic-2026.md +367 -0
- package/research/llm-summarization/claude-cli-integration.md +1706 -0
- package/research/llm-summarization/cli-integration-patterns.md +3155 -0
- package/research/llm-summarization/openai-2026.md +473 -0
- package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
- package/research/llm-summarization/opencode-cli-integration.md +1552 -0
- package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
- package/research/llm-summarization/prototype-results.md +56 -0
- package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
- package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
- package/research/mdcontext-error-analysis.md +521 -0
- package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
- package/research/mdcontext-pudding/01-index-embed.md +956 -0
- package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
- package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
- package/research/mdcontext-pudding/02-search.md +970 -0
- package/research/mdcontext-pudding/03-context.md +779 -0
- package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
- package/research/mdcontext-pudding/04-tree.md +704 -0
- package/research/mdcontext-pudding/05-config.md +1038 -0
- package/research/mdcontext-pudding/06-links-summary.txt +87 -0
- package/research/mdcontext-pudding/06-links.md +679 -0
- package/research/mdcontext-pudding/07-stats.md +693 -0
- package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
- package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
- package/research/mdcontext-pudding/README.md +168 -0
- package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
- package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
- package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
- package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
- package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
- package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
- package/research/research-quality-review.md +834 -0
- package/research/semantic-search/002-research-embedding-models.md +490 -0
- package/research/semantic-search/003-research-rag-alternatives.md +523 -0
- package/research/semantic-search/004-research-vector-search.md +841 -0
- package/research/semantic-search/032-research-semantic-search.md +427 -0
- package/research/semantic-search/embedding-text-analysis.md +156 -0
- package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
- package/research/semantic-search/query-processing-analysis.md +207 -0
- package/research/semantic-search/root-cause-and-solution.md +114 -0
- package/research/semantic-search/threshold-validation-report.md +69 -0
- package/research/semantic-search/vector-search-analysis.md +63 -0
- package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
- package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
- package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
- package/research/task-management-2026/03-lightweight-file-based.md +567 -0
- package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
- package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
- package/research/task-management-2026/linear/02-api-integrations.md +930 -0
- package/research/task-management-2026/linear/03-ai-features.md +368 -0
- package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
- package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
- package/research/test-path-issues.md +276 -0
- package/review/ALP-76/1-error-type-design.md +962 -0
- package/review/ALP-76/2-error-handling-patterns.md +906 -0
- package/review/ALP-76/3-error-presentation.md +624 -0
- package/review/ALP-76/4-test-coverage.md +625 -0
- package/review/ALP-76/5-migration-completeness.md +440 -0
- package/review/ALP-76/6-effect-best-practices.md +755 -0
- package/scripts/apply-branch-protection.sh +47 -0
- package/scripts/branch-protection-templates.json +79 -0
- package/scripts/prototype-summarization.ts +346 -0
- package/scripts/rebuild-hnswlib.js +58 -0
- package/scripts/setup-branch-protection.sh +64 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
- package/src/cli/argv-preprocessor.test.ts +210 -0
- package/src/cli/argv-preprocessor.ts +202 -0
- package/src/cli/cli.test.ts +627 -0
- package/src/cli/commands/backlinks.ts +54 -0
- package/src/cli/commands/config-cmd.ts +642 -0
- package/src/cli/commands/context.ts +285 -0
- package/src/cli/commands/duplicates.ts +122 -0
- package/src/cli/commands/embeddings.ts +529 -0
- package/src/cli/commands/index-cmd.ts +480 -0
- package/src/cli/commands/index.ts +16 -0
- package/src/cli/commands/links.ts +52 -0
- package/src/cli/commands/search.ts +1281 -0
- package/src/cli/commands/stats.ts +149 -0
- package/src/cli/commands/tree.ts +128 -0
- package/src/cli/config-layer.ts +176 -0
- package/src/cli/error-handler.test.ts +235 -0
- package/src/cli/error-handler.ts +655 -0
- package/src/cli/flag-schemas.ts +341 -0
- package/src/cli/help.ts +588 -0
- package/src/cli/index.ts +9 -0
- package/src/cli/main.ts +435 -0
- package/src/cli/options.ts +41 -0
- package/src/cli/shared-error-handling.ts +199 -0
- package/src/cli/typo-suggester.test.ts +105 -0
- package/src/cli/typo-suggester.ts +130 -0
- package/src/cli/utils.ts +259 -0
- package/src/config/file-provider.test.ts +320 -0
- package/src/config/file-provider.ts +273 -0
- package/src/config/index.ts +72 -0
- package/src/config/integration.test.ts +667 -0
- package/src/config/precedence.test.ts +277 -0
- package/src/config/precedence.ts +451 -0
- package/src/config/schema.test.ts +414 -0
- package/src/config/schema.ts +603 -0
- package/src/config/service.test.ts +320 -0
- package/src/config/service.ts +243 -0
- package/src/config/testing.test.ts +264 -0
- package/src/config/testing.ts +110 -0
- package/src/core/index.ts +1 -0
- package/src/core/types.ts +113 -0
- package/src/duplicates/detector.test.ts +183 -0
- package/src/duplicates/detector.ts +414 -0
- package/src/duplicates/index.ts +18 -0
- package/src/embeddings/embedding-namespace.test.ts +300 -0
- package/src/embeddings/embedding-namespace.ts +947 -0
- package/src/embeddings/heading-boost.test.ts +222 -0
- package/src/embeddings/hnsw-build-options.test.ts +198 -0
- package/src/embeddings/hyde.test.ts +272 -0
- package/src/embeddings/hyde.ts +264 -0
- package/src/embeddings/index.ts +10 -0
- package/src/embeddings/openai-provider.ts +414 -0
- package/src/embeddings/pricing.json +22 -0
- package/src/embeddings/provider-constants.ts +204 -0
- package/src/embeddings/provider-errors.test.ts +967 -0
- package/src/embeddings/provider-errors.ts +565 -0
- package/src/embeddings/provider-factory.test.ts +240 -0
- package/src/embeddings/provider-factory.ts +225 -0
- package/src/embeddings/provider-integration.test.ts +788 -0
- package/src/embeddings/query-preprocessing.test.ts +187 -0
- package/src/embeddings/semantic-search-threshold.test.ts +508 -0
- package/src/embeddings/semantic-search.ts +1270 -0
- package/src/embeddings/types.ts +359 -0
- package/src/embeddings/vector-store.ts +708 -0
- package/src/embeddings/voyage-provider.ts +313 -0
- package/src/errors/errors.test.ts +845 -0
- package/src/errors/index.ts +533 -0
- package/src/index/ignore-patterns.test.ts +354 -0
- package/src/index/ignore-patterns.ts +305 -0
- package/src/index/index.ts +4 -0
- package/src/index/indexer.ts +684 -0
- package/src/index/storage.ts +260 -0
- package/src/index/types.ts +147 -0
- package/src/index/watcher.ts +189 -0
- package/src/index.ts +30 -0
- package/src/integration/search-keyword.test.ts +678 -0
- package/src/mcp/server.ts +612 -0
- package/src/parser/index.ts +1 -0
- package/src/parser/parser.test.ts +291 -0
- package/src/parser/parser.ts +394 -0
- package/src/parser/section-filter.test.ts +277 -0
- package/src/parser/section-filter.ts +392 -0
- package/src/search/__tests__/hybrid-search.test.ts +650 -0
- package/src/search/bm25-store.ts +366 -0
- package/src/search/cross-encoder.test.ts +253 -0
- package/src/search/cross-encoder.ts +406 -0
- package/src/search/fuzzy-search.test.ts +419 -0
- package/src/search/fuzzy-search.ts +273 -0
- package/src/search/hybrid-search.ts +448 -0
- package/src/search/path-matcher.test.ts +276 -0
- package/src/search/path-matcher.ts +33 -0
- package/src/search/query-parser.test.ts +260 -0
- package/src/search/query-parser.ts +319 -0
- package/src/search/searcher.test.ts +280 -0
- package/src/search/searcher.ts +724 -0
- package/src/search/wink-bm25.d.ts +30 -0
- package/src/summarization/cli-providers/claude.ts +202 -0
- package/src/summarization/cli-providers/detection.test.ts +273 -0
- package/src/summarization/cli-providers/detection.ts +118 -0
- package/src/summarization/cli-providers/index.ts +8 -0
- package/src/summarization/cost.test.ts +139 -0
- package/src/summarization/cost.ts +102 -0
- package/src/summarization/error-handler.test.ts +127 -0
- package/src/summarization/error-handler.ts +111 -0
- package/src/summarization/index.ts +102 -0
- package/src/summarization/pipeline.test.ts +498 -0
- package/src/summarization/pipeline.ts +231 -0
- package/src/summarization/prompts.test.ts +269 -0
- package/src/summarization/prompts.ts +133 -0
- package/src/summarization/provider-factory.test.ts +396 -0
- package/src/summarization/provider-factory.ts +178 -0
- package/src/summarization/types.ts +184 -0
- package/src/summarize/budget-bugs.test.ts +620 -0
- package/src/summarize/formatters.ts +419 -0
- package/src/summarize/index.ts +20 -0
- package/src/summarize/summarizer.test.ts +275 -0
- package/src/summarize/summarizer.ts +597 -0
- package/src/summarize/verify-bugs.test.ts +238 -0
- package/src/types/huggingface-transformers.d.ts +66 -0
- package/src/utils/index.ts +1 -0
- package/src/utils/tokens.test.ts +142 -0
- package/src/utils/tokens.ts +186 -0
- package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
- package/tests/fixtures/cli/.mdcontext/config.json +8 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
- package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
- package/tests/fixtures/cli/.mdcontext/indexes/sections.json +247 -0
- package/tests/fixtures/cli/README.md +9 -0
- package/tests/fixtures/cli/api-reference.md +11 -0
- package/tests/fixtures/cli/getting-started.md +11 -0
- package/tests/integration/embed-index.test.ts +712 -0
- package/tests/integration/search-context.test.ts +469 -0
- package/tests/integration/search-semantic.test.ts +522 -0
- package/tsconfig.json +26 -0
- package/vitest.config.ts +16 -0
- package/vitest.setup.ts +12 -0
|
@@ -0,0 +1,684 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Indexer service for building and updating indexes
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import * as fs from 'node:fs/promises'
|
|
6
|
+
import * as path from 'node:path'
|
|
7
|
+
import { Effect } from 'effect'
|
|
8
|
+
import type { Ignore } from 'ignore'
|
|
9
|
+
import type { MdSection } from '../core/types.js'
|
|
10
|
+
import {
|
|
11
|
+
type DirectoryCreateError,
|
|
12
|
+
DirectoryWalkError,
|
|
13
|
+
type FileReadError,
|
|
14
|
+
type FileWriteError,
|
|
15
|
+
type IndexCorruptedError,
|
|
16
|
+
ParseError,
|
|
17
|
+
} from '../errors/index.js'
|
|
18
|
+
import { parse } from '../parser/parser.js'
|
|
19
|
+
import { createIgnoreFilter, shouldIgnore } from './ignore-patterns.js'
|
|
20
|
+
import {
|
|
21
|
+
computeHash,
|
|
22
|
+
createEmptyDocumentIndex,
|
|
23
|
+
createEmptyLinkIndex,
|
|
24
|
+
createEmptySectionIndex,
|
|
25
|
+
createStorage,
|
|
26
|
+
initializeIndex,
|
|
27
|
+
loadDocumentIndex,
|
|
28
|
+
loadLinkIndex,
|
|
29
|
+
loadSectionIndex,
|
|
30
|
+
saveDocumentIndex,
|
|
31
|
+
saveLinkIndex,
|
|
32
|
+
saveSectionIndex,
|
|
33
|
+
} from './storage.js'
|
|
34
|
+
import type {
|
|
35
|
+
DocumentEntry,
|
|
36
|
+
DocumentIndex,
|
|
37
|
+
FileProcessingError,
|
|
38
|
+
IndexResult,
|
|
39
|
+
SectionEntry,
|
|
40
|
+
SkipSummary,
|
|
41
|
+
} from './types.js'
|
|
42
|
+
|
|
43
|
+
// ============================================================================
|
|
44
|
+
// File Discovery
|
|
45
|
+
// ============================================================================
|
|
46
|
+
|
|
47
|
+
/**
 * Decide whether a filename denotes a markdown document.
 *
 * The match is case-sensitive and looks only at the end of the name:
 * `.md` and `.mdx` are accepted, everything else is rejected.
 */
const isMarkdownFile = (filename: string): boolean => /\.mdx?$/.test(filename)
|
|
49
|
+
|
|
50
|
+
/**
 * Tallies of directory entries that a walk passed over.
 */
interface WalkSkipCounts {
  /** Number of entries skipped because their name starts with `.` */
  hidden: number
  /** Number of entries skipped because the ignore filter rejected them */
  excluded: number
}

/**
 * Outcome of a directory walk: the files that were collected together with
 * the counts of entries that were skipped along the way.
 */
interface WalkResult {
  /** Paths of the collected files */
  readonly files: string[]
  /** Skip counters accumulated during the walk */
  readonly skipped: WalkSkipCounts
}
|
|
60
|
+
|
|
61
|
+
/**
 * Recursively collect markdown files below `dir`, applying the ignore filter.
 *
 * Entries are visited in the order `fs.readdir` returns them, descending into
 * each accepted directory as soon as it is encountered. For every entry:
 *
 * - names starting with `.` are skipped without consulting the filter; only
 *   hidden directories (not hidden files) are added to the `hidden` count
 * - entries rejected by `shouldIgnore` (files and directories alike) are
 *   skipped and added to the `excluded` count
 * - remaining directories are descended into; remaining regular files are
 *   collected when `isMarkdownFile` accepts their name
 *
 * @param dir - Directory to walk
 * @param rootPath - Root path for computing the relative paths given to the filter
 * @param filter - Ignore filter instance
 * @returns Collected file paths (each `dir` joined with the nested entry names)
 *   and the skip counts for the whole subtree
 */
const walkDirectory = async (
  dir: string,
  rootPath: string,
  filter: Ignore,
): Promise<WalkResult> => {
  // Shared accumulators: nested levels append here directly instead of
  // returning arrays that would have to be spread into the parent's list.
  // Spreading a very large array into push() can exceed the engine's
  // argument limit and throw a RangeError.
  const files: string[] = []
  const skipped = { hidden: 0, excluded: 0 }

  const visit = async (currentDir: string): Promise<void> => {
    const entries = await fs.readdir(currentDir, { withFileTypes: true })

    for (const entry of entries) {
      // Skip hidden files/directories (starting with .); only directories
      // contribute to the hidden count.
      if (entry.name.startsWith('.')) {
        if (entry.isDirectory()) {
          skipped.hidden++
        }
        continue
      }

      const fullPath = path.join(currentDir, entry.name)
      const relativePath = path.relative(rootPath, fullPath)

      // The ignore filter applies to files and directories alike, and both
      // count as one excluded entry.
      if (shouldIgnore(relativePath, filter)) {
        skipped.excluded++
        continue
      }

      if (entry.isDirectory()) {
        // Sequential descent keeps the output order identical to readdir order.
        await visit(fullPath)
      } else if (entry.isFile() && isMarkdownFile(entry.name)) {
        files.push(fullPath)
      }
    }
  }

  await visit(dir)

  return { files, skipped }
}
|
|
113
|
+
|
|
114
|
+
// ============================================================================
|
|
115
|
+
// Section Flattening
|
|
116
|
+
// ============================================================================
|
|
117
|
+
|
|
118
|
+
/**
 * Flatten a tree of sections into a list of index entries.
 *
 * The output is in pre-order: each section is emitted before its children,
 * and siblings keep their original order. Every entry is stamped with the
 * owning document's id and path.
 *
 * @param sections - Top-level sections of the document
 * @param docId - Id recorded as `documentId` on every entry
 * @param docPath - Path recorded as `documentPath` on every entry
 * @returns One entry per section in the tree
 */
const flattenSections = (
  sections: readonly MdSection[],
  docId: string,
  docPath: string,
): SectionEntry[] => {
  // Build the index entry for a single section (children are not included).
  const toEntry = (node: MdSection): SectionEntry => {
    const { tokenCount, hasCode, hasList, hasTable } = node.metadata
    return {
      id: node.id,
      documentId: docId,
      documentPath: docPath,
      heading: node.heading,
      level: node.level,
      startLine: node.startLine,
      endLine: node.endLine,
      tokenCount,
      hasCode,
      hasList,
      hasTable,
    }
  }

  // Parent first, then its whole subtree, then the next sibling.
  const collect = (nodes: readonly MdSection[]): SectionEntry[] =>
    nodes.flatMap((node) => [toEntry(node), ...collect(node.children)])

  return collect(sections)
}
|
|
151
|
+
|
|
152
|
+
// ============================================================================
|
|
153
|
+
// Link Resolution
|
|
154
|
+
// ============================================================================
|
|
155
|
+
|
|
156
|
+
/**
 * Resolve the target of a link found in a document.
 *
 * @param href - Raw link target as written in the document
 * @param fromPath - Path of the document containing the link
 * @param rootPath - Root directory that link targets must stay inside
 * @returns
 * - `fromPath` unchanged when `href` is a pure fragment (`#anchor`)
 * - `null` for `http://` / `https://` links, for hrefs with an empty path
 *   part, and for targets that resolve outside `rootPath`
 * - otherwise the target path relative to `rootPath` (any `#fragment`
 *   suffix is dropped)
 */
const resolveInternalLink = (
  href: string,
  fromPath: string,
  rootPath: string,
): string | null => {
  // A pure fragment points back at the linking document itself.
  // NOTE(review): this returns `fromPath` exactly as given, whereas the final
  // branch returns a path relative to `rootPath` — confirm callers expect the
  // two forms to differ.
  if (href.startsWith('#')) {
    return fromPath
  }

  if (href.startsWith('http://') || href.startsWith('https://')) {
    return null
  }

  // Drop any trailing "#fragment"; only the file part is resolved.
  const linkPath = href.split('#')[0] ?? ''
  if (!linkPath) return null

  const resolved = path.resolve(path.dirname(fromPath), linkPath)
  const relativeToRoot = path.relative(rootPath, resolved)

  // Containment is decided on path segments, not on a raw string prefix:
  // "/root/docs-other" shares a prefix with "/root/docs" but is outside it.
  // An absolute result covers targets on a different drive (Windows).
  const escapesRoot =
    relativeToRoot === '..' ||
    relativeToRoot.startsWith(`..${path.sep}`) ||
    path.isAbsolute(relativeToRoot)
  if (escapesRoot) {
    return null
  }

  return relativeToRoot
}
|
|
181
|
+
|
|
182
|
+
// ============================================================================
|
|
183
|
+
// Index Building
|
|
184
|
+
// ============================================================================
|
|
185
|
+
|
|
186
|
+
/**
 * Snapshot handed to the `onProgress` callback while files are indexed.
 */
export interface IndexProgress {
  /** 1-based position of the file being reported */
  readonly current: number
  /** Total number of files discovered for this run */
  readonly total: number
  /** Path of the file being reported, relative to the index root */
  readonly filePath: string
}
|
|
191
|
+
|
|
192
|
+
/**
 * Options accepted by `buildIndex`. Every field is optional.
 */
export interface IndexOptions {
  /**
   * When truthy, the document index starts out empty and files are
   * re-processed even if their hash and mtime are unchanged
   */
  readonly force?: boolean | undefined
  /** Exclude patterns from the CLI/config (take precedence over ignore files) */
  readonly exclude?: readonly string[] | undefined
  /** Honor `.gitignore` when building the ignore filter (default: true) */
  readonly honorGitignore?: boolean | undefined
  /** Honor `.mdcontextignore` when building the ignore filter (default: true) */
  readonly honorMdcontextignore?: boolean | undefined
  /** Invoked once per discovered file with its position, before the file is processed */
  readonly onProgress?: ((progress: IndexProgress) => void) | undefined
}
|
|
203
|
+
|
|
204
|
+
export const buildIndex = (
|
|
205
|
+
rootPath: string,
|
|
206
|
+
options: IndexOptions = {},
|
|
207
|
+
): Effect.Effect<
|
|
208
|
+
IndexResult,
|
|
209
|
+
| DirectoryWalkError
|
|
210
|
+
| DirectoryCreateError
|
|
211
|
+
| FileReadError
|
|
212
|
+
| FileWriteError
|
|
213
|
+
| IndexCorruptedError
|
|
214
|
+
> =>
|
|
215
|
+
Effect.gen(function* () {
|
|
216
|
+
const startTime = Date.now()
|
|
217
|
+
const storage = createStorage(rootPath)
|
|
218
|
+
const errors: FileProcessingError[] = []
|
|
219
|
+
|
|
220
|
+
// Initialize storage
|
|
221
|
+
yield* initializeIndex(storage)
|
|
222
|
+
|
|
223
|
+
// Load existing indexes or create empty ones
|
|
224
|
+
const existingDocIndex = yield* loadDocumentIndex(storage)
|
|
225
|
+
const docIndex: DocumentIndex =
|
|
226
|
+
options.force || !existingDocIndex
|
|
227
|
+
? createEmptyDocumentIndex(storage.rootPath)
|
|
228
|
+
: existingDocIndex
|
|
229
|
+
|
|
230
|
+
// Load existing section and link indexes to preserve data for unchanged files
|
|
231
|
+
const existingSectionIndex = yield* loadSectionIndex(storage)
|
|
232
|
+
const existingLinkIndex = yield* loadLinkIndex(storage)
|
|
233
|
+
const sectionIndex = existingSectionIndex ?? createEmptySectionIndex()
|
|
234
|
+
const linkIndex = existingLinkIndex ?? createEmptyLinkIndex()
|
|
235
|
+
|
|
236
|
+
// Build ignore filter with proper precedence:
|
|
237
|
+
// CLI/config patterns > .mdcontextignore > .gitignore > defaults
|
|
238
|
+
const ignoreResult = yield* createIgnoreFilter({
|
|
239
|
+
rootPath: storage.rootPath,
|
|
240
|
+
cliPatterns: options.exclude,
|
|
241
|
+
honorGitignore: options.honorGitignore ?? true,
|
|
242
|
+
honorMdcontextignore: options.honorMdcontextignore ?? true,
|
|
243
|
+
})
|
|
244
|
+
|
|
245
|
+
// Discover files using the ignore filter
|
|
246
|
+
const walkResult = yield* Effect.tryPromise({
|
|
247
|
+
try: () =>
|
|
248
|
+
walkDirectory(storage.rootPath, storage.rootPath, ignoreResult.filter),
|
|
249
|
+
catch: (e) =>
|
|
250
|
+
new DirectoryWalkError({
|
|
251
|
+
path: storage.rootPath,
|
|
252
|
+
message: `Failed to traverse directory: ${e instanceof Error ? e.message : String(e)}`,
|
|
253
|
+
cause: e,
|
|
254
|
+
}),
|
|
255
|
+
})
|
|
256
|
+
|
|
257
|
+
const { files, skipped: walkSkipped } = walkResult
|
|
258
|
+
|
|
259
|
+
// Process each file
|
|
260
|
+
let documentsIndexed = 0
|
|
261
|
+
let sectionsIndexed = 0
|
|
262
|
+
let linksIndexed = 0
|
|
263
|
+
let unchangedCount = 0
|
|
264
|
+
|
|
265
|
+
const mutableDocuments: Record<string, DocumentEntry> = {
|
|
266
|
+
...docIndex.documents,
|
|
267
|
+
}
|
|
268
|
+
const mutableSections: Record<string, SectionEntry> = {
|
|
269
|
+
...sectionIndex.sections,
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
const mutableByHeading: Record<string, string[]> = Object.assign(
|
|
273
|
+
Object.create(null),
|
|
274
|
+
Object.fromEntries(
|
|
275
|
+
Object.entries(sectionIndex.byHeading).map(([k, v]) => [k, [...v]]),
|
|
276
|
+
),
|
|
277
|
+
)
|
|
278
|
+
const mutableByDocument: Record<string, string[]> = Object.assign(
|
|
279
|
+
Object.create(null),
|
|
280
|
+
Object.fromEntries(
|
|
281
|
+
Object.entries(sectionIndex.byDocument).map(([k, v]) => [k, [...v]]),
|
|
282
|
+
),
|
|
283
|
+
)
|
|
284
|
+
const mutableForward: Record<string, string[]> = Object.assign(
|
|
285
|
+
Object.create(null),
|
|
286
|
+
Object.fromEntries(
|
|
287
|
+
Object.entries(linkIndex.forward).map(([k, v]) => [k, [...v]]),
|
|
288
|
+
),
|
|
289
|
+
)
|
|
290
|
+
const mutableBackward: Record<string, string[]> = Object.assign(
|
|
291
|
+
Object.create(null),
|
|
292
|
+
Object.fromEntries(
|
|
293
|
+
Object.entries(linkIndex.backward).map(([k, v]) => [k, [...v]]),
|
|
294
|
+
),
|
|
295
|
+
)
|
|
296
|
+
const brokenLinks: string[] = [...linkIndex.broken]
|
|
297
|
+
const totalFiles = files.length
|
|
298
|
+
|
|
299
|
+
for (let fileIndex = 0; fileIndex < files.length; fileIndex++) {
|
|
300
|
+
const filePath = files[fileIndex]!
|
|
301
|
+
const relativePath = path.relative(storage.rootPath, filePath)
|
|
302
|
+
|
|
303
|
+
// Report progress
|
|
304
|
+
if (options.onProgress) {
|
|
305
|
+
options.onProgress({
|
|
306
|
+
current: fileIndex + 1,
|
|
307
|
+
total: totalFiles,
|
|
308
|
+
filePath: relativePath,
|
|
309
|
+
})
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
// Process each file, collecting errors instead of failing
|
|
313
|
+
const processFile = Effect.gen(function* () {
|
|
314
|
+
// Read file content and stats
|
|
315
|
+
const [content, stats] = yield* Effect.promise(() =>
|
|
316
|
+
Promise.all([fs.readFile(filePath, 'utf-8'), fs.stat(filePath)]),
|
|
317
|
+
)
|
|
318
|
+
|
|
319
|
+
const hash = computeHash(content)
|
|
320
|
+
const existingEntry = mutableDocuments[relativePath]
|
|
321
|
+
|
|
322
|
+
// Skip if unchanged
|
|
323
|
+
if (
|
|
324
|
+
!options.force &&
|
|
325
|
+
existingEntry &&
|
|
326
|
+
existingEntry.hash === hash &&
|
|
327
|
+
existingEntry.mtime === stats.mtime.getTime()
|
|
328
|
+
) {
|
|
329
|
+
unchangedCount++
|
|
330
|
+
return // File unchanged, skip processing
|
|
331
|
+
}
|
|
332
|
+
|
|
333
|
+
// Parse document
|
|
334
|
+
const doc = yield* parse(content, {
|
|
335
|
+
path: relativePath,
|
|
336
|
+
lastModified: stats.mtime,
|
|
337
|
+
}).pipe(
|
|
338
|
+
Effect.mapError(
|
|
339
|
+
(e) =>
|
|
340
|
+
new ParseError({
|
|
341
|
+
message: e.message,
|
|
342
|
+
path: relativePath,
|
|
343
|
+
...(e.line !== undefined && { line: e.line }),
|
|
344
|
+
...(e.column !== undefined && { column: e.column }),
|
|
345
|
+
}),
|
|
346
|
+
),
|
|
347
|
+
)
|
|
348
|
+
|
|
349
|
+
// Clean up old sections for this document before adding new ones
|
|
350
|
+
if (existingEntry) {
|
|
351
|
+
const oldSectionIds = mutableByDocument[existingEntry.id] ?? []
|
|
352
|
+
for (const sectionId of oldSectionIds) {
|
|
353
|
+
const oldSection = mutableSections[sectionId]
|
|
354
|
+
if (oldSection) {
|
|
355
|
+
// Remove from byHeading
|
|
356
|
+
const headingKey = oldSection.heading.toLowerCase()
|
|
357
|
+
const headingList = mutableByHeading[headingKey]
|
|
358
|
+
if (headingList) {
|
|
359
|
+
const idx = headingList.indexOf(sectionId)
|
|
360
|
+
if (idx !== -1) headingList.splice(idx, 1)
|
|
361
|
+
}
|
|
362
|
+
}
|
|
363
|
+
delete mutableSections[sectionId]
|
|
364
|
+
}
|
|
365
|
+
delete mutableByDocument[existingEntry.id]
|
|
366
|
+
|
|
367
|
+
// Clean up old links
|
|
368
|
+
delete mutableForward[relativePath]
|
|
369
|
+
}
|
|
370
|
+
|
|
371
|
+
// Update document index
|
|
372
|
+
mutableDocuments[relativePath] = {
|
|
373
|
+
id: doc.id,
|
|
374
|
+
path: relativePath,
|
|
375
|
+
title: doc.title,
|
|
376
|
+
mtime: stats.mtime.getTime(),
|
|
377
|
+
hash,
|
|
378
|
+
tokenCount: doc.metadata.tokenCount,
|
|
379
|
+
sectionCount: doc.metadata.headingCount,
|
|
380
|
+
}
|
|
381
|
+
|
|
382
|
+
documentsIndexed++
|
|
383
|
+
|
|
384
|
+
// Update section index
|
|
385
|
+
const sections = flattenSections(doc.sections, doc.id, relativePath)
|
|
386
|
+
mutableByDocument[doc.id] = []
|
|
387
|
+
|
|
388
|
+
for (const section of sections) {
|
|
389
|
+
mutableSections[section.id] = section
|
|
390
|
+
mutableByDocument[doc.id]?.push(section.id)
|
|
391
|
+
|
|
392
|
+
// Index by heading
|
|
393
|
+
const headingKey = section.heading.toLowerCase()
|
|
394
|
+
if (!mutableByHeading[headingKey]) {
|
|
395
|
+
mutableByHeading[headingKey] = []
|
|
396
|
+
}
|
|
397
|
+
mutableByHeading[headingKey]?.push(section.id)
|
|
398
|
+
|
|
399
|
+
sectionsIndexed++
|
|
400
|
+
}
|
|
401
|
+
|
|
402
|
+
// Update link index
|
|
403
|
+
const internalLinks = doc.links.filter((l) => l.type === 'internal')
|
|
404
|
+
const outgoingLinks: string[] = []
|
|
405
|
+
|
|
406
|
+
for (const link of internalLinks) {
|
|
407
|
+
const target = resolveInternalLink(
|
|
408
|
+
link.href,
|
|
409
|
+
filePath,
|
|
410
|
+
storage.rootPath,
|
|
411
|
+
)
|
|
412
|
+
|
|
413
|
+
if (target) {
|
|
414
|
+
outgoingLinks.push(target)
|
|
415
|
+
|
|
416
|
+
// Add to backward links
|
|
417
|
+
if (!mutableBackward[target]) {
|
|
418
|
+
mutableBackward[target] = []
|
|
419
|
+
}
|
|
420
|
+
if (!mutableBackward[target]?.includes(relativePath)) {
|
|
421
|
+
mutableBackward[target]?.push(relativePath)
|
|
422
|
+
}
|
|
423
|
+
|
|
424
|
+
linksIndexed++
|
|
425
|
+
}
|
|
426
|
+
}
|
|
427
|
+
|
|
428
|
+
mutableForward[relativePath] = outgoingLinks
|
|
429
|
+
}).pipe(
|
|
430
|
+
// Note: catchAll is intentional for batch file processing.
|
|
431
|
+
// Individual file failures should be collected in errors array
|
|
432
|
+
// rather than stopping the entire index build operation.
|
|
433
|
+
Effect.catchAll((error) => {
|
|
434
|
+
// Extract message from typed errors or generic errors
|
|
435
|
+
const message =
|
|
436
|
+
'message' in error && typeof error.message === 'string'
|
|
437
|
+
? error.message
|
|
438
|
+
: String(error)
|
|
439
|
+
errors.push({
|
|
440
|
+
path: relativePath,
|
|
441
|
+
message,
|
|
442
|
+
})
|
|
443
|
+
return Effect.void
|
|
444
|
+
}),
|
|
445
|
+
)
|
|
446
|
+
|
|
447
|
+
yield* processFile
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
// Check for broken links
|
|
451
|
+
for (const [_from, targets] of Object.entries(mutableForward)) {
|
|
452
|
+
for (const target of targets) {
|
|
453
|
+
if (!mutableDocuments[target] && !brokenLinks.includes(target)) {
|
|
454
|
+
brokenLinks.push(target)
|
|
455
|
+
}
|
|
456
|
+
}
|
|
457
|
+
}
|
|
458
|
+
|
|
459
|
+
// Save indexes
|
|
460
|
+
yield* saveDocumentIndex(storage, {
|
|
461
|
+
version: docIndex.version,
|
|
462
|
+
rootPath: storage.rootPath,
|
|
463
|
+
documents: mutableDocuments,
|
|
464
|
+
})
|
|
465
|
+
|
|
466
|
+
yield* saveSectionIndex(storage, {
|
|
467
|
+
version: sectionIndex.version,
|
|
468
|
+
sections: mutableSections,
|
|
469
|
+
byHeading: mutableByHeading,
|
|
470
|
+
byDocument: mutableByDocument,
|
|
471
|
+
})
|
|
472
|
+
|
|
473
|
+
yield* saveLinkIndex(storage, {
|
|
474
|
+
version: linkIndex.version,
|
|
475
|
+
forward: mutableForward,
|
|
476
|
+
backward: mutableBackward,
|
|
477
|
+
broken: brokenLinks,
|
|
478
|
+
})
|
|
479
|
+
|
|
480
|
+
const duration = Date.now() - startTime
|
|
481
|
+
|
|
482
|
+
// Calculate totals for all links across all forward entries
|
|
483
|
+
const totalLinks = Object.values(mutableForward).reduce(
|
|
484
|
+
(sum, links) => sum + links.length,
|
|
485
|
+
0,
|
|
486
|
+
)
|
|
487
|
+
|
|
488
|
+
// Build skip summary
|
|
489
|
+
const skipped: SkipSummary = {
|
|
490
|
+
unchanged: unchangedCount,
|
|
491
|
+
excluded: walkSkipped.excluded,
|
|
492
|
+
hidden: walkSkipped.hidden,
|
|
493
|
+
total: unchangedCount + walkSkipped.excluded + walkSkipped.hidden,
|
|
494
|
+
}
|
|
495
|
+
|
|
496
|
+
return {
|
|
497
|
+
documentsIndexed,
|
|
498
|
+
sectionsIndexed,
|
|
499
|
+
linksIndexed,
|
|
500
|
+
totalDocuments: Object.keys(mutableDocuments).length,
|
|
501
|
+
totalSections: Object.keys(mutableSections).length,
|
|
502
|
+
totalLinks,
|
|
503
|
+
duration,
|
|
504
|
+
errors,
|
|
505
|
+
skipped,
|
|
506
|
+
}
|
|
507
|
+
})
|
|
508
|
+
|
|
509
|
+
// ============================================================================
|
|
510
|
+
// Link Queries
|
|
511
|
+
// ============================================================================
|
|
512
|
+
|
|
513
|
+
/**
 * Look up the link targets recorded for a file in the persisted link index.
 *
 * @param rootPath - Root directory passed to `createStorage` to locate the index
 * @param filePath - File to look up; it is resolved with `path.resolve` and
 *   then made relative to the storage root to form the lookup key
 * @returns The `forward` entry for the file, or an empty array when no link
 *   index could be loaded or the file has no entry
 */
export const getOutgoingLinks = (
  rootPath: string,
  filePath: string,
): Effect.Effect<readonly string[], FileReadError | IndexCorruptedError> =>
  Effect.gen(function* () {
    const storage = createStorage(rootPath)
    const index = yield* loadLinkIndex(storage)

    if (index) {
      // Index keys are paths relative to the storage root
      const key = path.relative(storage.rootPath, path.resolve(filePath))
      return index.forward[key] ?? []
    }

    return []
  })
|
|
528
|
+
|
|
529
|
+
/**
 * Look up the files recorded as linking to a file in the persisted link index.
 *
 * @param rootPath - Root directory passed to `createStorage` to locate the index
 * @param filePath - File to look up; it is resolved with `path.resolve` and
 *   then made relative to the storage root to form the lookup key
 * @returns The `backward` entry for the file, or an empty array when no link
 *   index could be loaded or the file has no entry
 */
export const getIncomingLinks = (
  rootPath: string,
  filePath: string,
): Effect.Effect<readonly string[], FileReadError | IndexCorruptedError> =>
  Effect.gen(function* () {
    const storage = createStorage(rootPath)
    const index = yield* loadLinkIndex(storage)

    if (index) {
      // Index keys are paths relative to the storage root
      const key = path.relative(storage.rootPath, path.resolve(filePath))
      return index.backward[key] ?? []
    }

    return []
  })
|
|
544
|
+
|
|
545
|
+
/**
 * Return the broken-link list stored in the persisted link index.
 *
 * @param rootPath - Root directory passed to `createStorage` to locate the index
 * @returns The index's `broken` list, or an empty array when no link index
 *   could be loaded
 */
export const getBrokenLinks = (
  rootPath: string,
): Effect.Effect<readonly string[], FileReadError | IndexCorruptedError> =>
  Effect.gen(function* () {
    const index = yield* loadLinkIndex(createStorage(rootPath))
    return index ? index.broken : []
  })
|
|
558
|
+
|
|
559
|
+
// ============================================================================
|
|
560
|
+
// BM25 Index Building
|
|
561
|
+
// ============================================================================
|
|
562
|
+
|
|
563
|
+
import { type BM25Document, createBM25Store } from '../search/bm25-store.js'
|
|
564
|
+
|
|
565
|
+
/** Options accepted by `buildBM25Index`. */
export interface BuildBM25Options {
  /** When set, the check for an already-populated BM25 store is bypassed. */
  readonly force?: boolean
  /** Invoked with the running document count and the total to process. */
  readonly onProgress?: (progress: {
    current: number
    total: number
  }) => void
}
|
|
569
|
+
|
|
570
|
+
/** Summary returned by `buildBM25Index`. */
export interface BuildBM25Result {
  /** Number of sections added to the BM25 store during this run. */
  readonly sectionsIndexed: number
  /** Elapsed time computed from `Date.now()` differences (milliseconds). */
  readonly duration: number
}
|
|
574
|
+
|
|
575
|
+
/**
 * Build BM25 keyword index for all sections.
 *
 * Loads the persisted document and section indexes, groups sections by
 * their source document so each file is read once, slices every section's
 * lines out of the file, and adds them to the BM25 store before
 * consolidating and saving it.
 *
 * Unless `options.force` is set, the build is skipped when an existing
 * store loads and reports a non-zero count. Sections with
 * `tokenCount < 10` are not indexed. A document whose file cannot be read
 * is skipped instead of aborting the whole build.
 *
 * @param rootPath - Root directory containing indexed markdown files
 * @param options - Build options (force rebuild, progress callback)
 * @returns Result with section count and timing; `sectionsIndexed` is 0
 *   when the indexes are missing or the existing store is reused
 */
export const buildBM25Index = (
  rootPath: string,
  options: BuildBM25Options = {},
): Effect.Effect<
  BuildBM25Result,
  FileReadError | IndexCorruptedError | FileWriteError
> =>
  Effect.gen(function* () {
    const startTime = Date.now()
    const storage = createStorage(rootPath)

    // Load document and section indexes; both must exist before building
    const docIndex = yield* loadDocumentIndex(storage)
    const sectionIndex = yield* loadSectionIndex(storage)

    if (!docIndex || !sectionIndex) {
      return { sectionsIndexed: 0, duration: 0 }
    }

    // Create BM25 store
    const bm25Store = createBM25Store(storage.rootPath)

    // Check if we can skip: a loaded, non-empty store is reused as-is
    if (!options.force) {
      const loaded = yield* bm25Store.load()
      if (loaded) {
        const stats = bm25Store.getStats()
        if (stats.count > 0) {
          return { sectionsIndexed: 0, duration: Date.now() - startTime }
        }
      }
    }

    // Clear and rebuild
    bm25Store.clear()

    // Group sections by document for efficient file reading
    const sectionsByDoc: Map<string, SectionEntry[]> = new Map()
    for (const section of Object.values(sectionIndex.sections)) {
      // Very small sections are left out of the keyword index
      if (section.tokenCount < 10) continue
      const existing = sectionsByDoc.get(section.documentPath)
      if (existing) {
        existing.push(section)
      } else {
        sectionsByDoc.set(section.documentPath, [section])
      }
    }

    const totalDocs = sectionsByDoc.size
    let processedDocs = 0
    let sectionsIndexed = 0

    // Process each document
    for (const [docPath, sections] of sectionsByDoc) {
      const filePath = path.join(storage.rootPath, docPath)

      // Read file content. Effect.tryPromise routes a rejected read into the
      // error channel so catchAll can recover; Effect.promise would surface
      // the rejection as a defect that catchAll does not handle.
      const fileContentResult = yield* Effect.tryPromise(() =>
        fs.readFile(filePath, 'utf-8'),
      ).pipe(
        Effect.map((content) => ({ ok: true as const, content })),
        Effect.catchAll(() =>
          Effect.succeed({ ok: false as const, content: '' }),
        ),
      )

      // Unreadable documents are skipped (and do not advance progress)
      if (!fileContentResult.ok) continue

      const lines = fileContentResult.content.split('\n')
      const docs: BM25Document[] = []

      for (const section of sections) {
        // startLine is 1-based and endLine is inclusive, hence the slice bounds
        const content = lines
          .slice(section.startLine - 1, section.endLine)
          .join('\n')

        docs.push({
          id: section.id,
          sectionId: section.id,
          documentPath: section.documentPath,
          heading: section.heading,
          content,
        })
        sectionsIndexed++
      }

      yield* bm25Store.add(docs)

      processedDocs++
      if (options.onProgress) {
        options.onProgress({ current: processedDocs, total: totalDocs })
      }
    }

    // Consolidate and save
    yield* bm25Store.consolidate()
    yield* bm25Store.save()

    return {
      sectionsIndexed,
      duration: Date.now() - startTime,
    }
  })
|