mdcontext 0.0.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.changeset/README.md +28 -0
- package/.changeset/config.json +11 -0
- package/.claude/settings.local.json +25 -0
- package/.github/workflows/ci.yml +83 -0
- package/.github/workflows/claude-code-review.yml +44 -0
- package/.github/workflows/claude.yml +85 -0
- package/.github/workflows/release.yml +113 -0
- package/.tldrignore +112 -0
- package/BACKLOG.md +338 -0
- package/CONTRIBUTING.md +186 -0
- package/NOTES/NOTES +44 -0
- package/README.md +434 -11
- package/biome.json +36 -0
- package/cspell.config.yaml +14 -0
- package/dist/chunk-23UPXDNL.js +3044 -0
- package/dist/chunk-2W7MO2DL.js +1366 -0
- package/dist/chunk-3NUAZGMA.js +1689 -0
- package/dist/chunk-7TOWB2XB.js +366 -0
- package/dist/chunk-7XOTOADQ.js +3065 -0
- package/dist/chunk-AH2PDM2K.js +3042 -0
- package/dist/chunk-BNXWSZ63.js +3742 -0
- package/dist/chunk-BTL5DJVU.js +3222 -0
- package/dist/chunk-HDHYG7E4.js +104 -0
- package/dist/chunk-HLR4KZBP.js +3234 -0
- package/dist/chunk-IP3FRFEB.js +1045 -0
- package/dist/chunk-KHU56VDO.js +3042 -0
- package/dist/chunk-KRYIFLQR.js +88 -0
- package/dist/chunk-LBSDNLEM.js +287 -0
- package/dist/chunk-MNTQ7HCP.js +2643 -0
- package/dist/chunk-MUJELQQ6.js +1387 -0
- package/dist/chunk-MXJGMSLV.js +2199 -0
- package/dist/chunk-N6QJGC3Z.js +2636 -0
- package/dist/chunk-OBELGBPM.js +1713 -0
- package/dist/chunk-OT7R5XTA.js +3192 -0
- package/dist/chunk-P7X4RA2T.js +106 -0
- package/dist/chunk-PIDUQNC2.js +3185 -0
- package/dist/chunk-POGCDIH4.js +3187 -0
- package/dist/chunk-PSIEOQGZ.js +3043 -0
- package/dist/chunk-PVRT3IHA.js +3238 -0
- package/dist/chunk-QNN4TT23.js +1430 -0
- package/dist/chunk-RE3R45RJ.js +3042 -0
- package/dist/chunk-S7E6TFX6.js +803 -0
- package/dist/chunk-SG6GLU4U.js +1378 -0
- package/dist/chunk-SJCDV2ST.js +274 -0
- package/dist/chunk-SYE5XLF3.js +104 -0
- package/dist/chunk-T5VLYBZD.js +103 -0
- package/dist/chunk-TOQB7VWU.js +3238 -0
- package/dist/chunk-VFNMZ4ZQ.js +3228 -0
- package/dist/chunk-VVTGZNBT.js +1629 -0
- package/dist/chunk-W7Q4RFEV.js +104 -0
- package/dist/chunk-XTYYVRLO.js +3190 -0
- package/dist/chunk-Y6MDYVJD.js +3063 -0
- package/dist/cli/main.d.ts +1 -0
- package/dist/cli/main.js +5458 -0
- package/dist/index.d.ts +653 -0
- package/dist/index.js +79 -0
- package/dist/mcp/server.d.ts +1 -0
- package/dist/mcp/server.js +472 -0
- package/dist/schema-BAWSG7KY.js +22 -0
- package/dist/schema-E3QUPL26.js +20 -0
- package/dist/schema-EHL7WUT6.js +20 -0
- package/docs/019-USAGE.md +625 -0
- package/docs/020-current-implementation.md +364 -0
- package/docs/021-DOGFOODING-FINDINGS.md +175 -0
- package/docs/BACKLOG.md +80 -0
- package/docs/CONFIG.md +1123 -0
- package/docs/DESIGN.md +439 -0
- package/docs/ERRORS.md +383 -0
- package/docs/PROJECT.md +88 -0
- package/docs/ROADMAP.md +407 -0
- package/docs/summarization.md +320 -0
- package/docs/test-links.md +9 -0
- package/justfile +40 -0
- package/package.json +74 -9
- package/pnpm-workspace.yaml +5 -0
- package/research/INDEX.md +315 -0
- package/research/code-review/README.md +90 -0
- package/research/code-review/cli-error-handling-review.md +979 -0
- package/research/code-review/code-review-validation-report.md +464 -0
- package/research/code-review/main-ts-review.md +1128 -0
- package/research/config-analysis/01-current-implementation.md +470 -0
- package/research/config-analysis/02-strategy-recommendation.md +428 -0
- package/research/config-analysis/03-task-candidates.md +715 -0
- package/research/config-analysis/033-research-configuration-management.md +828 -0
- package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
- package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
- package/research/config-docs/SUMMARY.md +357 -0
- package/research/config-docs/TEST-RESULTS.md +776 -0
- package/research/config-docs/TODO.md +542 -0
- package/research/config-docs/analysis.md +744 -0
- package/research/config-docs/fix-validation.md +502 -0
- package/research/config-docs/help-audit.md +264 -0
- package/research/config-docs/help-system-analysis.md +890 -0
- package/research/dogfood/consolidated-tool-evaluation.md +373 -0
- package/research/dogfood/strategy-a/a-synthesis.md +184 -0
- package/research/dogfood/strategy-a/a1-docs.md +226 -0
- package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
- package/research/dogfood/strategy-a/a3-llm.md +164 -0
- package/research/dogfood/strategy-b/b-synthesis.md +228 -0
- package/research/dogfood/strategy-b/b1-architecture.md +207 -0
- package/research/dogfood/strategy-b/b2-gaps.md +258 -0
- package/research/dogfood/strategy-b/b3-workflows.md +250 -0
- package/research/dogfood/strategy-c/c-synthesis.md +451 -0
- package/research/dogfood/strategy-c/c1-explorer.md +192 -0
- package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
- package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
- package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
- package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
- package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
- package/research/effect-cli-error-handling.md +845 -0
- package/research/effect-errors-as-values.md +943 -0
- package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
- package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
- package/research/errors-task-analysis/embeddings-analysis.md +709 -0
- package/research/errors-task-analysis/index-search-analysis.md +812 -0
- package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
- package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
- package/research/issue-review.md +603 -0
- package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
- package/research/llm-summarization/alternative-providers-2026.md +1428 -0
- package/research/llm-summarization/anthropic-2026.md +367 -0
- package/research/llm-summarization/claude-cli-integration.md +1706 -0
- package/research/llm-summarization/cli-integration-patterns.md +3155 -0
- package/research/llm-summarization/openai-2026.md +473 -0
- package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
- package/research/llm-summarization/opencode-cli-integration.md +1552 -0
- package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
- package/research/llm-summarization/prototype-results.md +56 -0
- package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
- package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
- package/research/mdcontext-error-analysis.md +521 -0
- package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
- package/research/mdcontext-pudding/01-index-embed.md +956 -0
- package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
- package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
- package/research/mdcontext-pudding/02-search.md +970 -0
- package/research/mdcontext-pudding/03-context.md +779 -0
- package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
- package/research/mdcontext-pudding/04-tree.md +704 -0
- package/research/mdcontext-pudding/05-config.md +1038 -0
- package/research/mdcontext-pudding/06-links-summary.txt +87 -0
- package/research/mdcontext-pudding/06-links.md +679 -0
- package/research/mdcontext-pudding/07-stats.md +693 -0
- package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
- package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
- package/research/mdcontext-pudding/README.md +168 -0
- package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
- package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
- package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
- package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
- package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
- package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
- package/research/research-quality-review.md +834 -0
- package/research/semantic-search/002-research-embedding-models.md +490 -0
- package/research/semantic-search/003-research-rag-alternatives.md +523 -0
- package/research/semantic-search/004-research-vector-search.md +841 -0
- package/research/semantic-search/032-research-semantic-search.md +427 -0
- package/research/semantic-search/embedding-text-analysis.md +156 -0
- package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
- package/research/semantic-search/query-processing-analysis.md +207 -0
- package/research/semantic-search/root-cause-and-solution.md +114 -0
- package/research/semantic-search/threshold-validation-report.md +69 -0
- package/research/semantic-search/vector-search-analysis.md +63 -0
- package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
- package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
- package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
- package/research/task-management-2026/03-lightweight-file-based.md +567 -0
- package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
- package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
- package/research/task-management-2026/linear/02-api-integrations.md +930 -0
- package/research/task-management-2026/linear/03-ai-features.md +368 -0
- package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
- package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
- package/research/test-path-issues.md +276 -0
- package/review/ALP-76/1-error-type-design.md +962 -0
- package/review/ALP-76/2-error-handling-patterns.md +906 -0
- package/review/ALP-76/3-error-presentation.md +624 -0
- package/review/ALP-76/4-test-coverage.md +625 -0
- package/review/ALP-76/5-migration-completeness.md +440 -0
- package/review/ALP-76/6-effect-best-practices.md +755 -0
- package/scripts/apply-branch-protection.sh +47 -0
- package/scripts/branch-protection-templates.json +79 -0
- package/scripts/prototype-summarization.ts +346 -0
- package/scripts/rebuild-hnswlib.js +58 -0
- package/scripts/setup-branch-protection.sh +64 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
- package/src/cli/argv-preprocessor.test.ts +210 -0
- package/src/cli/argv-preprocessor.ts +202 -0
- package/src/cli/cli.test.ts +627 -0
- package/src/cli/commands/backlinks.ts +54 -0
- package/src/cli/commands/config-cmd.ts +642 -0
- package/src/cli/commands/context.ts +285 -0
- package/src/cli/commands/duplicates.ts +122 -0
- package/src/cli/commands/embeddings.ts +529 -0
- package/src/cli/commands/index-cmd.ts +480 -0
- package/src/cli/commands/index.ts +16 -0
- package/src/cli/commands/links.ts +52 -0
- package/src/cli/commands/search.ts +1281 -0
- package/src/cli/commands/stats.ts +149 -0
- package/src/cli/commands/tree.ts +128 -0
- package/src/cli/config-layer.ts +176 -0
- package/src/cli/error-handler.test.ts +235 -0
- package/src/cli/error-handler.ts +655 -0
- package/src/cli/flag-schemas.ts +341 -0
- package/src/cli/help.ts +588 -0
- package/src/cli/index.ts +9 -0
- package/src/cli/main.ts +435 -0
- package/src/cli/options.ts +41 -0
- package/src/cli/shared-error-handling.ts +199 -0
- package/src/cli/typo-suggester.test.ts +105 -0
- package/src/cli/typo-suggester.ts +130 -0
- package/src/cli/utils.ts +259 -0
- package/src/config/file-provider.test.ts +320 -0
- package/src/config/file-provider.ts +273 -0
- package/src/config/index.ts +72 -0
- package/src/config/integration.test.ts +667 -0
- package/src/config/precedence.test.ts +277 -0
- package/src/config/precedence.ts +451 -0
- package/src/config/schema.test.ts +414 -0
- package/src/config/schema.ts +603 -0
- package/src/config/service.test.ts +320 -0
- package/src/config/service.ts +243 -0
- package/src/config/testing.test.ts +264 -0
- package/src/config/testing.ts +110 -0
- package/src/core/index.ts +1 -0
- package/src/core/types.ts +113 -0
- package/src/duplicates/detector.test.ts +183 -0
- package/src/duplicates/detector.ts +414 -0
- package/src/duplicates/index.ts +18 -0
- package/src/embeddings/embedding-namespace.test.ts +300 -0
- package/src/embeddings/embedding-namespace.ts +947 -0
- package/src/embeddings/heading-boost.test.ts +222 -0
- package/src/embeddings/hnsw-build-options.test.ts +198 -0
- package/src/embeddings/hyde.test.ts +272 -0
- package/src/embeddings/hyde.ts +264 -0
- package/src/embeddings/index.ts +10 -0
- package/src/embeddings/openai-provider.ts +414 -0
- package/src/embeddings/pricing.json +22 -0
- package/src/embeddings/provider-constants.ts +204 -0
- package/src/embeddings/provider-errors.test.ts +967 -0
- package/src/embeddings/provider-errors.ts +565 -0
- package/src/embeddings/provider-factory.test.ts +240 -0
- package/src/embeddings/provider-factory.ts +225 -0
- package/src/embeddings/provider-integration.test.ts +788 -0
- package/src/embeddings/query-preprocessing.test.ts +187 -0
- package/src/embeddings/semantic-search-threshold.test.ts +508 -0
- package/src/embeddings/semantic-search.ts +1270 -0
- package/src/embeddings/types.ts +359 -0
- package/src/embeddings/vector-store.ts +708 -0
- package/src/embeddings/voyage-provider.ts +313 -0
- package/src/errors/errors.test.ts +845 -0
- package/src/errors/index.ts +533 -0
- package/src/index/ignore-patterns.test.ts +354 -0
- package/src/index/ignore-patterns.ts +305 -0
- package/src/index/index.ts +4 -0
- package/src/index/indexer.ts +684 -0
- package/src/index/storage.ts +260 -0
- package/src/index/types.ts +147 -0
- package/src/index/watcher.ts +189 -0
- package/src/index.ts +30 -0
- package/src/integration/search-keyword.test.ts +678 -0
- package/src/mcp/server.ts +612 -0
- package/src/parser/index.ts +1 -0
- package/src/parser/parser.test.ts +291 -0
- package/src/parser/parser.ts +394 -0
- package/src/parser/section-filter.test.ts +277 -0
- package/src/parser/section-filter.ts +392 -0
- package/src/search/__tests__/hybrid-search.test.ts +650 -0
- package/src/search/bm25-store.ts +366 -0
- package/src/search/cross-encoder.test.ts +253 -0
- package/src/search/cross-encoder.ts +406 -0
- package/src/search/fuzzy-search.test.ts +419 -0
- package/src/search/fuzzy-search.ts +273 -0
- package/src/search/hybrid-search.ts +448 -0
- package/src/search/path-matcher.test.ts +276 -0
- package/src/search/path-matcher.ts +33 -0
- package/src/search/query-parser.test.ts +260 -0
- package/src/search/query-parser.ts +319 -0
- package/src/search/searcher.test.ts +280 -0
- package/src/search/searcher.ts +724 -0
- package/src/search/wink-bm25.d.ts +30 -0
- package/src/summarization/cli-providers/claude.ts +202 -0
- package/src/summarization/cli-providers/detection.test.ts +273 -0
- package/src/summarization/cli-providers/detection.ts +118 -0
- package/src/summarization/cli-providers/index.ts +8 -0
- package/src/summarization/cost.test.ts +139 -0
- package/src/summarization/cost.ts +102 -0
- package/src/summarization/error-handler.test.ts +127 -0
- package/src/summarization/error-handler.ts +111 -0
- package/src/summarization/index.ts +102 -0
- package/src/summarization/pipeline.test.ts +498 -0
- package/src/summarization/pipeline.ts +231 -0
- package/src/summarization/prompts.test.ts +269 -0
- package/src/summarization/prompts.ts +133 -0
- package/src/summarization/provider-factory.test.ts +396 -0
- package/src/summarization/provider-factory.ts +178 -0
- package/src/summarization/types.ts +184 -0
- package/src/summarize/budget-bugs.test.ts +620 -0
- package/src/summarize/formatters.ts +419 -0
- package/src/summarize/index.ts +20 -0
- package/src/summarize/summarizer.test.ts +275 -0
- package/src/summarize/summarizer.ts +597 -0
- package/src/summarize/verify-bugs.test.ts +238 -0
- package/src/types/huggingface-transformers.d.ts +66 -0
- package/src/utils/index.ts +1 -0
- package/src/utils/tokens.test.ts +142 -0
- package/src/utils/tokens.ts +186 -0
- package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
- package/tests/fixtures/cli/.mdcontext/config.json +8 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
- package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
- package/tests/fixtures/cli/.mdcontext/indexes/sections.json +247 -0
- package/tests/fixtures/cli/README.md +9 -0
- package/tests/fixtures/cli/api-reference.md +11 -0
- package/tests/fixtures/cli/getting-started.md +11 -0
- package/tests/integration/embed-index.test.ts +712 -0
- package/tests/integration/search-context.test.ts +469 -0
- package/tests/integration/search-semantic.test.ts +522 -0
- package/tsconfig.json +26 -0
- package/vitest.config.ts +16 -0
- package/vitest.setup.ts +12 -0
|
@@ -0,0 +1,803 @@
|
|
|
1
|
+
// src/utils/tokens.ts
|
|
2
|
+
import { Effect } from "effect";
|
|
3
|
+
|
|
4
|
+
// Module-level cache for the tiktoken encoder; null until first use and
// reset to null again by freeEncoder().
var encoder = null;

/**
 * Effect that yields the shared "cl100k_base" tiktoken encoder.
 *
 * The "tiktoken" module is imported dynamically the first time the effect
 * runs; the encoder it creates is stored in the module-level `encoder`
 * variable and returned directly on later runs.
 */
var getEncoder = Effect.gen(function* () {
  if (encoder !== null) {
    return encoder;
  }
  const { get_encoding } = yield* Effect.promise(() => import("tiktoken"));
  encoder = get_encoding("cl100k_base");
  return encoder;
});
|
|
12
|
+
/**
 * Counts tokens in `text` using the shared tiktoken encoder.
 *
 * @param {string} text - Text to encode.
 * @returns An Effect that resolves to the number of tokens produced by
 *   `encode(text)`.
 */
var countTokens = (text) =>
  Effect.map(getEncoder, (enc) => enc.encode(text).length);
|
|
18
|
+
/**
 * Heuristic token estimate that does not need the tiktoken encoder.
 *
 * The text is split into categories that are weighted differently: fenced
 * code blocks, inline code spans, slash-separated paths, punctuation,
 * newlines, CJK characters, emoji, variation selectors and remaining prose.
 * The weighted sum is inflated by 10% and rounded up.
 *
 * @param {string} text - Text to estimate.
 * @returns {number} Estimated token count (0 for the empty string).
 */
var countTokensApprox = (text) => {
  if (text.length === 0) return 0;

  const occurrences = (input, pattern) => (input.match(pattern) || []).length;

  // Script-specific counts are taken over the whole input, before any
  // code/path spans are stripped out below.
  const cjkCount = occurrences(
    text,
    /[\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af\u3400-\u4dbf]/g,
  );
  const emojiCount = occurrences(
    text,
    /[\u{1F300}-\u{1F9FF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}\u{1F600}-\u{1F64F}\u{1F680}-\u{1F6FF}\u{2300}-\u{23FF}\u{2190}-\u{21FF}\u{25A0}-\u{25FF}\u{2B00}-\u{2BFF}]/gu,
  );
  const variationCount = occurrences(text, /[\uFE0E\uFE0F]/g);

  // `remaining` shrinks as each recognised span is accounted for, so later
  // categories never double-count earlier ones.
  let remaining = text;

  // Fenced code blocks: fixed fence overhead (larger when a language tag is
  // present) + one token per inner newline + 1 token per 2.5 characters.
  let codeBlockTokens = 0;
  for (const fence of text.match(/```[\s\S]*?```/g) || []) {
    const overhead = /^```\w+/.test(fence) ? 6 : 4;
    const body = fence.replace(/^```\w*\n?/, "").replace(/\n?```$/, "");
    const bodyNewlines = occurrences(body, /\n/g);
    const bodyTokens = body.length > 0 ? body.length / 2.5 : 0;
    codeBlockTokens += Math.max(overhead, overhead + bodyNewlines + bodyTokens);
    remaining = remaining.replace(fence, "");
  }

  // Inline code: two tokens for the backticks + 1 token per 2.5 characters.
  let inlineCodeTokens = 0;
  for (const span of remaining.match(/`[^`]+`/g) || []) {
    const inner = span.slice(1, -1);
    inlineCodeTokens += 2 + inner.length / 2.5;
    remaining = remaining.replace(span, "");
  }

  // Paths: one token per slash + 1 token per 3.5 other characters.
  let pathTokens = 0;
  for (const candidate of remaining.match(/(?:\/[\w.-]+)+/g) || []) {
    const slashes = occurrences(candidate, /\//g);
    const nonSlashLength = candidate.length - slashes;
    pathTokens += slashes + nonSlashLength / 3.5;
    remaining = remaining.replace(candidate, "");
  }

  const punctuationCount = occurrences(
    remaining,
    /[!?,.:;'"()[\]{}@#$%^&*+=|\\<>~\-/]/g,
  );
  const proseNewlines = occurrences(remaining, /\n/g);

  // Whatever is left after removing separately-weighted characters is
  // plain prose; clamp at zero because the script counts above were taken
  // from the full text and may exceed what remains.
  const proseLength = Math.max(
    0,
    remaining.length -
      proseNewlines -
      cjkCount -
      emojiCount -
      variationCount -
      punctuationCount,
  );

  const proseTokens = proseLength / 3.5;
  const punctuationBonus = punctuationCount * 0.8;
  const cjkTokens = cjkCount * 1.2;
  const emojiTokens = emojiCount * 2.5;

  // Newlines and variation selectors each count as exactly one token.
  const estimate =
    proseTokens +
    proseNewlines +
    codeBlockTokens +
    inlineCodeTokens +
    pathTokens +
    punctuationBonus +
    cjkTokens +
    emojiTokens +
    variationCount;

  return Math.ceil(estimate * 1.1);
};
|
|
92
|
+
/**
 * Counts whitespace-separated words.
 *
 * @param {string} text - Text to inspect.
 * @returns {number} Number of runs of non-whitespace characters; 0 when the
 *   text is empty or only whitespace.
 */
var countWords = (text) => {
  const body = text.trim();
  return body.length === 0 ? 0 : body.split(/\s+/).length;
};
|
|
97
|
+
/**
 * Releases the cached encoder, if one exists, by calling its `free()`
 * method and clearing the module-level cache. Does nothing when no encoder
 * has been created.
 */
var freeEncoder = () => {
  if (encoder === null) {
    return;
  }
  encoder.free();
  encoder = null;
};
|
|
103
|
+
|
|
104
|
+
// src/parser/parser.ts
|
|
105
|
+
import * as crypto from "crypto";
|
|
106
|
+
import { Effect as Effect2 } from "effect";
|
|
107
|
+
import matter from "gray-matter";
|
|
108
|
+
import remarkGfm from "remark-gfm";
|
|
109
|
+
import remarkParse from "remark-parse";
|
|
110
|
+
import { unified } from "unified";
|
|
111
|
+
import { visit } from "unist-util-visit";
|
|
112
|
+
|
|
113
|
+
// Shared unified pipeline used by parse(): remark-parse with the remark-gfm
// plugin enabled. Only its .parse() method is used in this module.
var processor = unified().use(remarkParse).use(remarkGfm);
|
|
114
|
+
/**
 * Derives a short, deterministic identifier from `input`.
 *
 * @param {string} input - Value to hash (parse() passes the document path).
 * @returns {string} First 12 hex characters of the MD5 digest of `input`.
 */
var generateId = (input) => {
  const digest = crypto.createHash("md5").update(input).digest("hex");
  return digest.slice(0, 12);
};
|
|
117
|
+
/**
 * Turns heading text into a slug used inside section ids.
 *
 * Steps: lowercase, drop every character that is not a word character,
 * whitespace or hyphen, turn each whitespace run into a hyphen, collapse
 * repeated hyphens, then trim.
 *
 * Note: by the time trim() runs, whitespace has already become hyphens, so
 * leading/trailing whitespace in the input shows up as a leading/trailing
 * "-" in the result.
 *
 * @param {string} text - Heading text.
 * @returns {string} Slug for the heading.
 */
var slugify = (text) => {
  const lowered = text.toLowerCase();
  const stripped = lowered.replace(/[^\w\s-]/g, "");
  const hyphenated = stripped.replace(/\s+/g, "-");
  return hyphenated.replace(/-+/g, "-").trim();
};
|
|
125
|
+
/**
 * Decides whether a link target points inside the documentation set.
 *
 * Rules, in order:
 *  1. Pure fragment links ("#section") are internal.
 *  2. Anything that starts with a URI scheme ("https:", "mailto:", "tel:",
 *     "ftp:", "data:", ...) is external. Previously only http(s) and mailto
 *     were recognised, so e.g. "ftp://host/guide.md" or "tel:+123" were
 *     reported as internal links.
 *  3. Targets ending in ".md" or containing ".md#" are internal.
 *  4. Anything else is internal unless it contains "://".
 *
 * The scheme pattern requires at least two characters before the colon so
 * that Windows drive-letter paths such as "C:\docs\a.md" are not mistaken
 * for a scheme.
 *
 * @param {string} href - Raw link destination from the markdown source.
 * @returns {boolean} true when the link should be treated as internal.
 */
var isInternalLink = (href) => {
  if (href.startsWith("#")) return true;
  // RFC 3986 scheme: a letter followed by letters, digits, "+", "." or "-".
  if (/^[a-z][a-z\d+.-]+:/i.test(href)) return false;
  if (href.endsWith(".md") || href.includes(".md#")) return true;
  return !href.includes("://");
};
|
|
132
|
+
/**
 * Collects the values of all "text" nodes found under `node` (via
 * unist-util-visit) and joins them with single spaces.
 *
 * @param node - Syntax tree node to walk.
 * @returns {string} Space-joined text node values; "" when there are none.
 */
var extractPlainText = (node) => {
  const fragments = [];
  visit(node, "text", ({ value }) => {
    // Block body on purpose: the visitor must not return the push() result.
    fragments.push(value);
  });
  return fragments.join(" ");
};
|
|
139
|
+
/**
 * Reads `node.position.end.line`.
 *
 * @param node - Syntax tree node (may be nullish or lack position data).
 * @returns {number} The end line, or 0 when any part of the chain is missing.
 */
var getNodeEndLine = (node) => {
  const endLine = node?.position?.end?.line;
  return endLine ?? 0;
};
|
|
142
|
+
/**
 * Reads `node.position.start.line`.
 *
 * @param node - Syntax tree node (may be nullish or lack position data).
 * @returns {number} The start line, or 0 when any part of the chain is
 *   missing.
 */
var getNodeStartLine = (node) => {
  const startLine = node?.position?.start?.line;
  return startLine ?? 0;
};
|
|
145
|
+
/**
 * Splits the top-level children of `tree` into flat sections, one per
 * heading node.
 *
 * Only direct children of the root are considered. A section's content is
 * every sibling after its heading up to (not including) the next heading of
 * any depth, or the end of the document.
 *
 * @param tree - Root node with a `children` array.
 * @returns {Array} One record per heading with `heading`, `level`,
 *   `startLine`, `endLine`, `contentStartLine` and `contentNodes`. `endLine`
 *   falls back to the heading's own line when the section has no content.
 */
var extractRawSections = (tree) => {
  const topLevel = tree.children;

  // Pass 1: locate every top-level heading and remember its child index.
  const headings = [];
  for (let index = 0; index < topLevel.length; index += 1) {
    const node = topLevel[index];
    if (node.type !== "heading") continue;
    headings.push({
      heading: extractPlainText(node),
      level: node.depth,
      line: getNodeStartLine(node),
      index,
    });
  }

  // Pass 2: each heading owns the nodes up to the next heading.
  return headings.map((current, position) => {
    const following = headings[position + 1];
    const stopIndex = following ? following.index : topLevel.length;
    const contentNodes = topLevel.slice(current.index + 1, stopIndex);
    const tailNode = contentNodes[contentNodes.length - 1];
    return {
      heading: current.heading,
      level: current.level,
      startLine: current.line,
      endLine: tailNode ? getNodeEndLine(tailNode) : current.line,
      contentStartLine: current.line + 1,
      contentNodes,
    };
  });
};
|
|
176
|
+
/**
 * Nests the flat section list produced by extractRawSections() according
 * to heading level.
 *
 * A section becomes a child of the nearest preceding section with a
 * strictly smaller level; sections with no such predecessor are roots.
 *
 * @param {Array} rawSections - Flat sections in document order.
 * @param {string} docId - Document id, used as the prefix of section ids.
 * @param {string[]} lines - Document split into lines; a section's
 *   `content` is taken from these using its 1-based start/end lines.
 * @returns {Array} Root sections, each with nested `children`.
 */
var buildSectionHierarchy = (rawSections, docId, lines) => {
  const roots = [];
  // Chain of currently "open" ancestors, outermost first.
  const openSections = [];

  for (const raw of rawSections) {
    const { heading, level, startLine, endLine, contentNodes } = raw;
    const containsNodeType = (type) =>
      contentNodes.some((n) => n.type === type);

    const content = lines.slice(startLine - 1, endLine).join("\n");
    const plainText = extractSectionPlainText(contentNodes);

    const section = {
      id: `${docId}-${slugify(heading)}`,
      heading,
      level,
      content,
      plainText,
      startLine,
      endLine,
      children: [],
      metadata: {
        wordCount: countWords(plainText),
        tokenCount: countTokensApprox(content),
        hasCode: containsNodeType("code"),
        hasList: containsNodeType("list"),
        hasTable: containsNodeType("table"),
      },
    };

    // Close every open section that is at the same depth or deeper.
    while (
      openSections.length > 0 &&
      openSections[openSections.length - 1].level >= level
    ) {
      openSections.pop();
    }

    const parent = openSections[openSections.length - 1];
    if (parent === undefined) {
      roots.push(section);
    } else {
      parent.children.push(section);
    }
    openSections.push(section);
  }

  return roots;
};
|
|
216
|
+
/**
 * Builds the plain-text body of a section from its content nodes.
 *
 * Nodes that carry a string `value` contribute it verbatim; nodes with
 * `children` contribute the text gathered by extractPlainText(); any other
 * node is skipped. Pieces are joined with single spaces.
 *
 * @param {Array} nodes - Content nodes of one section.
 * @returns {string} Space-joined text of the section.
 */
var extractSectionPlainText = (nodes) =>
  nodes
    .flatMap((node) => {
      if ("value" in node && typeof node.value === "string") {
        return [node.value];
      }
      if ("children" in node) {
        return [extractPlainText(node)];
      }
      return [];
    })
    .join(" ");
|
|
227
|
+
/**
 * Counts sections recursively.
 *
 * @param {Array} sections - Sections, each with a `children` array.
 * @returns {number} Total number of sections including all descendants.
 */
var countAllSections = (sections) =>
  sections.reduce(
    (total, section) => total + 1 + countAllSections(section.children),
    0,
  );
|
|
235
|
+
/**
 * Collects every link and image in the tree.
 *
 * Nodes are visited with unist-util-visit; each time a heading is seen the
 * "current section" id is updated, so every link/image is attributed to
 * the most recently visited heading (or to the document id when none has
 * been seen yet).
 *
 * @param tree - Parsed markdown tree.
 * @param {string} docId - Document id; also the section-id prefix.
 * @returns {Array} Records with `type` ("internal" | "external" | "image"),
 *   `href`, `text`, `sectionId` and `line`.
 */
var extractLinks = (tree, docId) => {
  const collected = [];
  let sectionId = docId;

  visit(tree, (node) => {
    switch (node.type) {
      case "heading":
        sectionId = `${docId}-${slugify(extractPlainText(node))}`;
        break;
      case "link":
        collected.push({
          type: isInternalLink(node.url) ? "internal" : "external",
          href: node.url,
          text: extractPlainText(node),
          sectionId,
          line: getNodeStartLine(node),
        });
        break;
      case "image":
        collected.push({
          type: "image",
          href: node.url,
          text: node.alt ?? "",
          sectionId,
          line: getNodeStartLine(node),
        });
        break;
      default:
        break;
    }
  });

  return collected;
};
|
|
266
|
+
/**
 * Collects every "code" node in the tree.
 *
 * As in extractLinks(), the section id is updated whenever a heading is
 * visited, so each code block is attributed to the most recently visited
 * heading (or to the document id when none has been seen yet).
 *
 * @param tree - Parsed markdown tree.
 * @param {string} docId - Document id; also the section-id prefix.
 * @returns {Array} Records with `language` (null when the node has no
 *   `lang`), `content`, `sectionId`, `startLine` and `endLine`.
 */
var extractCodeBlocks = (tree, docId) => {
  const collected = [];
  let sectionId = docId;

  visit(tree, (node) => {
    if (node.type === "heading") {
      sectionId = `${docId}-${slugify(extractPlainText(node))}`;
      return;
    }
    if (node.type !== "code") {
      return;
    }
    collected.push({
      language: node.lang ?? null,
      content: node.value,
      sectionId,
      startLine: getNodeStartLine(node),
      endLine: getNodeEndLine(node),
    });
  });

  return collected;
};
|
|
286
|
+
/**
 * Parses markdown text into a structured document.
 *
 * Frontmatter is split off with gray-matter; if that throws, a warning is
 * logged and the whole input is treated as markdown with empty
 * frontmatter. The remaining markdown is parsed with the shared processor
 * and turned into sections, links and code blocks.
 *
 * Line numbers in the result are relative to the markdown body after the
 * frontmatter has been removed, because both the tree and `lines` are
 * built from that body.
 *
 * @param {string} content - Raw file content.
 * @param {{path?: string, lastModified?: Date}} [options] - `path`
 *   defaults to "unknown"; `lastModified` defaults to the time of parsing.
 * @returns An Effect yielding `{ id, path, title, frontmatter, sections,
 *   links, codeBlocks, metadata }`.
 */
var parse = (content, options = {}) =>
  Effect2.gen(function* () {
    const path3 = options.path ?? "unknown";
    const docId = generateId(path3);
    const now = new Date();

    // Separate frontmatter from the markdown body (best effort).
    let frontmatter = {};
    let markdownContent = content;
    try {
      ({ data: frontmatter, content: markdownContent } = matter(content));
    } catch (error) {
      const msg = error instanceof Error ? error.message : String(error);
      console.warn(
        `Warning: Malformed frontmatter in ${path3}, skipping: ${msg.split("\n")[0]}`,
      );
    }

    const tree = processor.parse(markdownContent);
    const sections = buildSectionHierarchy(
      extractRawSections(tree),
      docId,
      markdownContent.split("\n"),
    );
    const links = extractLinks(tree, docId);
    const codeBlocks = extractCodeBlocks(tree, docId);

    // Title preference: first root-level H1, then a string `title` in the
    // frontmatter, then the file name without ".md", then "Untitled".
    const firstH1 = sections.find((s) => s.level === 1);
    const title =
      firstH1?.heading ??
      (typeof frontmatter.title === "string" ? frontmatter.title : null) ??
      path3.split("/").pop()?.replace(/\.md$/, "") ??
      "Untitled";

    // NOTE(review): `sections` holds only root sections and each section's
    // `content` stops at the next heading, so this word count appears to
    // exclude nested sections and any text before the first heading -
    // confirm whether that is intended.
    const totalContent = sections.map((s) => s.content).join("\n");

    return {
      id: docId,
      path: path3,
      title,
      frontmatter,
      sections,
      links,
      codeBlocks,
      metadata: {
        wordCount: countWords(totalContent),
        // Token estimate covers the raw input, frontmatter included.
        tokenCount: countTokensApprox(content),
        headingCount: countAllSections(sections),
        linkCount: links.length,
        codeBlockCount: codeBlocks.length,
        lastModified: options.lastModified ?? now,
        indexedAt: now,
      },
    };
  });
|
|
337
|
+
/**
 * Read a markdown file from disk and parse it into a document.
 *
 * Loads the file contents and its stat record, then delegates to `parse`,
 * passing the path and the file's modification time as options.
 *
 * @param filePath - Path of the file to read.
 * @returns An Effect that yields whatever `parse` yields. It fails with
 *   `{ _tag: "IoError", message, path }` when reading or stat-ing the file
 *   rejects.
 */
var parseFile = (filePath) =>
  Effect2.gen(function* () {
    const fs3 = yield* Effect2.promise(() => import("fs/promises"));
    // A try/catch around `yield*` cannot observe Effect failures, and
    // Effect.promise reports rejections as defects rather than failures, so
    // the IoError has to be produced by tryPromise's `catch` mapper instead.
    const [content, stats] = yield* Effect2.tryPromise({
      try: () =>
        Promise.all([fs3.readFile(filePath, "utf-8"), fs3.stat(filePath)]),
      catch: (error) => ({
        _tag: "IoError",
        message: error instanceof Error ? error.message : "Unknown error",
        path: filePath,
      }),
    });
    return yield* parse(content, {
      path: filePath,
      lastModified: stats.mtime,
    });
  });
|
|
359
|
+
|
|
360
|
+
// src/index/types.ts
|
|
361
|
+
/** Name of the directory, directly under the project root, that holds index data. */
var INDEX_DIR = ".mdcontext";
/** Version number written into index files and the index config. */
var INDEX_VERSION = 1;
/**
 * Compute the location of every index artifact for a project root.
 *
 * Paths are assembled with "/" separators by plain string interpolation.
 *
 * @param rootPath - Project root directory.
 * @returns An object with the `root`, `config`, `documents`, `sections`,
 *   `links`, `cache` and `parsed` path strings.
 */
var getIndexPaths = (rootPath) => {
  const indexRoot = `${rootPath}/${INDEX_DIR}`;
  const indexesDir = `${indexRoot}/indexes`;
  const cacheDir = `${indexRoot}/cache`;
  return {
    root: indexRoot,
    config: `${indexRoot}/config.json`,
    documents: `${indexesDir}/documents.json`,
    sections: `${indexesDir}/sections.json`,
    links: `${indexesDir}/links.json`,
    cache: cacheDir,
    parsed: `${cacheDir}/parsed`,
  };
};
|
|
372
|
+
|
|
373
|
+
// src/index/storage.ts
|
|
374
|
+
import * as crypto2 from "crypto";
|
|
375
|
+
import { Effect as Effect3 } from "effect";
|
|
376
|
+
import * as fs from "fs/promises";
|
|
377
|
+
import * as path from "path";
|
|
378
|
+
|
|
379
|
+
/**
 * Create a directory, including any missing parent directories.
 *
 * @param dirPath - Directory to create.
 * @returns An Effect that succeeds with `undefined`, or fails with an Error
 *   whose message starts with "Failed to create directory".
 */
var ensureDir = (dirPath) => {
  const makeDir = Effect3.tryPromise({
    try: () => fs.mkdir(dirPath, { recursive: true }),
    catch: (e) => new Error(`Failed to create directory ${dirPath}: ${e}`),
  });
  // Discard mkdir's own result so callers always receive undefined.
  return makeDir.pipe(Effect3.map(() => void 0));
};
|
|
384
|
+
/**
 * Read a file and parse it as JSON.
 *
 * Any error raised while reading or parsing is swallowed and reported as
 * `null`, so a missing file and an unparsable file look the same to callers.
 *
 * @param filePath - File to read (decoded as UTF-8).
 * @returns An Effect yielding the parsed value, or `null` on any read/parse error.
 */
var readJsonFile = (filePath) => {
  const readOrNull = async () => {
    try {
      const raw = await fs.readFile(filePath, "utf-8");
      return JSON.parse(raw);
    } catch {
      return null;
    }
  };
  return Effect3.tryPromise({
    try: readOrNull,
    catch: (e) => new Error(`Failed to read ${filePath}: ${e}`),
  });
};
|
|
396
|
+
/**
 * Serialize a value as 2-space-indented JSON and write it to a file.
 *
 * The file's parent directory is created first via `ensureDir`.
 *
 * @param filePath - Destination file.
 * @param data - Value passed to `JSON.stringify`.
 * @returns An Effect that fails with an Error ("Failed to write ...") when
 *   serialization or the write itself throws/rejects.
 */
var writeJsonFile = (filePath, data) =>
  Effect3.gen(function* () {
    yield* ensureDir(path.dirname(filePath));
    const persist = Effect3.tryPromise({
      // Serialization happens inside the thunk so its errors are mapped too.
      try: () => {
        const serialized = JSON.stringify(data, null, 2);
        return fs.writeFile(filePath, serialized);
      },
      catch: (e) => new Error(`Failed to write ${filePath}: ${e}`),
    });
    yield* persist;
  });
|
|
405
|
+
/**
 * Compute a short content fingerprint.
 *
 * @param content - Data to hash.
 * @returns The first 16 hex characters of the SHA-256 digest of `content`.
 */
var computeHash = (content) => {
  const hasher = crypto2.createHash("sha256");
  hasher.update(content);
  const fullDigest = hasher.digest("hex");
  return fullDigest.slice(0, 16);
};
|
|
412
|
+
/**
 * Create the storage descriptor for a project root.
 *
 * @param rootPath - Project root; resolved to an absolute path.
 * @returns `{ rootPath, paths }` where `rootPath` is the resolved root and
 *   `paths` is the result of `getIndexPaths` for that resolved root.
 */
var createStorage = (rootPath) => {
  const absoluteRoot = path.resolve(rootPath);
  return {
    rootPath: absoluteRoot,
    paths: getIndexPaths(absoluteRoot),
  };
};
|
|
416
|
+
/**
 * Prepare the on-disk index layout for a storage descriptor.
 *
 * Creates the index root, the parsed-cache directory and the directory that
 * holds the document index. When `loadConfig` yields nothing, a default
 * config is written through `saveConfig`.
 *
 * @param storage - Descriptor with `rootPath` and `paths`.
 * @returns An Effect that completes once directories and config exist.
 */
var initializeIndex = (storage) =>
  Effect3.gen(function* () {
    const requiredDirs = [
      storage.paths.root,
      storage.paths.parsed,
      path.dirname(storage.paths.documents),
    ];
    for (const dir of requiredDirs) {
      yield* ensureDir(dir);
    }
    const existingConfig = yield* loadConfig(storage);
    if (existingConfig) {
      // An existing config is left untouched.
      return;
    }
    yield* saveConfig(storage, {
      version: INDEX_VERSION,
      rootPath: storage.rootPath,
      include: ["**/*.md", "**/*.mdx"],
      exclude: ["**/node_modules/**", "**/.*/**"],
      createdAt: /* @__PURE__ */ new Date().toISOString(),
      updatedAt: /* @__PURE__ */ new Date().toISOString(),
    });
  });
|
|
434
|
+
/**
 * Load the index configuration.
 *
 * @param storage - Descriptor whose `paths.config` names the config file.
 * @returns The Effect produced by `readJsonFile` for that file.
 */
var loadConfig = (storage) => {
  const { config: configPath } = storage.paths;
  return readJsonFile(configPath);
};
|
|
435
|
+
/**
 * Persist the index configuration.
 *
 * The stored copy always carries a fresh `updatedAt` ISO timestamp, replacing
 * any `updatedAt` present on the given config.
 *
 * @param storage - Descriptor whose `paths.config` names the config file.
 * @param config - Configuration object to store.
 * @returns The Effect produced by `writeJsonFile`.
 */
var saveConfig = (storage, config) => {
  const stamped = {
    ...config,
    updatedAt: /* @__PURE__ */ new Date().toISOString(),
  };
  return writeJsonFile(storage.paths.config, stamped);
};
|
|
440
|
+
/**
 * Load the document index.
 *
 * @param storage - Descriptor whose `paths.documents` names the index file.
 * @returns The Effect produced by `readJsonFile` for that file.
 */
var loadDocumentIndex = (storage) => {
  const { documents: documentsPath } = storage.paths;
  return readJsonFile(documentsPath);
};
|
|
441
|
+
/**
 * Persist the document index.
 *
 * @param storage - Descriptor whose `paths.documents` names the index file.
 * @param index - Document index to write.
 * @returns The Effect produced by `writeJsonFile`.
 */
var saveDocumentIndex = (storage, index) => {
  const { documents: documentsPath } = storage.paths;
  return writeJsonFile(documentsPath, index);
};
|
|
443
|
+
/**
 * Create a document index with no documents.
 *
 * @param rootPath - Root path recorded in the index.
 * @returns `{ version: INDEX_VERSION, rootPath, documents: {} }`.
 */
var createEmptyDocumentIndex = (rootPath) => {
  const emptyIndex = {
    version: INDEX_VERSION,
    rootPath,
    documents: {},
  };
  return emptyIndex;
};
|
|
448
|
+
/**
 * Load the section index.
 *
 * @param storage - Descriptor whose `paths.sections` names the index file.
 * @returns The Effect produced by `readJsonFile` for that file.
 */
var loadSectionIndex = (storage) => {
  const { sections: sectionsPath } = storage.paths;
  return readJsonFile(sectionsPath);
};
|
|
449
|
+
/**
 * Persist the section index.
 *
 * @param storage - Descriptor whose `paths.sections` names the index file.
 * @param index - Section index to write.
 * @returns The Effect produced by `writeJsonFile`.
 */
var saveSectionIndex = (storage, index) => {
  const { sections: sectionsPath } = storage.paths;
  return writeJsonFile(sectionsPath, index);
};
|
|
451
|
+
/**
 * Create a section index with no sections.
 *
 * @returns `{ version: INDEX_VERSION, sections: {}, byHeading: {}, byDocument: {} }`.
 */
var createEmptySectionIndex = () => {
  const emptyIndex = {
    version: INDEX_VERSION,
    sections: {},
    byHeading: {},
    byDocument: {},
  };
  return emptyIndex;
};
|
|
457
|
+
/**
 * Load the link index.
 *
 * @param storage - Descriptor whose `paths.links` names the index file.
 * @returns The Effect produced by `readJsonFile` for that file.
 */
var loadLinkIndex = (storage) => {
  const { links: linksPath } = storage.paths;
  return readJsonFile(linksPath);
};
|
|
458
|
+
/**
 * Persist the link index.
 *
 * @param storage - Descriptor whose `paths.links` names the index file.
 * @param index - Link index to write.
 * @returns The Effect produced by `writeJsonFile`.
 */
var saveLinkIndex = (storage, index) => {
  const { links: linksPath } = storage.paths;
  return writeJsonFile(linksPath, index);
};
|
|
460
|
+
/**
 * Create a link index with no links.
 *
 * @returns `{ version: INDEX_VERSION, forward: {}, backward: {}, broken: [] }`.
 */
var createEmptyLinkIndex = () => {
  const emptyIndex = {
    version: INDEX_VERSION,
    forward: {},
    backward: {},
    broken: [],
  };
  return emptyIndex;
};
|
|
466
|
+
/**
 * Report whether an index has been initialized for this storage.
 *
 * The check is whether the config file is accessible; any access error is
 * reported as `false` rather than as a failure.
 *
 * @param storage - Descriptor whose `paths.config` names the config file.
 * @returns An Effect yielding `true` when the config file can be accessed,
 *   otherwise `false`.
 */
var indexExists = (storage) => {
  const probeConfig = async () => {
    try {
      await fs.access(storage.paths.config);
    } catch {
      return false;
    }
    return true;
  };
  return Effect3.tryPromise({
    try: probeConfig,
    catch: (e) => new Error(`Failed to check index existence: ${e}`),
  });
};
|
|
478
|
+
|
|
479
|
+
import { Effect as Effect4 } from "effect";
|
|
480
|
+
// src/index/indexer.ts
|
|
481
|
+
import * as fs2 from "fs/promises";
|
|
482
|
+
import * as path2 from "path";
|
|
483
|
+
|
|
484
|
+
/**
 * Tell whether a file name has a markdown extension.
 *
 * @param filename - File name to test (case-sensitive).
 * @returns `true` when the name ends with ".md" or ".mdx".
 */
var isMarkdownFile = (filename) =>
  [".md", ".mdx"].some((extension) => filename.endsWith(extension));
|
|
486
|
+
/**
 * Decide whether a path is covered by the exclude patterns.
 *
 * Patterns are not evaluated as general globs. Only two families are
 * recognised, and any other pattern has no effect:
 *  - a pattern containing "node_modules" excludes every path that contains
 *    "node_modules" (case-insensitive);
 *  - a pattern starting with the dot-directory glob (star-star, slash, dot,
 *    star) excludes every path in which some segment after the first starts
 *    with a dot.
 *
 * @param filePath - Path to test; "/" and "\" separators are both accepted.
 * @param exclude - Array of exclude patterns.
 * @returns `true` when at least one recognised pattern matches.
 */
var shouldExclude = (filePath, exclude) => {
  // Backslashes are folded to "/" first: without this the "/." probe below
  // can never match paths produced by path.join on Windows.
  const normalized = filePath.toLowerCase().replace(/\\/g, "/");
  return exclude.some(
    (pattern) =>
      (pattern.includes("node_modules") &&
        normalized.includes("node_modules")) ||
      (pattern.startsWith("**/.*") && normalized.includes("/.")),
  );
};
|
|
501
|
+
/**
 * Recursively collect the markdown files below a directory.
 *
 * Entries whose name starts with "." or equals "node_modules" are always
 * skipped. Every other entry is checked against `exclude` using its path
 * relative to the walk root (written with "/" separators and a leading "/"),
 * so directories above the root never influence the decision. Previously the
 * absolute path was tested, which excluded every file whenever the root
 * itself lived under a hidden directory or a path containing "node_modules".
 *
 * @param dir - Directory to scan.
 * @param exclude - Exclude patterns forwarded to `shouldExclude`.
 * @param rootDir - Directory the exclude check is relative to. Defaults to
 *   `dir`; recursive calls pass the original root through.
 * @returns Promise of the paths (each `dir` joined with the nested entry
 *   names) of all files accepted by `isMarkdownFile`.
 */
var walkDirectory = async (dir, exclude, rootDir = dir) => {
  const files = [];
  const entries = await fs2.readdir(dir, { withFileTypes: true });
  for (const entry of entries) {
    if (entry.name.startsWith(".") || entry.name === "node_modules") {
      continue;
    }
    const fullPath = path2.join(dir, entry.name);
    const relativeSegments = path2.relative(rootDir, fullPath).split(path2.sep);
    if (shouldExclude(`/${relativeSegments.join("/")}`, exclude)) {
      continue;
    }
    if (entry.isDirectory()) {
      const subFiles = await walkDirectory(fullPath, exclude, rootDir);
      // Appended one by one: spreading a very large array into push() can
      // exceed the engine's argument limit.
      for (const subFile of subFiles) {
        files.push(subFile);
      }
    } else if (entry.isFile() && isMarkdownFile(entry.name)) {
      files.push(fullPath);
    }
  }
  return files;
};
|
|
521
|
+
/**
 * Flatten a section tree into a list of index records.
 *
 * Sections are emitted depth-first, each parent before its children and
 * siblings in their original order.
 *
 * @param sections - Top-level sections; each has `id`, `heading`, `level`,
 *   `startLine`, `endLine`, `metadata` and `children`.
 * @param docId - Document id copied onto every record as `documentId`.
 * @param docPath - Document path copied onto every record as `documentPath`.
 * @returns One flat record per section in the tree.
 */
var flattenSections = (sections, docId, docPath) => {
  const flat = [];
  // Explicit stack; items are pushed reversed so pop() yields document order.
  const pending = [...sections].reverse();
  while (pending.length > 0) {
    const node = pending.pop();
    const { tokenCount, hasCode, hasList, hasTable } = node.metadata;
    flat.push({
      id: node.id,
      documentId: docId,
      documentPath: docPath,
      heading: node.heading,
      level: node.level,
      startLine: node.startLine,
      endLine: node.endLine,
      tokenCount,
      hasCode,
      hasList,
      hasTable,
    });
    pending.push(...[...node.children].reverse());
  }
  return flat;
};
|
|
546
|
+
/**
 * Resolve a markdown link to a path relative to the index root.
 *
 * - `http://` and `https://` links are external and resolve to `null`.
 * - Anchor-only links ("#section") point at the linking file itself and
 *   resolve to that file's root-relative path, so the result is comparable
 *   with every other value this function returns.
 * - Other links are resolved against the linking file's directory, with any
 *   "#fragment" removed.
 *
 * A target counts as inside the root only when its root-relative form does
 * not climb out with "..". A plain string-prefix test is not sufficient: it
 * would accept "/a/docs-old/x.md" for the root "/a/docs".
 *
 * @param href - Link target as written in the document.
 * @param fromPath - Path of the document containing the link.
 * @param rootPath - Index root directory.
 * @returns The root-relative target path, or `null` when the link is
 *   external, empty, or points outside the root.
 */
var resolveInternalLink = (href, fromPath, rootPath) => {
  if (href.startsWith("http://") || href.startsWith("https://")) {
    return null;
  }
  const toRootRelative = (absolutePath) => {
    const relative = path2.relative(rootPath, absolutePath);
    const escapesRoot =
      relative === ".." ||
      relative.startsWith(`..${path2.sep}`) ||
      // On Windows a target on another drive comes back as an absolute path.
      path2.isAbsolute(relative);
    return escapesRoot ? null : relative;
  };
  if (href.startsWith("#")) {
    // Keep the previous result (fromPath as given) for a source file that is
    // itself outside the root, so such callers see no change.
    return toRootRelative(path2.resolve(fromPath)) ?? fromPath;
  }
  const linkPath = href.split("#")[0] ?? "";
  if (!linkPath) return null;
  return toRootRelative(path2.resolve(path2.dirname(fromPath), linkPath));
};
|
|
562
|
+
/**
 * Build or incrementally update the document, section and link indexes for
 * the markdown files under a root directory, then persist all three.
 *
 * A file is re-parsed unless its stored hash and mtime both match and
 * `options.force` is not set. Per-file problems (read or parse failures) are
 * collected in the result's `errors` array and do not stop the build.
 *
 * Consistency rules applied on every run:
 *  - if `options.force` is set or any of the three index files is missing,
 *    all three indexes start empty, so no stale sections or links survive;
 *  - documents that are no longer found on disk are removed, together with
 *    their sections and outgoing links;
 *  - the backward-link map and the broken-link list are derived from the
 *    final forward-link map rather than carried over from the previous run.
 *
 * @param rootPath - Directory to index.
 * @param options - Optional settings: `force` (re-index everything) and
 *   `exclude` (patterns handed to `walkDirectory`; defaults to node_modules
 *   and dot-directories).
 * @returns An Effect yielding `{ documentsIndexed, sectionsIndexed,
 *   linksIndexed, totalDocuments, totalSections, totalLinks, duration,
 *   errors }`, where `duration` is in milliseconds.
 */
var buildIndex = (rootPath, options = {}) =>
  Effect4.gen(function* () {
    const startTime = Date.now();
    const storage = createStorage(rootPath);
    const errors = [];
    yield* initializeIndex(storage);

    const existingDocIndex = yield* loadDocumentIndex(storage);
    const existingSectionIndex = yield* loadSectionIndex(storage);
    const existingLinkIndex = yield* loadLinkIndex(storage);
    // The three indexes are only meaningful together. Mixing an empty
    // document index with old section/link data would leave nothing to clean
    // up against and would append duplicate ids to byHeading.
    const startFresh =
      Boolean(options.force) ||
      !existingDocIndex ||
      !existingSectionIndex ||
      !existingLinkIndex;
    const docIndex = startFresh
      ? createEmptyDocumentIndex(storage.rootPath)
      : existingDocIndex;
    const sectionIndex = startFresh
      ? createEmptySectionIndex()
      : existingSectionIndex;
    const linkIndex = startFresh ? createEmptyLinkIndex() : existingLinkIndex;

    const exclude = options.exclude ?? ["**/node_modules/**", "**/.*/**"];
    const files = yield* Effect4.tryPromise({
      try: () => walkDirectory(storage.rootPath, exclude),
      catch: (e) => new Error(`Failed to walk directory: ${e}`),
    });
    let documentsIndexed = 0;
    let sectionsIndexed = 0;
    let linksIndexed = 0;

    const hasOwn = (obj, key) =>
      Object.prototype.hasOwnProperty.call(obj, key);
    // Copies a { key: string[] } map into a prototype-less object. Keys come
    // from document content (headings, link targets), so a key such as
    // "constructor" must not resolve to something inherited from
    // Object.prototype.
    const copyListMap = (source) => {
      const copy = Object.create(null);
      for (const [key, list] of Object.entries(source)) {
        copy[key] = [...list];
      }
      return copy;
    };

    const mutableDocuments = {
      ...docIndex.documents,
    };
    const mutableSections = {
      ...sectionIndex.sections,
    };
    const mutableByHeading = copyListMap(sectionIndex.byHeading);
    const mutableByDocument = copyListMap(sectionIndex.byDocument);
    const mutableForward = copyListMap(linkIndex.forward);

    // Removes the sections and outgoing links one document contributed.
    const forgetDocument = (docPath, docId) => {
      for (const sectionId of mutableByDocument[docId] ?? []) {
        const oldSection = mutableSections[sectionId];
        if (oldSection) {
          const headingList =
            mutableByHeading[oldSection.heading.toLowerCase()];
          if (headingList) {
            const idx = headingList.indexOf(sectionId);
            if (idx !== -1) headingList.splice(idx, 1);
          }
        }
        delete mutableSections[sectionId];
      }
      delete mutableByDocument[docId];
      delete mutableForward[docPath];
    };

    const pathsOnDisk = new Set();
    for (const filePath of files) {
      const relativePath = path2.relative(storage.rootPath, filePath);
      pathsOnDisk.add(relativePath);
      const processFile = Effect4.gen(function* () {
        // tryPromise (not promise): a rejected read must become a failure
        // that catchAll below can record. A defect would abort the build.
        const [content, stats] = yield* Effect4.tryPromise({
          try: () =>
            Promise.all([fs2.readFile(filePath, "utf-8"), fs2.stat(filePath)]),
          catch: (e) =>
            new Error(
              `Failed to read ${relativePath}: ${
                e instanceof Error ? e.message : String(e)
              }`,
            ),
        });
        const hash = computeHash(content);
        const existingEntry = mutableDocuments[relativePath];
        if (
          !options.force &&
          existingEntry &&
          existingEntry.hash === hash &&
          existingEntry.mtime === stats.mtime.getTime()
        ) {
          return;
        }
        const doc = yield* parse(content, {
          path: relativePath,
          lastModified: stats.mtime,
        }).pipe(
          Effect4.mapError(
            (e) => new Error(`Parse error in ${relativePath}: ${e.message}`),
          ),
        );
        if (existingEntry) {
          forgetDocument(relativePath, existingEntry.id);
        }
        mutableDocuments[relativePath] = {
          id: doc.id,
          path: relativePath,
          title: doc.title,
          mtime: stats.mtime.getTime(),
          hash,
          tokenCount: doc.metadata.tokenCount,
          sectionCount: doc.metadata.headingCount,
        };
        documentsIndexed++;

        const sectionIds = [];
        for (const section of flattenSections(
          doc.sections,
          doc.id,
          relativePath,
        )) {
          mutableSections[section.id] = section;
          sectionIds.push(section.id);
          const headingKey = section.heading.toLowerCase();
          if (!mutableByHeading[headingKey]) {
            mutableByHeading[headingKey] = [];
          }
          mutableByHeading[headingKey].push(section.id);
          sectionsIndexed++;
        }
        mutableByDocument[doc.id] = sectionIds;

        const outgoingLinks = [];
        for (const link of doc.links) {
          if (link.type !== "internal") continue;
          // An anchor-only link targets this document; use its index key
          // directly so the target matches the document map.
          const target = link.href.startsWith("#")
            ? relativePath
            : resolveInternalLink(link.href, filePath, storage.rootPath);
          if (target) {
            outgoingLinks.push(target);
            linksIndexed++;
          }
        }
        mutableForward[relativePath] = outgoingLinks;
      }).pipe(
        Effect4.catchAll((error) => {
          errors.push({
            path: relativePath,
            message: error instanceof Error ? error.message : String(error),
          });
          return Effect4.void;
        }),
      );
      yield* processFile;
    }

    // Drop documents that were indexed earlier but were not found this run.
    for (const [docPath, entry] of Object.entries(mutableDocuments)) {
      if (!pathsOnDisk.has(docPath)) {
        forgetDocument(docPath, entry.id);
        delete mutableDocuments[docPath];
      }
    }

    // Derive backward links and broken targets from the final forward map so
    // removed links and repaired targets do not linger from previous runs.
    const mutableBackward = Object.create(null);
    const brokenLinks = [];
    const seenBroken = new Set();
    for (const [from, targets] of Object.entries(mutableForward)) {
      for (const target of targets) {
        if (!mutableBackward[target]) {
          mutableBackward[target] = [];
        }
        if (!mutableBackward[target].includes(from)) {
          mutableBackward[target].push(from);
        }
        if (!hasOwn(mutableDocuments, target) && !seenBroken.has(target)) {
          seenBroken.add(target);
          brokenLinks.push(target);
        }
      }
    }

    yield* saveDocumentIndex(storage, {
      version: docIndex.version,
      rootPath: storage.rootPath,
      documents: mutableDocuments,
    });
    yield* saveSectionIndex(storage, {
      version: sectionIndex.version,
      sections: mutableSections,
      byHeading: mutableByHeading,
      byDocument: mutableByDocument,
    });
    yield* saveLinkIndex(storage, {
      version: linkIndex.version,
      forward: mutableForward,
      backward: mutableBackward,
      broken: brokenLinks,
    });
    const duration = Date.now() - startTime;
    const totalLinks = Object.values(mutableForward).reduce(
      (sum, links) => sum + links.length,
      0,
    );
    return {
      documentsIndexed,
      sectionsIndexed,
      linksIndexed,
      totalDocuments: Object.keys(mutableDocuments).length,
      totalSections: Object.keys(mutableSections).length,
      totalLinks,
      duration,
      errors,
    };
  });
|
|
738
|
+
/**
 * Look up the links recorded as going out of a file.
 *
 * @param rootPath - Index root directory.
 * @param filePath - File to look up; resolved to an absolute path and then
 *   made relative to the resolved root to form the lookup key.
 * @returns An Effect yielding the stored forward-link list for the file, or
 *   an empty array when there is no link index or no entry for the file.
 */
var getOutgoingLinks = (rootPath, filePath) =>
  Effect4.gen(function* () {
    const storage = createStorage(rootPath);
    const linkIndex = yield* loadLinkIndex(storage);
    if (linkIndex) {
      const absoluteFile = path2.resolve(filePath);
      const lookupKey = path2.relative(storage.rootPath, absoluteFile);
      return linkIndex.forward[lookupKey] ?? [];
    }
    return [];
  });
|
|
751
|
+
/**
 * Look up the files recorded as linking to a file.
 *
 * @param rootPath - Index root directory.
 * @param filePath - File to look up; resolved to an absolute path and then
 *   made relative to the resolved root to form the lookup key.
 * @returns An Effect yielding the stored backward-link list for the file, or
 *   an empty array when there is no link index or no entry for the file.
 */
var getIncomingLinks = (rootPath, filePath) =>
  Effect4.gen(function* () {
    const storage = createStorage(rootPath);
    const linkIndex = yield* loadLinkIndex(storage);
    if (linkIndex) {
      const absoluteFile = path2.resolve(filePath);
      const lookupKey = path2.relative(storage.rootPath, absoluteFile);
      return linkIndex.backward[lookupKey] ?? [];
    }
    return [];
  });
|
|
764
|
+
/**
 * Return the broken-link list stored in the link index.
 *
 * @param rootPath - Index root directory.
 * @returns An Effect yielding the index's `broken` value, or an empty array
 *   when no link index could be loaded.
 */
var getBrokenLinks = (rootPath) =>
  Effect4.gen(function* () {
    const linkIndex = yield* loadLinkIndex(createStorage(rootPath));
    return linkIndex ? linkIndex.broken : [];
  });
|
|
773
|
+
|
|
774
|
+
export {
|
|
775
|
+
countTokens,
|
|
776
|
+
countTokensApprox,
|
|
777
|
+
countWords,
|
|
778
|
+
freeEncoder,
|
|
779
|
+
parse,
|
|
780
|
+
parseFile,
|
|
781
|
+
INDEX_DIR,
|
|
782
|
+
INDEX_VERSION,
|
|
783
|
+
getIndexPaths,
|
|
784
|
+
computeHash,
|
|
785
|
+
createStorage,
|
|
786
|
+
initializeIndex,
|
|
787
|
+
loadConfig,
|
|
788
|
+
saveConfig,
|
|
789
|
+
loadDocumentIndex,
|
|
790
|
+
saveDocumentIndex,
|
|
791
|
+
createEmptyDocumentIndex,
|
|
792
|
+
loadSectionIndex,
|
|
793
|
+
saveSectionIndex,
|
|
794
|
+
createEmptySectionIndex,
|
|
795
|
+
loadLinkIndex,
|
|
796
|
+
saveLinkIndex,
|
|
797
|
+
createEmptyLinkIndex,
|
|
798
|
+
indexExists,
|
|
799
|
+
buildIndex,
|
|
800
|
+
getOutgoingLinks,
|
|
801
|
+
getIncomingLinks,
|
|
802
|
+
getBrokenLinks,
|
|
803
|
+
};
|