mdcontext 0.0.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.changeset/README.md +28 -0
- package/.changeset/config.json +11 -0
- package/.claude/settings.local.json +25 -0
- package/.github/workflows/ci.yml +83 -0
- package/.github/workflows/claude-code-review.yml +44 -0
- package/.github/workflows/claude.yml +85 -0
- package/.github/workflows/release.yml +113 -0
- package/.tldrignore +112 -0
- package/BACKLOG.md +338 -0
- package/CONTRIBUTING.md +186 -0
- package/NOTES/NOTES +44 -0
- package/README.md +434 -11
- package/biome.json +36 -0
- package/cspell.config.yaml +14 -0
- package/dist/chunk-23UPXDNL.js +3044 -0
- package/dist/chunk-2W7MO2DL.js +1366 -0
- package/dist/chunk-3NUAZGMA.js +1689 -0
- package/dist/chunk-7TOWB2XB.js +366 -0
- package/dist/chunk-7XOTOADQ.js +3065 -0
- package/dist/chunk-AH2PDM2K.js +3042 -0
- package/dist/chunk-BNXWSZ63.js +3742 -0
- package/dist/chunk-BTL5DJVU.js +3222 -0
- package/dist/chunk-HDHYG7E4.js +104 -0
- package/dist/chunk-HLR4KZBP.js +3234 -0
- package/dist/chunk-IP3FRFEB.js +1045 -0
- package/dist/chunk-KHU56VDO.js +3042 -0
- package/dist/chunk-KRYIFLQR.js +88 -0
- package/dist/chunk-LBSDNLEM.js +287 -0
- package/dist/chunk-MNTQ7HCP.js +2643 -0
- package/dist/chunk-MUJELQQ6.js +1387 -0
- package/dist/chunk-MXJGMSLV.js +2199 -0
- package/dist/chunk-N6QJGC3Z.js +2636 -0
- package/dist/chunk-OBELGBPM.js +1713 -0
- package/dist/chunk-OT7R5XTA.js +3192 -0
- package/dist/chunk-P7X4RA2T.js +106 -0
- package/dist/chunk-PIDUQNC2.js +3185 -0
- package/dist/chunk-POGCDIH4.js +3187 -0
- package/dist/chunk-PSIEOQGZ.js +3043 -0
- package/dist/chunk-PVRT3IHA.js +3238 -0
- package/dist/chunk-QNN4TT23.js +1430 -0
- package/dist/chunk-RE3R45RJ.js +3042 -0
- package/dist/chunk-S7E6TFX6.js +803 -0
- package/dist/chunk-SG6GLU4U.js +1378 -0
- package/dist/chunk-SJCDV2ST.js +274 -0
- package/dist/chunk-SYE5XLF3.js +104 -0
- package/dist/chunk-T5VLYBZD.js +103 -0
- package/dist/chunk-TOQB7VWU.js +3238 -0
- package/dist/chunk-VFNMZ4ZQ.js +3228 -0
- package/dist/chunk-VVTGZNBT.js +1629 -0
- package/dist/chunk-W7Q4RFEV.js +104 -0
- package/dist/chunk-XTYYVRLO.js +3190 -0
- package/dist/chunk-Y6MDYVJD.js +3063 -0
- package/dist/cli/main.d.ts +1 -0
- package/dist/cli/main.js +5458 -0
- package/dist/index.d.ts +653 -0
- package/dist/index.js +79 -0
- package/dist/mcp/server.d.ts +1 -0
- package/dist/mcp/server.js +472 -0
- package/dist/schema-BAWSG7KY.js +22 -0
- package/dist/schema-E3QUPL26.js +20 -0
- package/dist/schema-EHL7WUT6.js +20 -0
- package/docs/019-USAGE.md +625 -0
- package/docs/020-current-implementation.md +364 -0
- package/docs/021-DOGFOODING-FINDINGS.md +175 -0
- package/docs/BACKLOG.md +80 -0
- package/docs/CONFIG.md +1123 -0
- package/docs/DESIGN.md +439 -0
- package/docs/ERRORS.md +383 -0
- package/docs/PROJECT.md +88 -0
- package/docs/ROADMAP.md +407 -0
- package/docs/summarization.md +320 -0
- package/docs/test-links.md +9 -0
- package/justfile +40 -0
- package/package.json +74 -9
- package/pnpm-workspace.yaml +5 -0
- package/research/INDEX.md +315 -0
- package/research/code-review/README.md +90 -0
- package/research/code-review/cli-error-handling-review.md +979 -0
- package/research/code-review/code-review-validation-report.md +464 -0
- package/research/code-review/main-ts-review.md +1128 -0
- package/research/config-analysis/01-current-implementation.md +470 -0
- package/research/config-analysis/02-strategy-recommendation.md +428 -0
- package/research/config-analysis/03-task-candidates.md +715 -0
- package/research/config-analysis/033-research-configuration-management.md +828 -0
- package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
- package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
- package/research/config-docs/SUMMARY.md +357 -0
- package/research/config-docs/TEST-RESULTS.md +776 -0
- package/research/config-docs/TODO.md +542 -0
- package/research/config-docs/analysis.md +744 -0
- package/research/config-docs/fix-validation.md +502 -0
- package/research/config-docs/help-audit.md +264 -0
- package/research/config-docs/help-system-analysis.md +890 -0
- package/research/dogfood/consolidated-tool-evaluation.md +373 -0
- package/research/dogfood/strategy-a/a-synthesis.md +184 -0
- package/research/dogfood/strategy-a/a1-docs.md +226 -0
- package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
- package/research/dogfood/strategy-a/a3-llm.md +164 -0
- package/research/dogfood/strategy-b/b-synthesis.md +228 -0
- package/research/dogfood/strategy-b/b1-architecture.md +207 -0
- package/research/dogfood/strategy-b/b2-gaps.md +258 -0
- package/research/dogfood/strategy-b/b3-workflows.md +250 -0
- package/research/dogfood/strategy-c/c-synthesis.md +451 -0
- package/research/dogfood/strategy-c/c1-explorer.md +192 -0
- package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
- package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
- package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
- package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
- package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
- package/research/effect-cli-error-handling.md +845 -0
- package/research/effect-errors-as-values.md +943 -0
- package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
- package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
- package/research/errors-task-analysis/embeddings-analysis.md +709 -0
- package/research/errors-task-analysis/index-search-analysis.md +812 -0
- package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
- package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
- package/research/issue-review.md +603 -0
- package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
- package/research/llm-summarization/alternative-providers-2026.md +1428 -0
- package/research/llm-summarization/anthropic-2026.md +367 -0
- package/research/llm-summarization/claude-cli-integration.md +1706 -0
- package/research/llm-summarization/cli-integration-patterns.md +3155 -0
- package/research/llm-summarization/openai-2026.md +473 -0
- package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
- package/research/llm-summarization/opencode-cli-integration.md +1552 -0
- package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
- package/research/llm-summarization/prototype-results.md +56 -0
- package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
- package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
- package/research/mdcontext-error-analysis.md +521 -0
- package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
- package/research/mdcontext-pudding/01-index-embed.md +956 -0
- package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
- package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
- package/research/mdcontext-pudding/02-search.md +970 -0
- package/research/mdcontext-pudding/03-context.md +779 -0
- package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
- package/research/mdcontext-pudding/04-tree.md +704 -0
- package/research/mdcontext-pudding/05-config.md +1038 -0
- package/research/mdcontext-pudding/06-links-summary.txt +87 -0
- package/research/mdcontext-pudding/06-links.md +679 -0
- package/research/mdcontext-pudding/07-stats.md +693 -0
- package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
- package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
- package/research/mdcontext-pudding/README.md +168 -0
- package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
- package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
- package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
- package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
- package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
- package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
- package/research/research-quality-review.md +834 -0
- package/research/semantic-search/002-research-embedding-models.md +490 -0
- package/research/semantic-search/003-research-rag-alternatives.md +523 -0
- package/research/semantic-search/004-research-vector-search.md +841 -0
- package/research/semantic-search/032-research-semantic-search.md +427 -0
- package/research/semantic-search/embedding-text-analysis.md +156 -0
- package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
- package/research/semantic-search/query-processing-analysis.md +207 -0
- package/research/semantic-search/root-cause-and-solution.md +114 -0
- package/research/semantic-search/threshold-validation-report.md +69 -0
- package/research/semantic-search/vector-search-analysis.md +63 -0
- package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
- package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
- package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
- package/research/task-management-2026/03-lightweight-file-based.md +567 -0
- package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
- package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
- package/research/task-management-2026/linear/02-api-integrations.md +930 -0
- package/research/task-management-2026/linear/03-ai-features.md +368 -0
- package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
- package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
- package/research/test-path-issues.md +276 -0
- package/review/ALP-76/1-error-type-design.md +962 -0
- package/review/ALP-76/2-error-handling-patterns.md +906 -0
- package/review/ALP-76/3-error-presentation.md +624 -0
- package/review/ALP-76/4-test-coverage.md +625 -0
- package/review/ALP-76/5-migration-completeness.md +440 -0
- package/review/ALP-76/6-effect-best-practices.md +755 -0
- package/scripts/apply-branch-protection.sh +47 -0
- package/scripts/branch-protection-templates.json +79 -0
- package/scripts/prototype-summarization.ts +346 -0
- package/scripts/rebuild-hnswlib.js +58 -0
- package/scripts/setup-branch-protection.sh +64 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
- package/src/cli/argv-preprocessor.test.ts +210 -0
- package/src/cli/argv-preprocessor.ts +202 -0
- package/src/cli/cli.test.ts +627 -0
- package/src/cli/commands/backlinks.ts +54 -0
- package/src/cli/commands/config-cmd.ts +642 -0
- package/src/cli/commands/context.ts +285 -0
- package/src/cli/commands/duplicates.ts +122 -0
- package/src/cli/commands/embeddings.ts +529 -0
- package/src/cli/commands/index-cmd.ts +480 -0
- package/src/cli/commands/index.ts +16 -0
- package/src/cli/commands/links.ts +52 -0
- package/src/cli/commands/search.ts +1281 -0
- package/src/cli/commands/stats.ts +149 -0
- package/src/cli/commands/tree.ts +128 -0
- package/src/cli/config-layer.ts +176 -0
- package/src/cli/error-handler.test.ts +235 -0
- package/src/cli/error-handler.ts +655 -0
- package/src/cli/flag-schemas.ts +341 -0
- package/src/cli/help.ts +588 -0
- package/src/cli/index.ts +9 -0
- package/src/cli/main.ts +435 -0
- package/src/cli/options.ts +41 -0
- package/src/cli/shared-error-handling.ts +199 -0
- package/src/cli/typo-suggester.test.ts +105 -0
- package/src/cli/typo-suggester.ts +130 -0
- package/src/cli/utils.ts +259 -0
- package/src/config/file-provider.test.ts +320 -0
- package/src/config/file-provider.ts +273 -0
- package/src/config/index.ts +72 -0
- package/src/config/integration.test.ts +667 -0
- package/src/config/precedence.test.ts +277 -0
- package/src/config/precedence.ts +451 -0
- package/src/config/schema.test.ts +414 -0
- package/src/config/schema.ts +603 -0
- package/src/config/service.test.ts +320 -0
- package/src/config/service.ts +243 -0
- package/src/config/testing.test.ts +264 -0
- package/src/config/testing.ts +110 -0
- package/src/core/index.ts +1 -0
- package/src/core/types.ts +113 -0
- package/src/duplicates/detector.test.ts +183 -0
- package/src/duplicates/detector.ts +414 -0
- package/src/duplicates/index.ts +18 -0
- package/src/embeddings/embedding-namespace.test.ts +300 -0
- package/src/embeddings/embedding-namespace.ts +947 -0
- package/src/embeddings/heading-boost.test.ts +222 -0
- package/src/embeddings/hnsw-build-options.test.ts +198 -0
- package/src/embeddings/hyde.test.ts +272 -0
- package/src/embeddings/hyde.ts +264 -0
- package/src/embeddings/index.ts +10 -0
- package/src/embeddings/openai-provider.ts +414 -0
- package/src/embeddings/pricing.json +22 -0
- package/src/embeddings/provider-constants.ts +204 -0
- package/src/embeddings/provider-errors.test.ts +967 -0
- package/src/embeddings/provider-errors.ts +565 -0
- package/src/embeddings/provider-factory.test.ts +240 -0
- package/src/embeddings/provider-factory.ts +225 -0
- package/src/embeddings/provider-integration.test.ts +788 -0
- package/src/embeddings/query-preprocessing.test.ts +187 -0
- package/src/embeddings/semantic-search-threshold.test.ts +508 -0
- package/src/embeddings/semantic-search.ts +1270 -0
- package/src/embeddings/types.ts +359 -0
- package/src/embeddings/vector-store.ts +708 -0
- package/src/embeddings/voyage-provider.ts +313 -0
- package/src/errors/errors.test.ts +845 -0
- package/src/errors/index.ts +533 -0
- package/src/index/ignore-patterns.test.ts +354 -0
- package/src/index/ignore-patterns.ts +305 -0
- package/src/index/index.ts +4 -0
- package/src/index/indexer.ts +684 -0
- package/src/index/storage.ts +260 -0
- package/src/index/types.ts +147 -0
- package/src/index/watcher.ts +189 -0
- package/src/index.ts +30 -0
- package/src/integration/search-keyword.test.ts +678 -0
- package/src/mcp/server.ts +612 -0
- package/src/parser/index.ts +1 -0
- package/src/parser/parser.test.ts +291 -0
- package/src/parser/parser.ts +394 -0
- package/src/parser/section-filter.test.ts +277 -0
- package/src/parser/section-filter.ts +392 -0
- package/src/search/__tests__/hybrid-search.test.ts +650 -0
- package/src/search/bm25-store.ts +366 -0
- package/src/search/cross-encoder.test.ts +253 -0
- package/src/search/cross-encoder.ts +406 -0
- package/src/search/fuzzy-search.test.ts +419 -0
- package/src/search/fuzzy-search.ts +273 -0
- package/src/search/hybrid-search.ts +448 -0
- package/src/search/path-matcher.test.ts +276 -0
- package/src/search/path-matcher.ts +33 -0
- package/src/search/query-parser.test.ts +260 -0
- package/src/search/query-parser.ts +319 -0
- package/src/search/searcher.test.ts +280 -0
- package/src/search/searcher.ts +724 -0
- package/src/search/wink-bm25.d.ts +30 -0
- package/src/summarization/cli-providers/claude.ts +202 -0
- package/src/summarization/cli-providers/detection.test.ts +273 -0
- package/src/summarization/cli-providers/detection.ts +118 -0
- package/src/summarization/cli-providers/index.ts +8 -0
- package/src/summarization/cost.test.ts +139 -0
- package/src/summarization/cost.ts +102 -0
- package/src/summarization/error-handler.test.ts +127 -0
- package/src/summarization/error-handler.ts +111 -0
- package/src/summarization/index.ts +102 -0
- package/src/summarization/pipeline.test.ts +498 -0
- package/src/summarization/pipeline.ts +231 -0
- package/src/summarization/prompts.test.ts +269 -0
- package/src/summarization/prompts.ts +133 -0
- package/src/summarization/provider-factory.test.ts +396 -0
- package/src/summarization/provider-factory.ts +178 -0
- package/src/summarization/types.ts +184 -0
- package/src/summarize/budget-bugs.test.ts +620 -0
- package/src/summarize/formatters.ts +419 -0
- package/src/summarize/index.ts +20 -0
- package/src/summarize/summarizer.test.ts +275 -0
- package/src/summarize/summarizer.ts +597 -0
- package/src/summarize/verify-bugs.test.ts +238 -0
- package/src/types/huggingface-transformers.d.ts +66 -0
- package/src/utils/index.ts +1 -0
- package/src/utils/tokens.test.ts +142 -0
- package/src/utils/tokens.ts +186 -0
- package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
- package/tests/fixtures/cli/.mdcontext/config.json +8 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
- package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
- package/tests/fixtures/cli/.mdcontext/indexes/sections.json +247 -0
- package/tests/fixtures/cli/README.md +9 -0
- package/tests/fixtures/cli/api-reference.md +11 -0
- package/tests/fixtures/cli/getting-started.md +11 -0
- package/tests/integration/embed-index.test.ts +712 -0
- package/tests/integration/search-context.test.ts +469 -0
- package/tests/integration/search-semantic.test.ts +522 -0
- package/tsconfig.json +26 -0
- package/vitest.config.ts +16 -0
- package/vitest.setup.ts +12 -0
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for section filtering utilities
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { describe, expect, it } from 'vitest'
|
|
6
|
+
import type { HeadingLevel, MdDocument, MdSection } from '../core/types.js'
|
|
7
|
+
import {
|
|
8
|
+
buildSectionList,
|
|
9
|
+
extractSectionContent,
|
|
10
|
+
filterDocumentSections,
|
|
11
|
+
filterExcludedSections,
|
|
12
|
+
} from './section-filter.js'
|
|
13
|
+
|
|
14
|
+
/**
 * Test helper: builds a minimal MdSection fixture.
 *
 * The id is the lower-cased heading with whitespace runs collapsed to '-',
 * prefixed with `section-`. Content and plain text are derived from the
 * heading. Line range and the non-token metadata are fixed placeholder values.
 */
const createSection = (
  heading: string,
  level: HeadingLevel,
  children: MdSection[] = [],
  tokenCount: number = 100,
): MdSection => {
  const slug = heading.toLowerCase().replace(/\s+/g, '-')
  const bodyText = `Content for ${heading}`

  return {
    id: `section-${slug}`,
    heading,
    level,
    content: `# ${heading}\n\n${bodyText}`,
    plainText: bodyText,
    startLine: 1,
    endLine: 10,
    children,
    metadata: {
      wordCount: 10,
      tokenCount,
      hasCode: false,
      hasList: false,
      hasTable: false,
    },
  }
}
|
|
37
|
+
|
|
38
|
+
/**
 * Test helper: wraps the given top-level sections in a minimal MdDocument.
 *
 * The document token count is the sum of the top-level sections' token counts
 * (children are not added in), and the heading count is the number of
 * top-level sections. All other fields are fixed placeholder values.
 */
const createDocument = (sections: MdSection[]): MdDocument => {
  let totalTokens = 0
  for (const section of sections) {
    totalTokens += section.metadata.tokenCount
  }

  const metadata = {
    tokenCount: totalTokens,
    headingCount: sections.length,
    linkCount: 0,
    codeBlockCount: 0,
    wordCount: 100,
    lastModified: new Date(),
    indexedAt: new Date(),
  }

  return {
    id: 'test-doc',
    path: '/test/doc.md',
    title: 'Test Document',
    sections,
    links: [],
    codeBlocks: [],
    metadata,
    frontmatter: {},
  }
}
|
|
57
|
+
|
|
58
|
+
describe('section-filter', () => {
|
|
59
|
+
describe('filterExcludedSections', () => {
  // Flat fixture rows: [number, heading, level, tokenCount]
  const rows: Array<[string, string, number, number]> = [
    ['1', 'Introduction', 1, 100],
    ['1.1', 'Overview', 2, 50],
    ['2', 'Installation', 1, 200],
    ['2.1', 'Requirements', 2, 75],
    ['2.2', 'Setup Steps', 2, 80],
    ['3', 'API Reference', 1, 500],
    ['3.1', 'Methods', 2, 300],
    ['4', 'License', 1, 50],
  ]
  const sectionList = rows.map(([number, heading, level, tokenCount]) => ({
    number,
    heading,
    level,
    tokenCount,
  }))

  it('returns all sections when no exclusion patterns provided', () => {
    expect(filterExcludedSections(sectionList, [])).toEqual(sectionList)
  })

  it('excludes sections by exact heading match', () => {
    const kept = filterExcludedSections(sectionList, ['License'])
    expect(kept).toHaveLength(7)
    expect(kept.some((item) => item.heading === 'License')).toBe(false)
  })

  it('excludes sections by partial heading match', () => {
    const kept = filterExcludedSections(sectionList, ['Setup'])
    expect(kept).toHaveLength(7)
    expect(kept.some((item) => item.heading === 'Setup Steps')).toBe(false)
  })

  it('excludes sections by glob pattern', () => {
    const kept = filterExcludedSections(sectionList, ['*Reference*'])
    expect(kept).toHaveLength(7)
    expect(kept.some((item) => item.heading === 'API Reference')).toBe(false)
  })

  it('excludes sections by section number', () => {
    const kept = filterExcludedSections(sectionList, ['2.1'])
    expect(kept).toHaveLength(7)
    expect(kept.some((item) => item.number === '2.1')).toBe(false)
  })

  it('excludes multiple sections with multiple patterns', () => {
    const patterns = ['License', 'Overview']
    const kept = filterExcludedSections(sectionList, patterns)
    expect(kept).toHaveLength(6)
    expect(kept.some((item) => item.heading === 'License')).toBe(false)
    expect(kept.some((item) => item.heading === 'Overview')).toBe(false)
  })

  it('handles case-insensitive matching', () => {
    // Upper-cased pattern must still remove the mixed-case heading
    const kept = filterExcludedSections(sectionList, ['LICENSE'])
    expect(kept).toHaveLength(7)
    expect(kept.some((item) => item.heading === 'License')).toBe(false)
  })
})
|
|
116
|
+
|
|
117
|
+
describe('extractSectionContent with exclusion', () => {
  // Fixture outline: 1 Introduction (1.1, 1.2), 2 API (2.1, 2.2), 3 License
  const introduction = createSection('Introduction', 1, [
    createSection('Getting Started', 2),
    createSection('Quick Start', 2),
  ])
  const api = createSection('API', 1, [
    createSection('Methods', 2),
    createSection('Properties', 2),
  ])
  const license = createSection('License', 1)
  const doc = createDocument([introduction, api, license])

  it('extracts all matching sections without exclusion', () => {
    const { matchedNumbers, excludedNumbers } = extractSectionContent(doc, '*')
    expect(matchedNumbers).toHaveLength(7)
    expect(excludedNumbers).toHaveLength(0)
  })

  it('excludes sections matching exclusion pattern', () => {
    const extracted = extractSectionContent(doc, '*', { exclude: ['License'] })
    expect(extracted.matchedNumbers).toHaveLength(6)
    expect(extracted.excludedNumbers).toEqual(['3'])
    expect(
      extracted.sections.some((section) => section.heading === 'License'),
    ).toBe(false)
  })

  it('reports excluded sections in excludedNumbers', () => {
    const { excludedNumbers } = extractSectionContent(doc, '*', {
      exclude: ['Quick Start', 'Properties'],
    })
    expect(excludedNumbers).toContain('1.2')
    expect(excludedNumbers).toContain('2.2')
  })

  it('combines shallow and exclude options', () => {
    const options = { shallow: true, exclude: ['Getting Started'] }
    const { sections } = extractSectionContent(doc, 'Introduction', options)
    // Shallow mode yields Introduction alone, without its children;
    // the exclude pattern only affects the matched sections list.
    expect(sections).toHaveLength(1)
    expect(sections[0]?.heading).toBe('Introduction')
  })
})
|
|
166
|
+
|
|
167
|
+
describe('filterDocumentSections', () => {
  // Fixture outline: Introduction (Overview, Goals), Installation, License
  const doc = createDocument([
    createSection('Introduction', 1, [
      createSection('Overview', 2),
      createSection('Goals', 2),
    ]),
    createSection('Installation', 1),
    createSection('License', 1),
  ])

  it('returns original document when no exclusion patterns', () => {
    const { document, excludedCount } = filterDocumentSections(doc, [])
    expect(document).toBe(doc)
    expect(excludedCount).toBe(0)
  })

  it('filters out matching sections from document', () => {
    const { document, excludedCount } = filterDocumentSections(doc, ['License'])
    expect(excludedCount).toBe(1)
    expect(document.sections).toHaveLength(2)
    expect(
      document.sections.some((section) => section.heading === 'License'),
    ).toBe(false)
  })

  it('filters out nested sections', () => {
    const { document, excludedCount } = filterDocumentSections(doc, ['Overview'])
    expect(excludedCount).toBe(1)

    // The parent must survive the filtering
    const intro = document.sections.find(
      (section) => section.heading === 'Introduction',
    )
    expect(intro).toBeDefined()

    // Overview is dropped from the children while its sibling Goals remains
    const overview = intro?.children.find((child) => child.heading === 'Overview')
    const goals = intro?.children.find((child) => child.heading === 'Goals')
    expect(overview).toBeUndefined()
    expect(goals).toBeDefined()
  })

  it('filters multiple sections with glob pattern', () => {
    const patterns = ['*stallation*', 'License']
    const { document, excludedCount } = filterDocumentSections(doc, patterns)
    expect(excludedCount).toBe(2)
    expect(document.sections).toHaveLength(1)
    expect(document.sections[0]?.heading).toBe('Introduction')
  })

  it('preserves document structure for non-matching sections', () => {
    const { document, excludedCount } = filterDocumentSections(doc, ['NonExistent'])
    expect(document).toBe(doc)
    expect(excludedCount).toBe(0)
  })

  it('counts descendants when parent section is excluded', () => {
    // Excluding Introduction also removes its two children (Overview, Goals),
    // so three sections are counted in total.
    const { document, excludedCount } = filterDocumentSections(doc, ['Introduction'])
    expect(excludedCount).toBe(3) // Introduction + Overview + Goals
    expect(document.sections).toHaveLength(2) // Installation + License
    expect(
      document.sections.some((section) => section.heading === 'Introduction'),
    ).toBe(false)
  })

  it('counts deeply nested descendants correctly', () => {
    const grandchildren = [
      createSection('Grandchild 1', 3),
      createSection('Grandchild 2', 3),
    ]
    const root = createSection('Root', 1, [
      createSection('Child 1', 2, grandchildren),
      createSection('Child 2', 2),
    ])
    const deepDoc = createDocument([root, createSection('Other', 1)])

    const { document, excludedCount } = filterDocumentSections(deepDoc, ['Root'])
    // Root + Child 1 + Grandchild 1 + Grandchild 2 + Child 2 = 5
    expect(excludedCount).toBe(5)
    expect(document.sections).toHaveLength(1)
    expect(document.sections[0]?.heading).toBe('Other')
  })

  it('does not double-count when multiple patterns match same section', () => {
    const { excludedCount } = filterDocumentSections(doc, ['Introduction', 'Intro*'])
    // Both patterns hit Introduction, yet it is counted a single time:
    // Introduction + Overview + Goals = 3
    expect(excludedCount).toBe(3)
  })
})
|
|
257
|
+
|
|
258
|
+
describe('buildSectionList', () => {
  // Fixture outline: A (A.1 (A.1.1), A.2), B
  const nestedA = createSection('A', 1, [
    createSection('A.1', 2, [createSection('A.1.1', 3)]),
    createSection('A.2', 2),
  ])
  const doc = createDocument([nestedA, createSection('B', 1)])

  it('assigns correct hierarchical numbers', () => {
    // Expected [number, heading] pairs in list order
    const expected: Array<[string, string]> = [
      ['1', 'A'],
      ['1.1', 'A.1'],
      ['1.1.1', 'A.1.1'],
      ['1.2', 'A.2'],
      ['2', 'B'],
    ]

    const list = buildSectionList(doc)
    expect(list).toHaveLength(5)
    expected.forEach(([number, heading], position) => {
      expect(list[position]).toMatchObject({ number, heading })
    })
  })
})
|
|
277
|
+
})
|
|
@@ -0,0 +1,392 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Section filtering utilities for extracting specific sections from markdown documents
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import type { MdDocument, MdSection } from '../core/types.js'
|
|
6
|
+
|
|
7
|
+
// ============================================================================
|
|
8
|
+
// Simple Glob Matching
|
|
9
|
+
// ============================================================================
|
|
10
|
+
|
|
11
|
+
/**
 * Case-insensitive glob test against the whole of `text`.
 *
 * `*` stands for any run of characters (including none) and `?` for exactly
 * one character; every other regex metacharacter in `pattern` is taken
 * literally.
 */
const globMatch = (text: string, pattern: string): boolean => {
  // One pass over the pattern: wildcards become their regex equivalents,
  // remaining metacharacters are backslash-escaped.
  const source = pattern.replace(/[.+^${}()|[\]\\*?]/g, (ch) => {
    if (ch === '*') return '.*'
    if (ch === '?') return '.'
    return `\\${ch}`
  })

  // Anchored at both ends so the pattern has to cover the entire text
  return new RegExp(`^${source}$`, 'i').test(text)
}
|
|
24
|
+
|
|
25
|
+
// ============================================================================
|
|
26
|
+
// Types
|
|
27
|
+
// ============================================================================
|
|
28
|
+
|
|
29
|
+
/** One entry of the flattened section outline. */
export interface SectionListItem {
  /** Hierarchical position such as "1", "1.2" or "2.1" */
  readonly number: string
  /** Heading text of the section */
  readonly heading: string
  /** Heading level of the section */
  readonly level: number
  /** Token count taken from the section's metadata */
  readonly tokenCount: number
}
|
|
35
|
+
|
|
36
|
+
/**
 * Optional knobs for section extraction.
 */
export interface SectionFilterOptions {
  /** When true, nested subsections are left out of the result. */
  readonly shallow?: boolean
  /** Selectors whose matching sections are dropped from the result. */
  readonly exclude?: readonly string[]
}
|
|
42
|
+
|
|
43
|
+
// ============================================================================
|
|
44
|
+
// Section Map Building
|
|
45
|
+
// ============================================================================
|
|
46
|
+
|
|
47
|
+
/**
 * Flatten a document's section tree into a depth-first outline.
 *
 * Every entry gets a dotted number built from its 1-based position among its
 * siblings, prefixed by its parent's number ("1", "1.1", "1.2", "2", ...).
 *
 * @param document - Document whose `sections` tree is walked.
 * @returns Outline entries in pre-order (a parent precedes its children).
 */
export const buildSectionList = (document: MdDocument): SectionListItem[] => {
  const outline: SectionListItem[] = []

  // Visit a sibling group, numbering each member under `parentNumber`.
  const walk = (siblings: readonly MdSection[], parentNumber: string): void => {
    siblings.forEach((section, position) => {
      const ordinal = `${position + 1}`
      const number = parentNumber ? `${parentNumber}.${ordinal}` : ordinal

      outline.push({
        number,
        heading: section.heading,
        level: section.level,
        tokenCount: section.metadata.tokenCount,
      })

      walk(section.children, number)
    })
  }

  walk(document.sections, '')

  return outline
}
|
|
80
|
+
|
|
81
|
+
/**
 * Render an outline as text, one section per line.
 *
 * Each line reads `<number>. <heading> (<tokenCount> tokens)` and is indented
 * once for every dot in the section number.
 *
 * @param sections - Outline entries to render, in display order.
 * @returns The lines joined with newlines; an empty string for an empty list.
 */
export const formatSectionList = (sections: SectionListItem[]): string =>
  sections
    .map(({ number, heading, tokenCount }) => {
      // Number of dots in "1.2.3" equals the nesting depth below the top level.
      const depth = number.split('.').length - 1
      const indent = ' '.repeat(depth)
      return `${indent}${number}. ${heading} (${tokenCount} tokens)`
    })
    .join('\n')
|
|
98
|
+
|
|
99
|
+
// ============================================================================
|
|
100
|
+
// Section Matching
|
|
101
|
+
// ============================================================================
|
|
102
|
+
|
|
103
|
+
/**
 * Decide whether an outline entry is addressed by `selector`.
 *
 * Rules, applied in order:
 * 1. A selector made only of digits and dots (e.g. "5.3") is compared with the
 *    section number and nothing else.
 * 2. A selector equal to the heading, ignoring case, matches.
 * 3. A selector containing `*` or `?` is treated as a glob over the heading.
 * 4. Anything else matches when the heading contains it, ignoring case.
 *
 * @param section - Outline entry under test.
 * @param selector - Section number, heading text, or glob pattern.
 * @returns `true` when the entry is selected.
 */
const matchesSelector = (
  section: SectionListItem,
  selector: string,
): boolean => {
  const looksLikeNumber = /^[\d.]+$/.test(selector)
  if (looksLikeNumber) {
    return section.number === selector
  }

  const heading = section.heading.toLowerCase()
  const wanted = selector.toLowerCase()
  if (heading === wanted) {
    return true
  }

  const hasWildcard = selector.includes('*') || selector.includes('?')
  return hasWildcard
    ? globMatch(section.heading, selector)
    : heading.includes(wanted)
}
|
|
129
|
+
|
|
130
|
+
/**
 * Tell whether an outline entry is hit by at least one exclusion pattern.
 *
 * @param section - Outline entry under test.
 * @param excludePatterns - Selectors, each interpreted by `matchesSelector`.
 * @returns `true` as soon as one pattern matches; `false` for an empty list.
 */
const matchesExclusionPatterns = (
  section: SectionListItem,
  excludePatterns: readonly string[],
): boolean => {
  for (const pattern of excludePatterns) {
    if (matchesSelector(section, pattern)) {
      return true
    }
  }
  return false
}
|
|
139
|
+
|
|
140
|
+
/**
 * Collect every outline entry addressed by `selector`.
 *
 * @param sectionList - Outline to search.
 * @param selector - Section number, heading text, or glob pattern.
 * @returns A new array with the matching entries in their original order.
 */
export const findMatchingSections = (
  sectionList: SectionListItem[],
  selector: string,
): SectionListItem[] => {
  const hits: SectionListItem[] = []
  for (const candidate of sectionList) {
    if (matchesSelector(candidate, selector)) {
      hits.push(candidate)
    }
  }
  return hits
}
|
|
149
|
+
|
|
150
|
+
/**
 * Drop the outline entries hit by any exclusion pattern.
 *
 * @param sectionList - Outline entries to filter.
 * @param excludePatterns - Selectors describing what to remove.
 * @returns The input array itself when there are no patterns; otherwise a new
 *   array holding only the entries that no pattern matches.
 */
export const filterExcludedSections = (
  sectionList: SectionListItem[],
  excludePatterns: readonly string[],
): SectionListItem[] => {
  const nothingToExclude = excludePatterns.length === 0
  if (nothingToExclude) {
    return sectionList
  }

  const survives = (entry: SectionListItem): boolean =>
    !matchesExclusionPatterns(entry, excludePatterns)

  return sectionList.filter((entry) => survives(entry))
}
|
|
165
|
+
|
|
166
|
+
/**
 * Gather the numbers of every section nested below `parentNumber`.
 *
 * A section counts as a descendant when its number starts with the parent's
 * number followed by a dot, so "1.1" and "1.1.2" descend from "1" while "10"
 * and "1" itself do not.
 *
 * @param sectionList - Outline to scan.
 * @param parentNumber - Dotted number of the ancestor section.
 * @returns The descendant numbers, in outline order.
 */
const getDescendantNumbers = (
  sectionList: SectionListItem[],
  parentNumber: string,
): Set<string> => {
  const childPrefix = `${parentNumber}.`

  return new Set(
    sectionList
      .map(({ number }) => number)
      .filter((number) => number.startsWith(childPrefix)),
  )
}
|
|
184
|
+
|
|
185
|
+
// ============================================================================
|
|
186
|
+
// Section Content Extraction
|
|
187
|
+
// ============================================================================
|
|
188
|
+
|
|
189
|
+
/**
 * Extract the sections of a document addressed by `selector`.
 *
 * The selector is resolved against the flattened outline (section number,
 * heading text, or glob). Matches hit by `options.exclude` are removed and
 * reported separately.
 *
 * In the default (deep) mode each returned section keeps its `children`. A
 * match that lies inside another match is therefore not returned a second
 * time, since it is already reachable through its ancestor; returning both
 * would duplicate its content when the result is rendered. In shallow mode
 * every match is returned as a copy with `children` emptied, so nested matches
 * are all kept.
 *
 * Exclusion is applied only to the sections matched by `selector`; the
 * children carried by a kept section are returned unchanged.
 *
 * @param document - Document to extract from.
 * @param selector - Section number, heading text, or glob pattern.
 * @param options - `shallow` drops nested subsections; `exclude` lists
 *   selectors whose matches are removed from the result.
 * @returns `sections`: the extracted section objects; `matchedNumbers`: the
 *   numbers of all sections that matched and were not excluded (including
 *   matches nested in another match); `excludedNumbers`: the numbers of
 *   matches removed by `exclude`.
 */
export const extractSectionContent = (
  document: MdDocument,
  selector: string,
  options: SectionFilterOptions = {},
): {
  sections: MdSection[]
  matchedNumbers: string[]
  excludedNumbers: string[]
} => {
  const sectionList = buildSectionList(document)
  const candidates = findMatchingSections(sectionList, selector)

  // Apply exclusion patterns if provided
  const matchedSections =
    options.exclude && options.exclude.length > 0
      ? filterExcludedSections(candidates, options.exclude)
      : candidates

  // Whatever matched the selector but did not survive exclusion is reported
  // back to the caller for feedback.
  const survivors = new Set(matchedSections)
  const excludedNumbers = candidates
    .filter((candidate) => !survivors.has(candidate))
    .map((candidate) => candidate.number)

  if (matchedSections.length === 0) {
    return { sections: [], matchedNumbers: [], excludedNumbers }
  }

  const matchedNumbers = matchedSections.map((matched) => matched.number)

  // In deep mode a matched section brings its whole subtree along, so any
  // other match inside that subtree is already covered by its ancestor.
  const coveredByAncestor = new Set<string>()
  if (!options.shallow) {
    for (const number of matchedNumbers) {
      for (const descendant of getDescendantNumbers(sectionList, number)) {
        coveredByAncestor.add(descendant)
      }
    }
  }

  // Build a map from section number to section for efficient lookup. The
  // numbering scheme mirrors the one used for the outline.
  const numberToSection = new Map<string, MdSection>()

  const mapSections = (
    sections: readonly MdSection[],
    prefix: string,
  ): void => {
    sections.forEach((section, i) => {
      const number = prefix ? `${prefix}.${i + 1}` : `${i + 1}`
      numberToSection.set(number, section)
      mapSections(section.children, number)
    })
  }

  mapSections(document.sections, '')

  // Extract matching sections
  const extractedSections: MdSection[] = []

  for (const number of matchedNumbers) {
    if (coveredByAncestor.has(number)) {
      continue
    }

    const section = numberToSection.get(number)
    if (section === undefined) {
      continue
    }

    // Shallow mode hands out a copy without children; deep mode returns the
    // original section object with its subtree intact.
    extractedSections.push(
      options.shallow ? { ...section, children: [] } : section,
    )
  }

  return { sections: extractedSections, matchedNumbers, excludedNumbers }
}
|
|
277
|
+
|
|
278
|
+
/**
 * Render extracted sections back into markdown text.
 *
 * Each section becomes an ATX heading (`#` repeated `level` times), a blank
 * line, and the section's trimmed content. If the content's first line starts
 * with `#` it is dropped so the heading is not printed twice. Children are
 * rendered recursively after their parent, each preceded by a blank line.
 *
 * @param sections - Sections to render, in output order.
 * @returns The rendered sections separated by one blank line.
 */
export const formatExtractedSections = (sections: MdSection[]): string => {
  const render = (section: MdSection): string => {
    const parts: string[] = [
      `${'#'.repeat(section.level)} ${section.heading}`,
      '',
    ]

    // The first line starts with '#' exactly when the whole content does.
    const rows = section.content.split('\n')
    const bodyRows = section.content.startsWith('#') ? rows.slice(1) : rows
    const body = bodyRows.join('\n').trim()
    if (body) {
      parts.push(body)
    }

    for (const child of section.children) {
      parts.push('', render(child))
    }

    return parts.join('\n')
  }

  return sections.map((section) => render(section)).join('\n\n')
}
|
|
316
|
+
|
|
317
|
+
// ============================================================================
|
|
318
|
+
// Document Section Filtering
|
|
319
|
+
// ============================================================================
|
|
320
|
+
|
|
321
|
+
/**
 * Produce a copy of a document with excluded sections pruned from its tree.
 *
 * A section is pruned when any pattern matches it, and its entire subtree goes
 * with it. Surviving sections are shallow copies whose `children` have been
 * filtered the same way.
 *
 * @param document - Document to filter.
 * @param excludePatterns - Selectors describing the sections to remove.
 * @returns The filtered document and `excludedCount`, the number of sections
 *   removed (matched sections plus their descendants, each counted once). When
 *   there are no patterns or nothing matches, the original document object is
 *   returned with a count of 0.
 */
export const filterDocumentSections = (
  document: MdDocument,
  excludePatterns: readonly string[],
): { document: MdDocument; excludedCount: number } => {
  if (excludePatterns.length === 0) {
    return { document, excludedCount: 0 }
  }

  const outline = buildSectionList(document)

  // Numbers of every section to drop: each pattern hit plus its whole subtree.
  const numbersToExclude = new Set<string>()
  for (const hit of outline) {
    if (!matchesExclusionPatterns(hit, excludePatterns)) {
      continue
    }

    const subtreePrefix = `${hit.number}.`
    for (const { number } of outline) {
      if (number === hit.number || number.startsWith(subtreePrefix)) {
        numbersToExclude.add(number)
      }
    }
  }

  // A Set stores each number once, so its size is the count of removals.
  const excludedCount = numbersToExclude.size
  if (excludedCount === 0) {
    return { document, excludedCount: 0 }
  }

  // Rebuild a sibling group without the excluded members, recomputing each
  // section's number the same way the outline does.
  const prune = (
    sections: readonly MdSection[],
    prefix: string,
  ): MdSection[] =>
    sections.reduce<MdSection[]>((kept, section, i) => {
      const number = prefix ? `${prefix}.${i + 1}` : `${i + 1}`

      if (!numbersToExclude.has(number)) {
        kept.push({
          ...section,
          children: prune(section.children, number),
        })
      }

      return kept
    }, [])

  return {
    document: {
      ...document,
      sections: prune(document.sections, ''),
    },
    excludedCount,
  }
}
|