mdcontext 0.0.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.changeset/README.md +28 -0
- package/.changeset/config.json +11 -0
- package/.claude/settings.local.json +25 -0
- package/.github/workflows/ci.yml +83 -0
- package/.github/workflows/claude-code-review.yml +44 -0
- package/.github/workflows/claude.yml +85 -0
- package/.github/workflows/release.yml +113 -0
- package/.tldrignore +112 -0
- package/BACKLOG.md +338 -0
- package/CONTRIBUTING.md +186 -0
- package/NOTES/NOTES +44 -0
- package/README.md +434 -11
- package/biome.json +36 -0
- package/cspell.config.yaml +14 -0
- package/dist/chunk-23UPXDNL.js +3044 -0
- package/dist/chunk-2W7MO2DL.js +1366 -0
- package/dist/chunk-3NUAZGMA.js +1689 -0
- package/dist/chunk-7TOWB2XB.js +366 -0
- package/dist/chunk-7XOTOADQ.js +3065 -0
- package/dist/chunk-AH2PDM2K.js +3042 -0
- package/dist/chunk-BNXWSZ63.js +3742 -0
- package/dist/chunk-BTL5DJVU.js +3222 -0
- package/dist/chunk-HDHYG7E4.js +104 -0
- package/dist/chunk-HLR4KZBP.js +3234 -0
- package/dist/chunk-IP3FRFEB.js +1045 -0
- package/dist/chunk-KHU56VDO.js +3042 -0
- package/dist/chunk-KRYIFLQR.js +88 -0
- package/dist/chunk-LBSDNLEM.js +287 -0
- package/dist/chunk-MNTQ7HCP.js +2643 -0
- package/dist/chunk-MUJELQQ6.js +1387 -0
- package/dist/chunk-MXJGMSLV.js +2199 -0
- package/dist/chunk-N6QJGC3Z.js +2636 -0
- package/dist/chunk-OBELGBPM.js +1713 -0
- package/dist/chunk-OT7R5XTA.js +3192 -0
- package/dist/chunk-P7X4RA2T.js +106 -0
- package/dist/chunk-PIDUQNC2.js +3185 -0
- package/dist/chunk-POGCDIH4.js +3187 -0
- package/dist/chunk-PSIEOQGZ.js +3043 -0
- package/dist/chunk-PVRT3IHA.js +3238 -0
- package/dist/chunk-QNN4TT23.js +1430 -0
- package/dist/chunk-RE3R45RJ.js +3042 -0
- package/dist/chunk-S7E6TFX6.js +803 -0
- package/dist/chunk-SG6GLU4U.js +1378 -0
- package/dist/chunk-SJCDV2ST.js +274 -0
- package/dist/chunk-SYE5XLF3.js +104 -0
- package/dist/chunk-T5VLYBZD.js +103 -0
- package/dist/chunk-TOQB7VWU.js +3238 -0
- package/dist/chunk-VFNMZ4ZQ.js +3228 -0
- package/dist/chunk-VVTGZNBT.js +1629 -0
- package/dist/chunk-W7Q4RFEV.js +104 -0
- package/dist/chunk-XTYYVRLO.js +3190 -0
- package/dist/chunk-Y6MDYVJD.js +3063 -0
- package/dist/cli/main.d.ts +1 -0
- package/dist/cli/main.js +5458 -0
- package/dist/index.d.ts +653 -0
- package/dist/index.js +79 -0
- package/dist/mcp/server.d.ts +1 -0
- package/dist/mcp/server.js +472 -0
- package/dist/schema-BAWSG7KY.js +22 -0
- package/dist/schema-E3QUPL26.js +20 -0
- package/dist/schema-EHL7WUT6.js +20 -0
- package/docs/019-USAGE.md +625 -0
- package/docs/020-current-implementation.md +364 -0
- package/docs/021-DOGFOODING-FINDINGS.md +175 -0
- package/docs/BACKLOG.md +80 -0
- package/docs/CONFIG.md +1123 -0
- package/docs/DESIGN.md +439 -0
- package/docs/ERRORS.md +383 -0
- package/docs/PROJECT.md +88 -0
- package/docs/ROADMAP.md +407 -0
- package/docs/summarization.md +320 -0
- package/docs/test-links.md +9 -0
- package/justfile +40 -0
- package/package.json +74 -9
- package/pnpm-workspace.yaml +5 -0
- package/research/INDEX.md +315 -0
- package/research/code-review/README.md +90 -0
- package/research/code-review/cli-error-handling-review.md +979 -0
- package/research/code-review/code-review-validation-report.md +464 -0
- package/research/code-review/main-ts-review.md +1128 -0
- package/research/config-analysis/01-current-implementation.md +470 -0
- package/research/config-analysis/02-strategy-recommendation.md +428 -0
- package/research/config-analysis/03-task-candidates.md +715 -0
- package/research/config-analysis/033-research-configuration-management.md +828 -0
- package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
- package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
- package/research/config-docs/SUMMARY.md +357 -0
- package/research/config-docs/TEST-RESULTS.md +776 -0
- package/research/config-docs/TODO.md +542 -0
- package/research/config-docs/analysis.md +744 -0
- package/research/config-docs/fix-validation.md +502 -0
- package/research/config-docs/help-audit.md +264 -0
- package/research/config-docs/help-system-analysis.md +890 -0
- package/research/dogfood/consolidated-tool-evaluation.md +373 -0
- package/research/dogfood/strategy-a/a-synthesis.md +184 -0
- package/research/dogfood/strategy-a/a1-docs.md +226 -0
- package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
- package/research/dogfood/strategy-a/a3-llm.md +164 -0
- package/research/dogfood/strategy-b/b-synthesis.md +228 -0
- package/research/dogfood/strategy-b/b1-architecture.md +207 -0
- package/research/dogfood/strategy-b/b2-gaps.md +258 -0
- package/research/dogfood/strategy-b/b3-workflows.md +250 -0
- package/research/dogfood/strategy-c/c-synthesis.md +451 -0
- package/research/dogfood/strategy-c/c1-explorer.md +192 -0
- package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
- package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
- package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
- package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
- package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
- package/research/effect-cli-error-handling.md +845 -0
- package/research/effect-errors-as-values.md +943 -0
- package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
- package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
- package/research/errors-task-analysis/embeddings-analysis.md +709 -0
- package/research/errors-task-analysis/index-search-analysis.md +812 -0
- package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
- package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
- package/research/issue-review.md +603 -0
- package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
- package/research/llm-summarization/alternative-providers-2026.md +1428 -0
- package/research/llm-summarization/anthropic-2026.md +367 -0
- package/research/llm-summarization/claude-cli-integration.md +1706 -0
- package/research/llm-summarization/cli-integration-patterns.md +3155 -0
- package/research/llm-summarization/openai-2026.md +473 -0
- package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
- package/research/llm-summarization/opencode-cli-integration.md +1552 -0
- package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
- package/research/llm-summarization/prototype-results.md +56 -0
- package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
- package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
- package/research/mdcontext-error-analysis.md +521 -0
- package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
- package/research/mdcontext-pudding/01-index-embed.md +956 -0
- package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
- package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
- package/research/mdcontext-pudding/02-search.md +970 -0
- package/research/mdcontext-pudding/03-context.md +779 -0
- package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
- package/research/mdcontext-pudding/04-tree.md +704 -0
- package/research/mdcontext-pudding/05-config.md +1038 -0
- package/research/mdcontext-pudding/06-links-summary.txt +87 -0
- package/research/mdcontext-pudding/06-links.md +679 -0
- package/research/mdcontext-pudding/07-stats.md +693 -0
- package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
- package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
- package/research/mdcontext-pudding/README.md +168 -0
- package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
- package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
- package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
- package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
- package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
- package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
- package/research/research-quality-review.md +834 -0
- package/research/semantic-search/002-research-embedding-models.md +490 -0
- package/research/semantic-search/003-research-rag-alternatives.md +523 -0
- package/research/semantic-search/004-research-vector-search.md +841 -0
- package/research/semantic-search/032-research-semantic-search.md +427 -0
- package/research/semantic-search/embedding-text-analysis.md +156 -0
- package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
- package/research/semantic-search/query-processing-analysis.md +207 -0
- package/research/semantic-search/root-cause-and-solution.md +114 -0
- package/research/semantic-search/threshold-validation-report.md +69 -0
- package/research/semantic-search/vector-search-analysis.md +63 -0
- package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
- package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
- package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
- package/research/task-management-2026/03-lightweight-file-based.md +567 -0
- package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
- package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
- package/research/task-management-2026/linear/02-api-integrations.md +930 -0
- package/research/task-management-2026/linear/03-ai-features.md +368 -0
- package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
- package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
- package/research/test-path-issues.md +276 -0
- package/review/ALP-76/1-error-type-design.md +962 -0
- package/review/ALP-76/2-error-handling-patterns.md +906 -0
- package/review/ALP-76/3-error-presentation.md +624 -0
- package/review/ALP-76/4-test-coverage.md +625 -0
- package/review/ALP-76/5-migration-completeness.md +440 -0
- package/review/ALP-76/6-effect-best-practices.md +755 -0
- package/scripts/apply-branch-protection.sh +47 -0
- package/scripts/branch-protection-templates.json +79 -0
- package/scripts/prototype-summarization.ts +346 -0
- package/scripts/rebuild-hnswlib.js +58 -0
- package/scripts/setup-branch-protection.sh +64 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
- package/src/cli/argv-preprocessor.test.ts +210 -0
- package/src/cli/argv-preprocessor.ts +202 -0
- package/src/cli/cli.test.ts +627 -0
- package/src/cli/commands/backlinks.ts +54 -0
- package/src/cli/commands/config-cmd.ts +642 -0
- package/src/cli/commands/context.ts +285 -0
- package/src/cli/commands/duplicates.ts +122 -0
- package/src/cli/commands/embeddings.ts +529 -0
- package/src/cli/commands/index-cmd.ts +480 -0
- package/src/cli/commands/index.ts +16 -0
- package/src/cli/commands/links.ts +52 -0
- package/src/cli/commands/search.ts +1281 -0
- package/src/cli/commands/stats.ts +149 -0
- package/src/cli/commands/tree.ts +128 -0
- package/src/cli/config-layer.ts +176 -0
- package/src/cli/error-handler.test.ts +235 -0
- package/src/cli/error-handler.ts +655 -0
- package/src/cli/flag-schemas.ts +341 -0
- package/src/cli/help.ts +588 -0
- package/src/cli/index.ts +9 -0
- package/src/cli/main.ts +435 -0
- package/src/cli/options.ts +41 -0
- package/src/cli/shared-error-handling.ts +199 -0
- package/src/cli/typo-suggester.test.ts +105 -0
- package/src/cli/typo-suggester.ts +130 -0
- package/src/cli/utils.ts +259 -0
- package/src/config/file-provider.test.ts +320 -0
- package/src/config/file-provider.ts +273 -0
- package/src/config/index.ts +72 -0
- package/src/config/integration.test.ts +667 -0
- package/src/config/precedence.test.ts +277 -0
- package/src/config/precedence.ts +451 -0
- package/src/config/schema.test.ts +414 -0
- package/src/config/schema.ts +603 -0
- package/src/config/service.test.ts +320 -0
- package/src/config/service.ts +243 -0
- package/src/config/testing.test.ts +264 -0
- package/src/config/testing.ts +110 -0
- package/src/core/index.ts +1 -0
- package/src/core/types.ts +113 -0
- package/src/duplicates/detector.test.ts +183 -0
- package/src/duplicates/detector.ts +414 -0
- package/src/duplicates/index.ts +18 -0
- package/src/embeddings/embedding-namespace.test.ts +300 -0
- package/src/embeddings/embedding-namespace.ts +947 -0
- package/src/embeddings/heading-boost.test.ts +222 -0
- package/src/embeddings/hnsw-build-options.test.ts +198 -0
- package/src/embeddings/hyde.test.ts +272 -0
- package/src/embeddings/hyde.ts +264 -0
- package/src/embeddings/index.ts +10 -0
- package/src/embeddings/openai-provider.ts +414 -0
- package/src/embeddings/pricing.json +22 -0
- package/src/embeddings/provider-constants.ts +204 -0
- package/src/embeddings/provider-errors.test.ts +967 -0
- package/src/embeddings/provider-errors.ts +565 -0
- package/src/embeddings/provider-factory.test.ts +240 -0
- package/src/embeddings/provider-factory.ts +225 -0
- package/src/embeddings/provider-integration.test.ts +788 -0
- package/src/embeddings/query-preprocessing.test.ts +187 -0
- package/src/embeddings/semantic-search-threshold.test.ts +508 -0
- package/src/embeddings/semantic-search.ts +1270 -0
- package/src/embeddings/types.ts +359 -0
- package/src/embeddings/vector-store.ts +708 -0
- package/src/embeddings/voyage-provider.ts +313 -0
- package/src/errors/errors.test.ts +845 -0
- package/src/errors/index.ts +533 -0
- package/src/index/ignore-patterns.test.ts +354 -0
- package/src/index/ignore-patterns.ts +305 -0
- package/src/index/index.ts +4 -0
- package/src/index/indexer.ts +684 -0
- package/src/index/storage.ts +260 -0
- package/src/index/types.ts +147 -0
- package/src/index/watcher.ts +189 -0
- package/src/index.ts +30 -0
- package/src/integration/search-keyword.test.ts +678 -0
- package/src/mcp/server.ts +612 -0
- package/src/parser/index.ts +1 -0
- package/src/parser/parser.test.ts +291 -0
- package/src/parser/parser.ts +394 -0
- package/src/parser/section-filter.test.ts +277 -0
- package/src/parser/section-filter.ts +392 -0
- package/src/search/__tests__/hybrid-search.test.ts +650 -0
- package/src/search/bm25-store.ts +366 -0
- package/src/search/cross-encoder.test.ts +253 -0
- package/src/search/cross-encoder.ts +406 -0
- package/src/search/fuzzy-search.test.ts +419 -0
- package/src/search/fuzzy-search.ts +273 -0
- package/src/search/hybrid-search.ts +448 -0
- package/src/search/path-matcher.test.ts +276 -0
- package/src/search/path-matcher.ts +33 -0
- package/src/search/query-parser.test.ts +260 -0
- package/src/search/query-parser.ts +319 -0
- package/src/search/searcher.test.ts +280 -0
- package/src/search/searcher.ts +724 -0
- package/src/search/wink-bm25.d.ts +30 -0
- package/src/summarization/cli-providers/claude.ts +202 -0
- package/src/summarization/cli-providers/detection.test.ts +273 -0
- package/src/summarization/cli-providers/detection.ts +118 -0
- package/src/summarization/cli-providers/index.ts +8 -0
- package/src/summarization/cost.test.ts +139 -0
- package/src/summarization/cost.ts +102 -0
- package/src/summarization/error-handler.test.ts +127 -0
- package/src/summarization/error-handler.ts +111 -0
- package/src/summarization/index.ts +102 -0
- package/src/summarization/pipeline.test.ts +498 -0
- package/src/summarization/pipeline.ts +231 -0
- package/src/summarization/prompts.test.ts +269 -0
- package/src/summarization/prompts.ts +133 -0
- package/src/summarization/provider-factory.test.ts +396 -0
- package/src/summarization/provider-factory.ts +178 -0
- package/src/summarization/types.ts +184 -0
- package/src/summarize/budget-bugs.test.ts +620 -0
- package/src/summarize/formatters.ts +419 -0
- package/src/summarize/index.ts +20 -0
- package/src/summarize/summarizer.test.ts +275 -0
- package/src/summarize/summarizer.ts +597 -0
- package/src/summarize/verify-bugs.test.ts +238 -0
- package/src/types/huggingface-transformers.d.ts +66 -0
- package/src/utils/index.ts +1 -0
- package/src/utils/tokens.test.ts +142 -0
- package/src/utils/tokens.ts +186 -0
- package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
- package/tests/fixtures/cli/.mdcontext/config.json +8 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
- package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
- package/tests/fixtures/cli/.mdcontext/indexes/sections.json +247 -0
- package/tests/fixtures/cli/README.md +9 -0
- package/tests/fixtures/cli/api-reference.md +11 -0
- package/tests/fixtures/cli/getting-started.md +11 -0
- package/tests/integration/embed-index.test.ts +712 -0
- package/tests/integration/search-context.test.ts +469 -0
- package/tests/integration/search-semantic.test.ts +522 -0
- package/tsconfig.json +26 -0
- package/vitest.config.ts +16 -0
- package/vitest.setup.ts +12 -0
|
@@ -0,0 +1,1281 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SEARCH Command
|
|
3
|
+
*
|
|
4
|
+
* Search markdown content by meaning or heading pattern.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import * as fs from 'node:fs/promises'
|
|
8
|
+
import * as path from 'node:path'
|
|
9
|
+
import * as readline from 'node:readline'
|
|
10
|
+
import { Args, Command, Options } from '@effect/cli'
|
|
11
|
+
import { Console, Effect, Option } from 'effect'
|
|
12
|
+
import { ConfigService, defaultConfig } from '../../config/index.js'
|
|
13
|
+
import type {
|
|
14
|
+
BuildEmbeddingsResult,
|
|
15
|
+
EmbeddingEstimate,
|
|
16
|
+
} from '../../embeddings/semantic-search.js'
|
|
17
|
+
import {
|
|
18
|
+
buildEmbeddings,
|
|
19
|
+
estimateEmbeddingCost,
|
|
20
|
+
semanticSearchWithStats,
|
|
21
|
+
} from '../../embeddings/semantic-search.js'
|
|
22
|
+
import type { SearchQuality } from '../../embeddings/types.js'
|
|
23
|
+
import { createStorage, loadSectionIndex } from '../../index/storage.js'
|
|
24
|
+
import { INDEX_DIR } from '../../index/types.js'
|
|
25
|
+
import { initializeReranker } from '../../search/cross-encoder.js'
|
|
26
|
+
import {
|
|
27
|
+
detectSearchModes,
|
|
28
|
+
hybridSearch,
|
|
29
|
+
type SearchMode,
|
|
30
|
+
} from '../../search/hybrid-search.js'
|
|
31
|
+
import { isAdvancedQuery } from '../../search/query-parser.js'
|
|
32
|
+
import { search, searchContent } from '../../search/searcher.js'
|
|
33
|
+
import {
|
|
34
|
+
type APIProviderName,
|
|
35
|
+
buildPrompt,
|
|
36
|
+
type CLIProviderName,
|
|
37
|
+
displaySummarizationError,
|
|
38
|
+
estimateSummaryCost,
|
|
39
|
+
formatResultsForSummary,
|
|
40
|
+
getBestAvailableSummarizer,
|
|
41
|
+
type SummarizableResult,
|
|
42
|
+
} from '../../summarization/index.js'
|
|
43
|
+
import { jsonOption, prettyOption } from '../options.js'
|
|
44
|
+
import {
|
|
45
|
+
createCostEstimateErrorHandler,
|
|
46
|
+
createEmbeddingErrorHandler,
|
|
47
|
+
} from '../shared-error-handling.js'
|
|
48
|
+
import { formatJson, getIndexInfo, isRegexPattern } from '../utils.js'
|
|
49
|
+
|
|
50
|
+
// Auto-index threshold is now configurable via search.autoIndexThreshold
|
|
51
|
+
|
|
52
|
+
/**
 * Case-insensitive test that every refine term occurs somewhere in `content`.
 *
 * An empty `terms` list matches vacuously (returns true).
 */
const contentMatchesAllTerms = (
  content: string,
  terms: readonly string[],
): boolean => {
  const haystack = content.toLowerCase()
  for (const term of terms) {
    // Bail out on the first term that is absent.
    if (!haystack.includes(term.toLowerCase())) {
      return false
    }
  }
  return true
}
|
|
62
|
+
|
|
63
|
+
/**
 * Location of one section inside a document, as needed by refine filtering.
 */
interface SectionInfo {
  /** Document path; joined onto the search root to locate the file on disk. */
  readonly documentPath: string
  /** First line of the section (1-based, inclusive). */
  readonly startLine: number
  /** Last line of the section (1-based, inclusive). */
  readonly endLine: number
}
|
|
71
|
+
|
|
72
|
+
/**
 * Keep only the search results whose section text contains every refine term
 * (case-insensitive), returning at most `limit` of them in their original order.
 *
 * Each result is mapped to a file and line range via `getSectionInfo`; the file
 * is read from disk, the section's lines are extracted, and the text is tested
 * with `contentMatchesAllTerms`. A result is dropped when `getSectionInfo`
 * returns null, when its file cannot be read, or when its file is empty.
 *
 * Files are read at most once per call, with up to 10 results checked
 * concurrently.
 *
 * @param rootPath - Root path for file loading
 * @param results - Search results to filter
 * @param refineTerms - Terms that must all be present in section content
 * @param limit - Maximum results to return
 * @param getSectionInfo - Function to extract section info from a result
 * @returns An effect that never fails and yields the matching results
 */
const filterResultsByRefineTerms = <T>(
  rootPath: string,
  results: readonly T[],
  refineTerms: readonly string[],
  limit: number,
  getSectionInfo: (result: T) => SectionInfo | null,
): Effect.Effect<T[], never> =>
  Effect.gen(function* () {
    // Nothing to refine by (or nothing to refine): just apply the limit.
    if (refineTerms.length === 0 || results.length === 0) {
      return results.slice(0, limit) as T[]
    }

    // Cache the in-flight read rather than its outcome. Results are checked
    // concurrently, so caching only completed reads would let several results
    // from the same document each hit the disk before the cache is filled.
    // Storing the split lines also avoids re-splitting the file per result.
    const lineCache = new Map<string, Promise<readonly string[] | null>>()

    const loadLines = (
      documentPath: string,
    ): Promise<readonly string[] | null> => {
      const cached = lineCache.get(documentPath)
      if (cached !== undefined) {
        return cached
      }

      const pending = (async (): Promise<readonly string[] | null> => {
        try {
          const filePath = path.join(rootPath, documentPath)
          const text = await fs.readFile(filePath, 'utf-8')
          // Empty files are treated like unreadable ones: nothing to match.
          return text === '' ? null : text.split('\n')
        } catch {
          // Best effort: an unreadable file simply excludes its results.
          return null
        }
      })()

      lineCache.set(documentPath, pending)
      return pending
    }

    // Check each result in parallel with concurrency limit
    const checkedResults = yield* Effect.all(
      results.map((result) =>
        Effect.gen(function* () {
          const info = getSectionInfo(result)
          if (!info) return null

          // loadLines never rejects, so Effect.promise cannot produce a defect.
          const lines = yield* Effect.promise(() =>
            loadLines(info.documentPath),
          )
          if (!lines) return null

          // startLine/endLine are 1-based and inclusive.
          const sectionContent = lines
            .slice(info.startLine - 1, info.endLine)
            .join('\n')

          return contentMatchesAllTerms(sectionContent, refineTerms)
            ? result
            : null
        }),
      ),
      { concurrency: 10 },
    )

    // Filter nulls and limit results
    return checkedResults.filter((r): r is T => r !== null).slice(0, limit)
  })
|
|
143
|
+
|
|
144
|
+
/**
 * Ask a question on stdout and read one line of input from stdin.
 *
 * @param message - Prompt text written before waiting for input
 * @returns The answer, trimmed and lower-cased. Resolves to an empty string if
 *   the input stream ends before an answer is given (EOF, closed pipe,
 *   Ctrl+D), so callers never hang on a prompt that cannot be answered.
 */
const promptUser = (message: string): Promise<string> => {
  return new Promise((resolve) => {
    const rl = readline.createInterface({
      input: process.stdin,
      output: process.stdout,
    })

    // If the interface closes without the question being answered, the
    // question callback never runs; settle with "no answer" instead.
    rl.once('close', () => {
      resolve('')
    })

    rl.question(message, (answer) => {
      // Resolve before closing: close() emits 'close', and only the first
      // resolve() call on a promise takes effect.
      resolve(answer.trim().toLowerCase())
      rl.close()
    })
  })
}
|
|
156
|
+
|
|
157
|
+
export const searchCommand = Command.make(
|
|
158
|
+
'search',
|
|
159
|
+
{
|
|
160
|
+
query: Args.text({ name: 'query' }).pipe(
|
|
161
|
+
Args.withDescription('Search query (natural language or regex pattern)'),
|
|
162
|
+
),
|
|
163
|
+
path: Args.directory({ name: 'path' }).pipe(
|
|
164
|
+
Args.withDescription('Directory to search in'),
|
|
165
|
+
Args.withDefault('.'),
|
|
166
|
+
),
|
|
167
|
+
keyword: Options.boolean('keyword').pipe(
|
|
168
|
+
Options.withAlias('k'),
|
|
169
|
+
Options.withDescription('Force keyword search (content text match)'),
|
|
170
|
+
Options.withDefault(false),
|
|
171
|
+
),
|
|
172
|
+
headingOnly: Options.boolean('heading-only').pipe(
|
|
173
|
+
Options.withAlias('H'),
|
|
174
|
+
Options.withDescription('Search headings only (not content)'),
|
|
175
|
+
Options.withDefault(false),
|
|
176
|
+
),
|
|
177
|
+
mode: Options.choice('mode', ['hybrid', 'semantic', 'keyword']).pipe(
|
|
178
|
+
Options.withAlias('m'),
|
|
179
|
+
Options.withDescription(
|
|
180
|
+
'Search mode: hybrid (BM25+semantic), semantic, or keyword',
|
|
181
|
+
),
|
|
182
|
+
Options.optional,
|
|
183
|
+
),
|
|
184
|
+
limit: Options.integer('limit').pipe(
|
|
185
|
+
Options.withAlias('n'),
|
|
186
|
+
Options.withDescription('Maximum results'),
|
|
187
|
+
Options.withDefault(10),
|
|
188
|
+
),
|
|
189
|
+
threshold: Options.float('threshold').pipe(
|
|
190
|
+
Options.withDescription('Similarity threshold for semantic search (0-1)'),
|
|
191
|
+
Options.withDefault(0.35),
|
|
192
|
+
),
|
|
193
|
+
context: Options.integer('context').pipe(
|
|
194
|
+
Options.withAlias('C'),
|
|
195
|
+
Options.withDescription('Lines of context around matches (like grep -C)'),
|
|
196
|
+
Options.optional,
|
|
197
|
+
),
|
|
198
|
+
beforeContext: Options.integer('before-context').pipe(
|
|
199
|
+
Options.withAlias('B'),
|
|
200
|
+
Options.withDescription('Lines of context before matches (like grep -B)'),
|
|
201
|
+
Options.optional,
|
|
202
|
+
),
|
|
203
|
+
afterContext: Options.integer('after-context').pipe(
|
|
204
|
+
Options.withAlias('A'),
|
|
205
|
+
Options.withDescription('Lines of context after matches (like grep -A)'),
|
|
206
|
+
Options.optional,
|
|
207
|
+
),
|
|
208
|
+
autoIndexThreshold: Options.integer('auto-index-threshold').pipe(
|
|
209
|
+
Options.withDescription(
|
|
210
|
+
'Auto-create semantic index if estimated time is under this threshold (seconds)',
|
|
211
|
+
),
|
|
212
|
+
Options.optional,
|
|
213
|
+
),
|
|
214
|
+
provider: Options.choice('provider', [
|
|
215
|
+
'openai',
|
|
216
|
+
'ollama',
|
|
217
|
+
'lm-studio',
|
|
218
|
+
'openrouter',
|
|
219
|
+
'voyage',
|
|
220
|
+
]).pipe(
|
|
221
|
+
Options.withDescription(
|
|
222
|
+
'Embedding provider for semantic search: openai, ollama, lm-studio, openrouter, or voyage',
|
|
223
|
+
),
|
|
224
|
+
Options.optional,
|
|
225
|
+
),
|
|
226
|
+
rerank: Options.boolean('rerank').pipe(
|
|
227
|
+
Options.withAlias('r'),
|
|
228
|
+
Options.withDescription(
|
|
229
|
+
'Re-rank results using cross-encoder for improved precision. Downloads ~90MB model on first use. Requires @huggingface/transformers.',
|
|
230
|
+
),
|
|
231
|
+
Options.withDefault(false),
|
|
232
|
+
),
|
|
233
|
+
quality: Options.choice('quality', ['fast', 'balanced', 'thorough']).pipe(
|
|
234
|
+
Options.withAlias('q'),
|
|
235
|
+
Options.withDescription(
|
|
236
|
+
'Search quality mode: fast (quicker, lower recall), balanced (default), thorough (slower, better recall)',
|
|
237
|
+
),
|
|
238
|
+
Options.optional,
|
|
239
|
+
),
|
|
240
|
+
hyde: Options.boolean('hyde').pipe(
|
|
241
|
+
Options.withDescription(
|
|
242
|
+
'Use HyDE (Hypothetical Document Embeddings) for complex queries. Generates a hypothetical answer with LLM, then searches using that embedding. Improves recall 10-30% on complex/ambiguous queries at cost of ~1-2s latency and LLM API usage.',
|
|
243
|
+
),
|
|
244
|
+
Options.withDefault(false),
|
|
245
|
+
),
|
|
246
|
+
rerankInit: Options.boolean('rerank-init').pipe(
|
|
247
|
+
Options.withDescription(
|
|
248
|
+
'Pre-download the cross-encoder model (~90MB) for re-ranking. Use this before first search to avoid latency.',
|
|
249
|
+
),
|
|
250
|
+
Options.withDefault(false),
|
|
251
|
+
),
|
|
252
|
+
timeout: Options.integer('timeout').pipe(
|
|
253
|
+
Options.withDescription(
|
|
254
|
+
'Request timeout in milliseconds for embedding API calls (default: 30000)',
|
|
255
|
+
),
|
|
256
|
+
Options.optional,
|
|
257
|
+
),
|
|
258
|
+
json: jsonOption,
|
|
259
|
+
pretty: prettyOption,
|
|
260
|
+
summarize: Options.boolean('summarize').pipe(
|
|
261
|
+
Options.withAlias('s'),
|
|
262
|
+
Options.withDescription('Generate AI summary of search results'),
|
|
263
|
+
Options.withDefault(false),
|
|
264
|
+
),
|
|
265
|
+
yes: Options.boolean('yes').pipe(
|
|
266
|
+
Options.withAlias('y'),
|
|
267
|
+
Options.withDescription('Skip cost confirmation for paid AI providers'),
|
|
268
|
+
Options.withDefault(false),
|
|
269
|
+
),
|
|
270
|
+
stream: Options.boolean('stream').pipe(
|
|
271
|
+
Options.withDescription('Stream AI summary output in real-time'),
|
|
272
|
+
Options.withDefault(false),
|
|
273
|
+
),
|
|
274
|
+
fuzzy: Options.boolean('fuzzy').pipe(
|
|
275
|
+
Options.withAlias('f'),
|
|
276
|
+
Options.withDescription(
|
|
277
|
+
'Enable fuzzy matching for typo tolerance (e.g., "configration" matches "configuration")',
|
|
278
|
+
),
|
|
279
|
+
Options.withDefault(false),
|
|
280
|
+
),
|
|
281
|
+
stem: Options.boolean('stem').pipe(
|
|
282
|
+
Options.withDescription(
|
|
283
|
+
'Enable word stemming (e.g., "fail" matches "failure", "failed", "failing")',
|
|
284
|
+
),
|
|
285
|
+
Options.withDefault(false),
|
|
286
|
+
),
|
|
287
|
+
fuzzyDistance: Options.integer('fuzzy-distance').pipe(
|
|
288
|
+
Options.withDescription(
|
|
289
|
+
'Max edit distance for fuzzy matching (default: 2)',
|
|
290
|
+
),
|
|
291
|
+
Options.optional,
|
|
292
|
+
),
|
|
293
|
+
refine: Options.text('refine').pipe(
|
|
294
|
+
Options.withDescription(
|
|
295
|
+
'Additional filter terms to narrow results (can be used multiple times)',
|
|
296
|
+
),
|
|
297
|
+
Options.repeated,
|
|
298
|
+
),
|
|
299
|
+
},
|
|
300
|
+
({
|
|
301
|
+
query,
|
|
302
|
+
path: dirPath,
|
|
303
|
+
keyword,
|
|
304
|
+
headingOnly,
|
|
305
|
+
mode,
|
|
306
|
+
limit,
|
|
307
|
+
threshold,
|
|
308
|
+
context,
|
|
309
|
+
beforeContext,
|
|
310
|
+
afterContext,
|
|
311
|
+
autoIndexThreshold,
|
|
312
|
+
provider,
|
|
313
|
+
rerank,
|
|
314
|
+
quality,
|
|
315
|
+
hyde,
|
|
316
|
+
rerankInit,
|
|
317
|
+
timeout,
|
|
318
|
+
json,
|
|
319
|
+
pretty,
|
|
320
|
+
summarize,
|
|
321
|
+
yes,
|
|
322
|
+
stream,
|
|
323
|
+
fuzzy,
|
|
324
|
+
stem,
|
|
325
|
+
fuzzyDistance,
|
|
326
|
+
refine,
|
|
327
|
+
}) =>
|
|
328
|
+
Effect.gen(function* () {
|
|
329
|
+
const resolvedDir = path.resolve(dirPath)
|
|
330
|
+
|
|
331
|
+
// Handle --rerank-init: pre-download model and exit
|
|
332
|
+
if (rerankInit) {
|
|
333
|
+
yield* Console.log(
|
|
334
|
+
'Initializing cross-encoder model (~90MB download)...',
|
|
335
|
+
)
|
|
336
|
+
|
|
337
|
+
const cacheDir = path.join(resolvedDir, INDEX_DIR, 'models')
|
|
338
|
+
|
|
339
|
+
const result = yield* initializeReranker(cacheDir, (progress) => {
|
|
340
|
+
if (progress.status === 'loading' && progress.file) {
|
|
341
|
+
const pct = progress.progress
|
|
342
|
+
? ` (${Math.round(progress.progress)}%)`
|
|
343
|
+
: ''
|
|
344
|
+
process.stdout.write(`\r Downloading: ${progress.file}${pct}`)
|
|
345
|
+
}
|
|
346
|
+
}).pipe(
|
|
347
|
+
Effect.map(() => true),
|
|
348
|
+
Effect.catchTag('RerankerError', (e) => {
|
|
349
|
+
if (e.reason === 'DependencyMissing') {
|
|
350
|
+
return Effect.succeed(false)
|
|
351
|
+
}
|
|
352
|
+
return Effect.fail(e)
|
|
353
|
+
}),
|
|
354
|
+
)
|
|
355
|
+
|
|
356
|
+
if (!result) {
|
|
357
|
+
yield* Console.log('')
|
|
358
|
+
yield* Console.log('Error: @huggingface/transformers not installed.')
|
|
359
|
+
yield* Console.log(
|
|
360
|
+
'Install with: npm install @huggingface/transformers',
|
|
361
|
+
)
|
|
362
|
+
return
|
|
363
|
+
}
|
|
364
|
+
|
|
365
|
+
yield* Console.log('')
|
|
366
|
+
yield* Console.log('Cross-encoder model initialized successfully.')
|
|
367
|
+
yield* Console.log('Use --rerank on searches for improved precision.')
|
|
368
|
+
return
|
|
369
|
+
}
|
|
370
|
+
|
|
371
|
+
// Get configuration (with fallback to defaults if not available)
|
|
372
|
+
const config = yield* Effect.serviceOption(ConfigService).pipe(
|
|
373
|
+
Effect.map(Option.getOrElse(() => defaultConfig)),
|
|
374
|
+
)
|
|
375
|
+
const searchConfig = config.search
|
|
376
|
+
|
|
377
|
+
// Apply config-based defaults when CLI options use their static defaults
|
|
378
|
+
// Note: CLI options have static defaults for help text; config overrides those defaults
|
|
379
|
+
const effectiveLimit = limit === 10 ? searchConfig.defaultLimit : limit
|
|
380
|
+
const effectiveThreshold =
|
|
381
|
+
threshold === 0.35 ? searchConfig.minSimilarity : threshold
|
|
382
|
+
const effectiveAutoIndexThreshold = Option.getOrElse(
|
|
383
|
+
autoIndexThreshold,
|
|
384
|
+
() => searchConfig.autoIndexThreshold,
|
|
385
|
+
)
|
|
386
|
+
|
|
387
|
+
// Get index info for display
|
|
388
|
+
const indexInfo = yield* Effect.promise(() => getIndexInfo(resolvedDir))
|
|
389
|
+
|
|
390
|
+
// Check if no index exists
|
|
391
|
+
if (!indexInfo.exists && !json) {
|
|
392
|
+
yield* Console.log('No index found.')
|
|
393
|
+
yield* Console.log('')
|
|
394
|
+
yield* Console.log('Run: mdcontext index /path/to/docs')
|
|
395
|
+
yield* Console.log(' Add --embed for semantic search capabilities')
|
|
396
|
+
return
|
|
397
|
+
}
|
|
398
|
+
|
|
399
|
+
// Determine the actual index root (may be a parent directory)
|
|
400
|
+
const indexRoot = indexInfo.indexRoot ?? resolvedDir
|
|
401
|
+
|
|
402
|
+
// Calculate path filter for scoped search
|
|
403
|
+
// If searching a subdirectory, filter results to that path
|
|
404
|
+
let scopedPathPattern: string | undefined
|
|
405
|
+
if (indexInfo.indexRoot && indexInfo.indexRoot !== resolvedDir) {
|
|
406
|
+
// Get relative path from index root to search dir
|
|
407
|
+
const relativePath = path.relative(indexRoot, resolvedDir)
|
|
408
|
+
// Create pattern to match files in this directory and subdirectories
|
|
409
|
+
scopedPathPattern = `${relativePath}/*`
|
|
410
|
+
if (!json) {
|
|
411
|
+
yield* Console.log(`Searching within: ${relativePath}/`)
|
|
412
|
+
yield* Console.log('')
|
|
413
|
+
}
|
|
414
|
+
}
|
|
415
|
+
|
|
416
|
+
// Check available search modes
|
|
417
|
+
const searchModes = yield* detectSearchModes(indexRoot)
|
|
418
|
+
let embedsExist = searchModes.hasEmbeddings
|
|
419
|
+
|
|
420
|
+
// Determine search mode
|
|
421
|
+
// Priority: --mode flag > --keyword flag > advanced query > auto-detect
|
|
422
|
+
let effectiveMode: SearchMode
|
|
423
|
+
let modeReason: string
|
|
424
|
+
|
|
425
|
+
const modeValue = Option.getOrUndefined(mode)
|
|
426
|
+
|
|
427
|
+
if (modeValue === 'hybrid') {
|
|
428
|
+
effectiveMode = 'hybrid'
|
|
429
|
+
modeReason = '--mode hybrid'
|
|
430
|
+
} else if (modeValue === 'semantic') {
|
|
431
|
+
if (!embedsExist) {
|
|
432
|
+
embedsExist = yield* handleMissingEmbeddings(
|
|
433
|
+
indexRoot,
|
|
434
|
+
effectiveAutoIndexThreshold,
|
|
435
|
+
json,
|
|
436
|
+
)
|
|
437
|
+
if (!embedsExist) {
|
|
438
|
+
return
|
|
439
|
+
}
|
|
440
|
+
}
|
|
441
|
+
effectiveMode = 'semantic'
|
|
442
|
+
modeReason = '--mode semantic'
|
|
443
|
+
} else if (modeValue === 'keyword') {
|
|
444
|
+
effectiveMode = 'keyword'
|
|
445
|
+
modeReason = '--mode keyword'
|
|
446
|
+
} else if (keyword) {
|
|
447
|
+
effectiveMode = 'keyword'
|
|
448
|
+
modeReason = '--keyword flag'
|
|
449
|
+
} else if (isAdvancedQuery(query)) {
|
|
450
|
+
effectiveMode = 'keyword'
|
|
451
|
+
modeReason = 'boolean/phrase pattern detected'
|
|
452
|
+
} else if (isRegexPattern(query)) {
|
|
453
|
+
effectiveMode = 'keyword'
|
|
454
|
+
modeReason = 'regex pattern detected'
|
|
455
|
+
} else {
|
|
456
|
+
// Auto-detect best mode based on available indexes
|
|
457
|
+
effectiveMode = searchModes.recommendedMode
|
|
458
|
+
if (effectiveMode === 'hybrid') {
|
|
459
|
+
modeReason = 'both indexes available'
|
|
460
|
+
} else if (effectiveMode === 'semantic') {
|
|
461
|
+
modeReason = 'embeddings available'
|
|
462
|
+
} else {
|
|
463
|
+
modeReason = 'no embeddings'
|
|
464
|
+
}
|
|
465
|
+
}
|
|
466
|
+
|
|
467
|
+
const modeIndicator = `[${effectiveMode}]`
|
|
468
|
+
|
|
469
|
+
// Show index info (non-JSON mode)
|
|
470
|
+
if (!json && indexInfo.lastUpdated) {
|
|
471
|
+
const lastUpdatedDate = new Date(indexInfo.lastUpdated)
|
|
472
|
+
const dateStr = lastUpdatedDate.toLocaleDateString('en-CA')
|
|
473
|
+
const timeStr = lastUpdatedDate.toLocaleTimeString('en-US', {
|
|
474
|
+
hour: '2-digit',
|
|
475
|
+
minute: '2-digit',
|
|
476
|
+
hour12: false,
|
|
477
|
+
})
|
|
478
|
+
yield* Console.log(`Using index from ${dateStr} ${timeStr}`)
|
|
479
|
+
yield* Console.log(` Sections: ${indexInfo.sectionCount ?? 0}`)
|
|
480
|
+
if (indexInfo.embeddingsExist) {
|
|
481
|
+
yield* Console.log(
|
|
482
|
+
` Embeddings: yes (${indexInfo.vectorCount ?? 0} vectors)`,
|
|
483
|
+
)
|
|
484
|
+
} else {
|
|
485
|
+
yield* Console.log(' Embeddings: no')
|
|
486
|
+
}
|
|
487
|
+
yield* Console.log('')
|
|
488
|
+
}
|
|
489
|
+
|
|
490
|
+
// Calculate context lines
|
|
491
|
+
// -C sets both before and after; -B and -A override individual sides
|
|
492
|
+
const contextValue = Option.getOrUndefined(context)
|
|
493
|
+
const beforeValue = Option.getOrUndefined(beforeContext)
|
|
494
|
+
const afterValue = Option.getOrUndefined(afterContext)
|
|
495
|
+
|
|
496
|
+
const contextBefore = beforeValue ?? contextValue
|
|
497
|
+
const contextAfter = afterValue ?? contextValue
|
|
498
|
+
|
|
499
|
+
if (effectiveMode === 'hybrid') {
|
|
500
|
+
// Hybrid search - combines BM25 and semantic with RRF
|
|
501
|
+
const effectiveQuality = Option.getOrUndefined(quality) as
|
|
502
|
+
| SearchQuality
|
|
503
|
+
| undefined
|
|
504
|
+
// Get more results if refinement is needed (we'll filter down later)
|
|
505
|
+
const refineTerms = refine.length > 0 ? refine : []
|
|
506
|
+
const fetchLimit =
|
|
507
|
+
refineTerms.length > 0 ? effectiveLimit * 5 : effectiveLimit
|
|
508
|
+
|
|
509
|
+
const { results: rawResults, stats } = yield* hybridSearch(
|
|
510
|
+
indexRoot,
|
|
511
|
+
query,
|
|
512
|
+
{
|
|
513
|
+
limit: fetchLimit,
|
|
514
|
+
threshold: effectiveThreshold,
|
|
515
|
+
mode: 'hybrid',
|
|
516
|
+
rerank,
|
|
517
|
+
quality: effectiveQuality,
|
|
518
|
+
contextBefore,
|
|
519
|
+
contextAfter,
|
|
520
|
+
...(scopedPathPattern && { pathPattern: scopedPathPattern }),
|
|
521
|
+
},
|
|
522
|
+
)
|
|
523
|
+
|
|
524
|
+
// Apply refine filtering if terms provided (parallel with caching)
|
|
525
|
+
let results = rawResults
|
|
526
|
+
if (refineTerms.length > 0) {
|
|
527
|
+
const storage = createStorage(indexRoot)
|
|
528
|
+
const sectionIndex = yield* loadSectionIndex(storage)
|
|
529
|
+
|
|
530
|
+
if (sectionIndex) {
|
|
531
|
+
results = yield* filterResultsByRefineTerms(
|
|
532
|
+
indexRoot,
|
|
533
|
+
rawResults,
|
|
534
|
+
refineTerms,
|
|
535
|
+
effectiveLimit,
|
|
536
|
+
(result) => {
|
|
537
|
+
const section = sectionIndex.sections[result.sectionId]
|
|
538
|
+
return section
|
|
539
|
+
? {
|
|
540
|
+
documentPath: result.documentPath,
|
|
541
|
+
startLine: section.startLine,
|
|
542
|
+
endLine: section.endLine,
|
|
543
|
+
}
|
|
544
|
+
: null
|
|
545
|
+
},
|
|
546
|
+
)
|
|
547
|
+
}
|
|
548
|
+
}
|
|
549
|
+
|
|
550
|
+
// Warn if reranking was requested but not applied
|
|
551
|
+
if (rerank && !stats.reranked && !json) {
|
|
552
|
+
yield* Console.log(
|
|
553
|
+
'Note: --rerank requested but @huggingface/transformers not installed',
|
|
554
|
+
)
|
|
555
|
+
yield* Console.log(
|
|
556
|
+
' Install with: npm install @huggingface/transformers',
|
|
557
|
+
)
|
|
558
|
+
yield* Console.log('')
|
|
559
|
+
}
|
|
560
|
+
|
|
561
|
+
if (json) {
|
|
562
|
+
const moreAvailable =
|
|
563
|
+
stats.totalAvailable !== undefined &&
|
|
564
|
+
stats.totalAvailable > results.length
|
|
565
|
+
? stats.totalAvailable - results.length
|
|
566
|
+
: undefined
|
|
567
|
+
const output = {
|
|
568
|
+
mode: 'hybrid',
|
|
569
|
+
modeReason,
|
|
570
|
+
query,
|
|
571
|
+
stats,
|
|
572
|
+
moreAvailable,
|
|
573
|
+
results: results.map((r) => ({
|
|
574
|
+
path: r.documentPath,
|
|
575
|
+
heading: r.heading,
|
|
576
|
+
score: r.score,
|
|
577
|
+
similarity: r.similarity,
|
|
578
|
+
bm25Score: r.bm25Score,
|
|
579
|
+
sources: r.sources,
|
|
580
|
+
...(r.contextLines && { contextLines: r.contextLines }),
|
|
581
|
+
})),
|
|
582
|
+
}
|
|
583
|
+
yield* Console.log(formatJson(output, pretty))
|
|
584
|
+
} else {
|
|
585
|
+
const showReason = !modeReason.startsWith('--mode')
|
|
586
|
+
const modeStr = showReason
|
|
587
|
+
? `${modeIndicator} (${modeReason})`
|
|
588
|
+
: modeIndicator
|
|
589
|
+
yield* Console.log(`${modeStr} Searching: "${query}"`)
|
|
590
|
+
|
|
591
|
+
// Show results count with "more available" indicator if results were limited
|
|
592
|
+
const moreAvailable =
|
|
593
|
+
stats.totalAvailable !== undefined &&
|
|
594
|
+
stats.totalAvailable > results.length
|
|
595
|
+
? stats.totalAvailable - results.length
|
|
596
|
+
: 0
|
|
597
|
+
if (moreAvailable > 0) {
|
|
598
|
+
yield* Console.log(
|
|
599
|
+
`Results: ${results.length} (${moreAvailable} more available, use --limit to see more)`,
|
|
600
|
+
)
|
|
601
|
+
} else {
|
|
602
|
+
yield* Console.log(`Results: ${results.length}`)
|
|
603
|
+
}
|
|
604
|
+
yield* Console.log('')
|
|
605
|
+
|
|
606
|
+
for (const result of results) {
|
|
607
|
+
const sources = result.sources.join('+')
|
|
608
|
+
const score = (result.score * 100).toFixed(1)
|
|
609
|
+
yield* Console.log(` ${result.documentPath}`)
|
|
610
|
+
yield* Console.log(
|
|
611
|
+
` ${result.heading} (${score} RRF, ${sources})`,
|
|
612
|
+
)
|
|
613
|
+
|
|
614
|
+
if (result.contextLines && result.contextLines.length > 0) {
|
|
615
|
+
yield* Console.log('')
|
|
616
|
+
for (const ctxLine of result.contextLines) {
|
|
617
|
+
const marker = ctxLine.isMatch ? '>' : ' '
|
|
618
|
+
yield* Console.log(
|
|
619
|
+
` ${marker} ${ctxLine.lineNumber}: ${ctxLine.line}`,
|
|
620
|
+
)
|
|
621
|
+
}
|
|
622
|
+
}
|
|
623
|
+
|
|
624
|
+
yield* Console.log('')
|
|
625
|
+
}
|
|
626
|
+
}
|
|
627
|
+
|
|
628
|
+
// Summarization for hybrid search
|
|
629
|
+
if (summarize && results.length > 0) {
|
|
630
|
+
const summarizableResults: SummarizableResult[] = results.map(
|
|
631
|
+
(r) => ({
|
|
632
|
+
documentPath: r.documentPath,
|
|
633
|
+
heading: r.heading,
|
|
634
|
+
score: r.score,
|
|
635
|
+
...(r.similarity !== undefined && { similarity: r.similarity }),
|
|
636
|
+
}),
|
|
637
|
+
)
|
|
638
|
+
yield* runSummarization({
|
|
639
|
+
results: summarizableResults,
|
|
640
|
+
query,
|
|
641
|
+
searchMode: 'hybrid',
|
|
642
|
+
json,
|
|
643
|
+
yes,
|
|
644
|
+
stream,
|
|
645
|
+
config: {
|
|
646
|
+
mode: config.aiSummarization.mode,
|
|
647
|
+
provider: config.aiSummarization.provider,
|
|
648
|
+
},
|
|
649
|
+
})
|
|
650
|
+
}
|
|
651
|
+
} else if (effectiveMode === 'keyword') {
|
|
652
|
+
// Keyword search - content by default, heading-only if flag set
|
|
653
|
+
const effectiveFuzzyDistance = Option.getOrUndefined(fuzzyDistance)
|
|
654
|
+
const refineTerms = refine.length > 0 ? refine : []
|
|
655
|
+
const fetchLimit =
|
|
656
|
+
refineTerms.length > 0 ? effectiveLimit * 5 : effectiveLimit
|
|
657
|
+
|
|
658
|
+
let results = headingOnly
|
|
659
|
+
? yield* search(indexRoot, {
|
|
660
|
+
heading: query,
|
|
661
|
+
limit: fetchLimit,
|
|
662
|
+
...(scopedPathPattern && { pathPattern: scopedPathPattern }),
|
|
663
|
+
})
|
|
664
|
+
: yield* searchContent(indexRoot, {
|
|
665
|
+
content: query,
|
|
666
|
+
limit: fetchLimit,
|
|
667
|
+
contextBefore,
|
|
668
|
+
contextAfter,
|
|
669
|
+
fuzzy,
|
|
670
|
+
stem,
|
|
671
|
+
...(effectiveFuzzyDistance !== undefined && {
|
|
672
|
+
fuzzyDistance: effectiveFuzzyDistance,
|
|
673
|
+
}),
|
|
674
|
+
...(scopedPathPattern && { pathPattern: scopedPathPattern }),
|
|
675
|
+
})
|
|
676
|
+
|
|
677
|
+
// Apply refine filtering if terms provided (parallel with caching)
|
|
678
|
+
if (refineTerms.length > 0) {
|
|
679
|
+
results = yield* filterResultsByRefineTerms(
|
|
680
|
+
indexRoot,
|
|
681
|
+
results,
|
|
682
|
+
refineTerms,
|
|
683
|
+
effectiveLimit,
|
|
684
|
+
(result) => ({
|
|
685
|
+
documentPath: result.section.documentPath,
|
|
686
|
+
startLine: result.section.startLine,
|
|
687
|
+
endLine: result.section.endLine,
|
|
688
|
+
}),
|
|
689
|
+
)
|
|
690
|
+
}
|
|
691
|
+
|
|
692
|
+
if (json) {
|
|
693
|
+
const output = {
|
|
694
|
+
mode: 'keyword',
|
|
695
|
+
modeReason,
|
|
696
|
+
query,
|
|
697
|
+
contextBefore,
|
|
698
|
+
contextAfter,
|
|
699
|
+
fuzzy,
|
|
700
|
+
stem,
|
|
701
|
+
...(effectiveFuzzyDistance !== undefined && {
|
|
702
|
+
fuzzyDistance: effectiveFuzzyDistance,
|
|
703
|
+
}),
|
|
704
|
+
results: results.map((r) => ({
|
|
705
|
+
path: r.section.documentPath,
|
|
706
|
+
heading: r.section.heading,
|
|
707
|
+
level: r.section.level,
|
|
708
|
+
tokens: r.section.tokenCount,
|
|
709
|
+
line: r.section.startLine,
|
|
710
|
+
matches: r.matches?.map((m) => ({
|
|
711
|
+
lineNumber: m.lineNumber,
|
|
712
|
+
line: m.line,
|
|
713
|
+
contextLines: m.contextLines,
|
|
714
|
+
})),
|
|
715
|
+
})),
|
|
716
|
+
}
|
|
717
|
+
yield* Console.log(formatJson(output, pretty))
|
|
718
|
+
} else {
|
|
719
|
+
const searchType = headingOnly ? 'Heading' : 'Content'
|
|
720
|
+
const showReason =
|
|
721
|
+
modeReason !== '--mode keyword' && modeReason !== '--keyword flag'
|
|
722
|
+
const modeStr = showReason
|
|
723
|
+
? `${modeIndicator} (${modeReason})`
|
|
724
|
+
: modeIndicator
|
|
725
|
+
// Build fuzzy/stem indicator
|
|
726
|
+
const fuzzyIndicators: string[] = []
|
|
727
|
+
if (fuzzy) fuzzyIndicators.push('fuzzy')
|
|
728
|
+
if (stem) fuzzyIndicators.push('stem')
|
|
729
|
+
const fuzzyStr =
|
|
730
|
+
fuzzyIndicators.length > 0 ? ` [${fuzzyIndicators.join('+')}]` : ''
|
|
731
|
+
yield* Console.log(
|
|
732
|
+
`${modeStr}${fuzzyStr} ${searchType} search: "${query}"`,
|
|
733
|
+
)
|
|
734
|
+
yield* Console.log(`Results: ${results.length}`)
|
|
735
|
+
yield* Console.log('')
|
|
736
|
+
|
|
737
|
+
for (const result of results) {
|
|
738
|
+
const levelMarker = '#'.repeat(result.section.level)
|
|
739
|
+
yield* Console.log(
|
|
740
|
+
` ${result.section.documentPath}:${result.section.startLine}`,
|
|
741
|
+
)
|
|
742
|
+
yield* Console.log(
|
|
743
|
+
` ${levelMarker} ${result.section.heading} (${result.section.tokenCount} tokens)`,
|
|
744
|
+
)
|
|
745
|
+
|
|
746
|
+
if (result.matches && result.matches.length > 0) {
|
|
747
|
+
yield* Console.log('')
|
|
748
|
+
for (const match of result.matches.slice(0, 3)) {
|
|
749
|
+
if (match.contextLines && match.contextLines.length > 0) {
|
|
750
|
+
for (const ctxLine of match.contextLines) {
|
|
751
|
+
const marker = ctxLine.isMatch ? '>' : ' '
|
|
752
|
+
yield* Console.log(
|
|
753
|
+
` ${marker} ${ctxLine.lineNumber}: ${ctxLine.line}`,
|
|
754
|
+
)
|
|
755
|
+
}
|
|
756
|
+
} else {
|
|
757
|
+
yield* Console.log(` Line ${match.lineNumber}:`)
|
|
758
|
+
const snippetLines = match.snippet.split('\n')
|
|
759
|
+
for (const line of snippetLines) {
|
|
760
|
+
yield* Console.log(` ${line}`)
|
|
761
|
+
}
|
|
762
|
+
}
|
|
763
|
+
yield* Console.log('')
|
|
764
|
+
}
|
|
765
|
+
if (result.matches.length > 3) {
|
|
766
|
+
yield* Console.log(
|
|
767
|
+
` ... and ${result.matches.length - 3} more matches`,
|
|
768
|
+
)
|
|
769
|
+
}
|
|
770
|
+
}
|
|
771
|
+
yield* Console.log('')
|
|
772
|
+
}
|
|
773
|
+
|
|
774
|
+
if (!indexInfo.embeddingsExist) {
|
|
775
|
+
yield* Console.log(
|
|
776
|
+
"Tip: Run 'mdcontext index --embed' to enable semantic search",
|
|
777
|
+
)
|
|
778
|
+
}
|
|
779
|
+
}
|
|
780
|
+
|
|
781
|
+
// Summarization for keyword search
|
|
782
|
+
if (summarize && results.length > 0) {
|
|
783
|
+
const summarizableResults: SummarizableResult[] = results.map(
|
|
784
|
+
(r) => ({
|
|
785
|
+
documentPath: r.section.documentPath,
|
|
786
|
+
heading: r.section.heading,
|
|
787
|
+
}),
|
|
788
|
+
)
|
|
789
|
+
yield* runSummarization({
|
|
790
|
+
results: summarizableResults,
|
|
791
|
+
query,
|
|
792
|
+
searchMode: 'keyword',
|
|
793
|
+
json,
|
|
794
|
+
yes,
|
|
795
|
+
stream,
|
|
796
|
+
config: {
|
|
797
|
+
mode: config.aiSummarization.mode,
|
|
798
|
+
provider: config.aiSummarization.provider,
|
|
799
|
+
},
|
|
800
|
+
})
|
|
801
|
+
}
|
|
802
|
+
} else {
|
|
803
|
+
// Build provider config from CLI flag if specified
|
|
804
|
+
const cliTimeout = Option.getOrUndefined(timeout)
|
|
805
|
+
const providerConfig = Option.isSome(provider)
|
|
806
|
+
? {
|
|
807
|
+
provider: provider.value as
|
|
808
|
+
| 'openai'
|
|
809
|
+
| 'ollama'
|
|
810
|
+
| 'lm-studio'
|
|
811
|
+
| 'openrouter'
|
|
812
|
+
| 'voyage',
|
|
813
|
+
timeout: cliTimeout,
|
|
814
|
+
}
|
|
815
|
+
: cliTimeout !== undefined
|
|
816
|
+
? { provider: 'openai' as const, timeout: cliTimeout }
|
|
817
|
+
: undefined
|
|
818
|
+
|
|
819
|
+
// Semantic search with stats for below-threshold feedback
|
|
820
|
+
const refineTerms = refine.length > 0 ? refine : []
|
|
821
|
+
const fetchLimit =
|
|
822
|
+
refineTerms.length > 0 ? effectiveLimit * 5 : effectiveLimit
|
|
823
|
+
|
|
824
|
+
const semanticQuality = Option.getOrUndefined(quality) as
|
|
825
|
+
| SearchQuality
|
|
826
|
+
| undefined
|
|
827
|
+
const searchResult = yield* semanticSearchWithStats(indexRoot, query, {
|
|
828
|
+
limit: fetchLimit,
|
|
829
|
+
threshold: effectiveThreshold,
|
|
830
|
+
providerConfig,
|
|
831
|
+
quality: semanticQuality,
|
|
832
|
+
hyde,
|
|
833
|
+
contextBefore,
|
|
834
|
+
contextAfter,
|
|
835
|
+
...(scopedPathPattern && { pathPattern: scopedPathPattern }),
|
|
836
|
+
})
|
|
837
|
+
let {
|
|
838
|
+
results,
|
|
839
|
+
belowThresholdCount,
|
|
840
|
+
belowThresholdHighest,
|
|
841
|
+
totalAvailable,
|
|
842
|
+
} = searchResult
|
|
843
|
+
|
|
844
|
+
// Apply refine filtering if terms provided (parallel with caching)
|
|
845
|
+
if (refineTerms.length > 0) {
|
|
846
|
+
const storage = createStorage(indexRoot)
|
|
847
|
+
const sectionIndex = yield* loadSectionIndex(storage)
|
|
848
|
+
|
|
849
|
+
if (sectionIndex) {
|
|
850
|
+
results = yield* filterResultsByRefineTerms(
|
|
851
|
+
indexRoot,
|
|
852
|
+
results,
|
|
853
|
+
refineTerms,
|
|
854
|
+
effectiveLimit,
|
|
855
|
+
(result) => {
|
|
856
|
+
const section = sectionIndex.sections[result.sectionId]
|
|
857
|
+
return section
|
|
858
|
+
? {
|
|
859
|
+
documentPath: result.documentPath,
|
|
860
|
+
startLine: section.startLine,
|
|
861
|
+
endLine: section.endLine,
|
|
862
|
+
}
|
|
863
|
+
: null
|
|
864
|
+
},
|
|
865
|
+
)
|
|
866
|
+
}
|
|
867
|
+
}
|
|
868
|
+
|
|
869
|
+
if (json) {
|
|
870
|
+
const moreAvailableSemantic =
|
|
871
|
+
totalAvailable !== undefined && totalAvailable > results.length
|
|
872
|
+
? totalAvailable - results.length
|
|
873
|
+
: undefined
|
|
874
|
+
const output = {
|
|
875
|
+
mode: 'semantic',
|
|
876
|
+
modeReason,
|
|
877
|
+
query,
|
|
878
|
+
hyde,
|
|
879
|
+
results,
|
|
880
|
+
belowThresholdCount,
|
|
881
|
+
belowThresholdHighest,
|
|
882
|
+
moreAvailable: moreAvailableSemantic,
|
|
883
|
+
}
|
|
884
|
+
yield* Console.log(formatJson(output, pretty))
|
|
885
|
+
} else {
|
|
886
|
+
const showSemanticReason = modeReason !== '--mode semantic'
|
|
887
|
+
const semanticModeStr = showSemanticReason
|
|
888
|
+
? `${modeIndicator} (${modeReason})`
|
|
889
|
+
: modeIndicator
|
|
890
|
+
const hydeIndicator = hyde ? ' [HyDE]' : ''
|
|
891
|
+
yield* Console.log(
|
|
892
|
+
`${semanticModeStr}${hydeIndicator} Semantic search: "${query}"`,
|
|
893
|
+
)
|
|
894
|
+
|
|
895
|
+
// Show results count with "more available" indicator if results were limited
|
|
896
|
+
const moreAvailableSemantic =
|
|
897
|
+
totalAvailable !== undefined && totalAvailable > results.length
|
|
898
|
+
? totalAvailable - results.length
|
|
899
|
+
: 0
|
|
900
|
+
if (moreAvailableSemantic > 0) {
|
|
901
|
+
yield* Console.log(
|
|
902
|
+
`Results: ${results.length} (${moreAvailableSemantic} more available, use --limit to see more)`,
|
|
903
|
+
)
|
|
904
|
+
} else {
|
|
905
|
+
yield* Console.log(`Results: ${results.length}`)
|
|
906
|
+
}
|
|
907
|
+
yield* Console.log('')
|
|
908
|
+
|
|
909
|
+
for (const result of results) {
|
|
910
|
+
const similarity = (result.similarity * 100).toFixed(1)
|
|
911
|
+
yield* Console.log(` ${result.documentPath}`)
|
|
912
|
+
yield* Console.log(` ${result.heading} (${similarity}% match)`)
|
|
913
|
+
|
|
914
|
+
if (result.contextLines && result.contextLines.length > 0) {
|
|
915
|
+
yield* Console.log('')
|
|
916
|
+
for (const ctxLine of result.contextLines) {
|
|
917
|
+
const marker = ctxLine.isMatch ? '>' : ' '
|
|
918
|
+
yield* Console.log(
|
|
919
|
+
` ${marker} ${ctxLine.lineNumber}: ${ctxLine.line}`,
|
|
920
|
+
)
|
|
921
|
+
}
|
|
922
|
+
}
|
|
923
|
+
|
|
924
|
+
yield* Console.log('')
|
|
925
|
+
}
|
|
926
|
+
|
|
927
|
+
// Show below-threshold feedback when 0 results but content exists
|
|
928
|
+
if (
|
|
929
|
+
results.length === 0 &&
|
|
930
|
+
belowThresholdCount !== undefined &&
|
|
931
|
+
belowThresholdCount > 0 &&
|
|
932
|
+
belowThresholdHighest !== undefined
|
|
933
|
+
) {
|
|
934
|
+
const highestPct = (belowThresholdHighest * 100).toFixed(1)
|
|
935
|
+
const suggestedThreshold = Math.max(
|
|
936
|
+
0.1,
|
|
937
|
+
belowThresholdHighest - 0.05,
|
|
938
|
+
).toFixed(2)
|
|
939
|
+
yield* Console.log(
|
|
940
|
+
`Note: ${belowThresholdCount} results found below ${(effectiveThreshold * 100).toFixed(0)}% threshold (highest: ${highestPct}%)`,
|
|
941
|
+
)
|
|
942
|
+
yield* Console.log(
|
|
943
|
+
`Tip: Use --threshold ${suggestedThreshold} to see more results`,
|
|
944
|
+
)
|
|
945
|
+
yield* Console.log('')
|
|
946
|
+
}
|
|
947
|
+
|
|
948
|
+
yield* Console.log('Tip: Use --mode keyword for exact text matching')
|
|
949
|
+
}
|
|
950
|
+
|
|
951
|
+
// Summarization for semantic search
|
|
952
|
+
if (summarize && results.length > 0) {
|
|
953
|
+
const summarizableResults: SummarizableResult[] = results.map(
|
|
954
|
+
(r) => ({
|
|
955
|
+
documentPath: r.documentPath,
|
|
956
|
+
heading: r.heading,
|
|
957
|
+
similarity: r.similarity,
|
|
958
|
+
}),
|
|
959
|
+
)
|
|
960
|
+
yield* runSummarization({
|
|
961
|
+
results: summarizableResults,
|
|
962
|
+
query,
|
|
963
|
+
searchMode: 'semantic',
|
|
964
|
+
json,
|
|
965
|
+
yes,
|
|
966
|
+
stream,
|
|
967
|
+
config: {
|
|
968
|
+
mode: config.aiSummarization.mode,
|
|
969
|
+
provider: config.aiSummarization.provider,
|
|
970
|
+
},
|
|
971
|
+
})
|
|
972
|
+
}
|
|
973
|
+
}
|
|
974
|
+
}),
|
|
975
|
+
).pipe(Command.withDescription('Search by meaning or structure'))
|
|
976
|
+
|
|
977
|
+
/**
 * Input bundle for the AI summarization step that runs after a search.
 */
interface SummarizationOptions {
  /** The query string the search was executed with. */
  readonly query: string
  /** Which search mode produced the results. */
  readonly searchMode: 'hybrid' | 'semantic' | 'keyword'
  /** Search hits to summarize. */
  readonly results: readonly SummarizableResult[]
  /** Summarizer selection passed on to the provider lookup. */
  readonly config: {
    readonly mode: 'cli' | 'api'
    readonly provider: CLIProviderName | APIProviderName
  }
  /** True when the command is producing JSON output. */
  readonly json: boolean
  /** True to skip the interactive cost-confirmation prompt. */
  readonly yes: boolean
  /** True to request streamed summary output when the summarizer supports it. */
  readonly stream: boolean
}
|
|
992
|
+
|
|
993
|
+
/**
 * Summarize search results with AI without ever failing the caller.
 *
 * Delegates to runSummarizationUnsafe (cost estimation, user consent, output
 * formatting) and converts any failure into a displayed message.
 *
 * GRACEFUL DEGRADATION: failures are caught here, so the search results that
 * were already printed stay valid. In JSON mode the error is not displayed.
 *
 * @param options - Results, query and output flags for the summarization run.
 * @returns An effect whose error channel is `never`.
 */
const runSummarization = (
  options: SummarizationOptions,
): Effect.Effect<void, never> => {
  // Reports a failed summarization; stays quiet when JSON output was requested.
  const reportFailure = (failure: Error) =>
    Effect.sync(() => {
      if (options.json) {
        return
      }
      displaySummarizationError(failure)
    })

  return runSummarizationUnsafe(options).pipe(Effect.catchAll(reportFailure))
}
|
|
1012
|
+
|
|
1013
|
+
/**
 * Internal implementation that may fail.
 * Wrapped by runSummarization for graceful error handling.
 *
 * Steps: resolve a summarizer, format the results, estimate the cost, show the
 * estimate (asking for consent for paid providers unless `yes` is set), then
 * generate the summary either streamed to stdout or as a single response.
 *
 * @param options - Results, query and output flags for the summarization run.
 * @returns An effect that fails with `Error` when no summarizer is available,
 *   when the consent prompt rejects, or when the summarizer call rejects.
 */
const runSummarizationUnsafe = (
  options: SummarizationOptions,
): Effect.Effect<void, Error> =>
  Effect.gen(function* () {
    const { results, query, searchMode, json, yes, stream, config } = options

    if (results.length === 0) {
      if (!json) {
        yield* Console.log('No results to summarize.')
      }
      return
    }

    // Get summarizer
    const summarizerData = yield* Effect.tryPromise({
      try: async () => {
        const result = await getBestAvailableSummarizer({
          mode: config.mode,
          provider: config.provider,
        })
        if (!result) {
          throw new Error('No summarization providers available')
        }
        return result
      },
      catch: (e) => new Error(`Failed to get summarizer: ${e}`),
    })

    const { summarizer, config: resolvedConfig } = summarizerData

    // Format results for summary input
    const resultsText = formatResultsForSummary(results)

    // Estimate cost
    const costEstimate = estimateSummaryCost(
      resultsText,
      resolvedConfig.mode,
      resolvedConfig.provider,
    )

    // Display cost info (text output only; JSON mode never prompts)
    if (!json) {
      if (costEstimate.isPaid) {
        yield* Console.log('')
        yield* Console.log('Cost Estimate:')
        yield* Console.log(`  Provider: ${costEstimate.provider}`)
        yield* Console.log(
          `  Input tokens: ~${costEstimate.inputTokens.toLocaleString()}`,
        )
        yield* Console.log(
          `  Output tokens: ~${costEstimate.outputTokens.toLocaleString()}`,
        )
        yield* Console.log(`  Estimated cost: ${costEstimate.formattedCost}`)

        // Get user consent if needed
        if (!yes) {
          // tryPromise (not promise) so a rejected prompt becomes a typed
          // failure that runSummarization can catch, instead of a defect.
          const answer = yield* Effect.tryPromise({
            try: () => promptUser('Continue with summarization? [Y/n]: '),
            catch: (e) => new Error(`Failed to read confirmation: ${e}`),
          })
          if (answer === 'n' || answer === 'no') {
            yield* Console.log('Summarization cancelled.')
            return
          }
        }
      } else {
        yield* Console.log('')
        yield* Console.log(
          `Using ${resolvedConfig.provider} (subscription - FREE)`,
        )
      }
    }

    // Build prompt
    const prompt = buildPrompt({
      query,
      resultCount: results.length,
      searchMode,
    })

    // Generate summary
    if (!json) {
      yield* Console.log('')
      yield* Console.log('--- AI Summary ---')
      yield* Console.log('')
    }

    const startTime = Date.now()

    // Streaming writes raw chunks straight to stdout. In JSON mode that would
    // corrupt the output and skip the JSON summary object, so streaming is
    // only used for text output; JSON mode always takes the non-streaming path.
    if (stream && !json && 'summarizeStream' in summarizer) {
      // Streaming output
      yield* Effect.tryPromise({
        try: () =>
          (
            summarizer as {
              summarizeStream: (
                input: string,
                prompt: string,
                options: { onChunk: (chunk: string) => void },
              ) => Promise<void>
            }
          ).summarizeStream(resultsText, prompt, {
            onChunk: (chunk) => {
              process.stdout.write(chunk)
            },
          }),
        catch: (e) => new Error(`Summarization failed: ${e}`),
      })
      yield* Console.log('') // Final newline
    } else {
      // Non-streaming output
      const summaryResult = yield* Effect.tryPromise({
        try: () => summarizer.summarize(resultsText, prompt),
        catch: (e) => new Error(`Summarization failed: ${e}`),
      })

      if (json) {
        yield* Console.log(
          JSON.stringify(
            {
              summary: summaryResult.summary,
              provider: summaryResult.provider,
              mode: summaryResult.mode,
              durationMs: summaryResult.durationMs,
              cost: costEstimate.isPaid ? costEstimate.formattedCost : 'FREE',
            },
            null,
            2,
          ),
        )
      } else {
        yield* Console.log(summaryResult.summary)
      }
    }

    const durationMs = Date.now() - startTime
    if (!json) {
      yield* Console.log('')
      yield* Console.log('------------------')
      yield* Console.log(
        `Generated in ${(durationMs / 1000).toFixed(1)}s | ${costEstimate.isPaid ? costEstimate.formattedCost : 'FREE'}`,
      )
    }
  })
|
|
1162
|
+
|
|
1163
|
+
/**
 * Build the semantic index for `resolvedDir` and report the outcome.
 *
 * Per-file progress and the "Index created" summary are printed only when
 * `json` is false, so JSON output on stdout stays machine-readable.
 *
 * Yields true when `buildEmbeddings` produced a result, and false when the
 * embedding error handler yielded no result (the caller then falls back to
 * keyword search).
 */
const buildMissingEmbeddingsIndex = (resolvedDir: string, json: boolean) =>
  Effect.gen(function* () {
    // Note: Graceful degradation - embedding errors fall back to keyword search
    const result = yield* buildEmbeddings(resolvedDir, {
      force: false,
      onFileProgress: (progress) => {
        if (!json) {
          console.log(
            `  [${progress.fileIndex}/${progress.totalFiles}] ${progress.filePath}`,
          )
        }
      },
    }).pipe(
      // Widen the success type so the error handlers may recover with null.
      Effect.map((r): BuildEmbeddingsResult | null => r),
      Effect.catchTags(createEmbeddingErrorHandler({ silent: json })),
    )

    if (!result) {
      return false
    }

    if (!json) {
      yield* Console.log(
        `Index created (${result.sectionsEmbedded} sections, $${result.cost.toFixed(6)})`,
      )
      yield* Console.log('')
    }

    return true
  })

/**
 * Handle the case when embeddings don't exist.
 *
 * Flow:
 *  1. Estimate the embedding cost; if no estimate is available, print a hint
 *     on stderr and return false.
 *  2. If the estimated time is within `autoIndexThreshold` seconds, build the
 *     index immediately.
 *  3. Otherwise ask the user whether to build now or use keyword search.
 *
 * @param resolvedDir - Directory whose semantic index is missing.
 * @param autoIndexThreshold - Maximum estimated build time (seconds) for which
 *   the index is created without prompting.
 * @param json - When true, informational stdout output is suppressed.
 * @returns true if the index build produced a result, false to fall back to
 *   keyword search.
 */
const handleMissingEmbeddings = (
  resolvedDir: string,
  autoIndexThreshold: number,
  json: boolean,
): Effect.Effect<boolean, Error> =>
  Effect.gen(function* () {
    // Get cost estimate
    // Note: We gracefully handle errors since this is an optional auto-index feature.
    // IndexNotFoundError is expected if index doesn't exist.
    const estimate = yield* estimateEmbeddingCost(resolvedDir).pipe(
      Effect.map((r): EmbeddingEstimate | null => r),
      Effect.catchTags(createCostEstimateErrorHandler()),
    )

    if (!estimate) {
      yield* Console.error(
        'No semantic index found and could not estimate cost.',
      )
      yield* Console.error('Run "mdcontext index --embed" first.')
      return false
    }

    // Check if we should auto-index
    if (estimate.estimatedTimeSeconds <= autoIndexThreshold) {
      if (!json) {
        yield* Console.log(
          `Creating semantic index (~${estimate.estimatedTimeSeconds}s, ~$${estimate.totalCost.toFixed(4)})...`,
        )
      }

      return yield* buildMissingEmbeddingsIndex(resolvedDir, json)
    }

    // Prompt user for larger indexes
    if (!json) {
      yield* Console.log('')
      yield* Console.log('No semantic index found.')
      yield* Console.log('')
      yield* Console.log('Options:')
      yield* Console.log(
        `  1. Create now (recommended, ~${estimate.estimatedTimeSeconds}s, ~$${estimate.totalCost.toFixed(4)})`,
      )
      yield* Console.log('  2. Use keyword search instead')
      yield* Console.log('')
    }

    // NOTE(review): the prompt is still issued in JSON mode even though the
    // options above are hidden - confirm whether JSON callers should skip it.
    const answer = yield* Effect.promise(() => promptUser('Choice [1]: '))
    // An empty answer selects the default option (1).
    const choice = answer === '' || answer === '1' ? '1' : answer

    if (choice === '1') {
      if (!json) {
        yield* Console.log('')
        yield* Console.log('Building embeddings...')
      }

      return yield* buildMissingEmbeddingsIndex(resolvedDir, json)
    }

    // User chose keyword search. Keep stdout clean in JSON mode, matching
    // every other informational message in this function.
    if (!json) {
      yield* Console.log('')
      yield* Console.log('Falling back to keyword search.')
    }
    return false
  })
|