mdcontext 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.changeset/config.json +9 -9
- package/.claude/settings.local.json +25 -0
- package/.github/workflows/claude-code-review.yml +44 -0
- package/.github/workflows/claude.yml +85 -0
- package/CONTRIBUTING.md +186 -0
- package/NOTES/NOTES +44 -0
- package/README.md +206 -3
- package/biome.json +1 -1
- package/dist/chunk-23UPXDNL.js +3044 -0
- package/dist/chunk-2W7MO2DL.js +1366 -0
- package/dist/chunk-3NUAZGMA.js +1689 -0
- package/dist/chunk-7TOWB2XB.js +366 -0
- package/dist/chunk-7XOTOADQ.js +3065 -0
- package/dist/chunk-AH2PDM2K.js +3042 -0
- package/dist/chunk-BNXWSZ63.js +3742 -0
- package/dist/chunk-BTL5DJVU.js +3222 -0
- package/dist/chunk-HDHYG7E4.js +104 -0
- package/dist/chunk-HLR4KZBP.js +3234 -0
- package/dist/chunk-IP3FRFEB.js +1045 -0
- package/dist/chunk-KHU56VDO.js +3042 -0
- package/dist/chunk-KRYIFLQR.js +85 -89
- package/dist/chunk-LBSDNLEM.js +287 -0
- package/dist/chunk-MNTQ7HCP.js +2643 -0
- package/dist/chunk-MUJELQQ6.js +1387 -0
- package/dist/chunk-MXJGMSLV.js +2199 -0
- package/dist/chunk-N6QJGC3Z.js +2636 -0
- package/dist/chunk-OBELGBPM.js +1713 -0
- package/dist/chunk-OT7R5XTA.js +3192 -0
- package/dist/chunk-P7X4RA2T.js +106 -0
- package/dist/chunk-PIDUQNC2.js +3185 -0
- package/dist/chunk-POGCDIH4.js +3187 -0
- package/dist/chunk-PSIEOQGZ.js +3043 -0
- package/dist/chunk-PVRT3IHA.js +3238 -0
- package/dist/chunk-QNN4TT23.js +1430 -0
- package/dist/chunk-RE3R45RJ.js +3042 -0
- package/dist/chunk-S7E6TFX6.js +718 -657
- package/dist/chunk-SG6GLU4U.js +1378 -0
- package/dist/chunk-SJCDV2ST.js +274 -0
- package/dist/chunk-SYE5XLF3.js +104 -0
- package/dist/chunk-T5VLYBZD.js +103 -0
- package/dist/chunk-TOQB7VWU.js +3238 -0
- package/dist/chunk-VFNMZ4ZQ.js +3228 -0
- package/dist/chunk-VVTGZNBT.js +1533 -1423
- package/dist/chunk-W7Q4RFEV.js +104 -0
- package/dist/chunk-XTYYVRLO.js +3190 -0
- package/dist/chunk-Y6MDYVJD.js +3063 -0
- package/dist/cli/main.js +4072 -629
- package/dist/index.d.ts +420 -33
- package/dist/index.js +8 -15
- package/dist/mcp/server.js +103 -7
- package/dist/schema-BAWSG7KY.js +22 -0
- package/dist/schema-E3QUPL26.js +20 -0
- package/dist/schema-EHL7WUT6.js +20 -0
- package/docs/019-USAGE.md +44 -5
- package/docs/020-current-implementation.md +8 -8
- package/docs/021-DOGFOODING-FINDINGS.md +1 -1
- package/docs/CONFIG.md +1123 -0
- package/docs/ERRORS.md +383 -0
- package/docs/summarization.md +320 -0
- package/justfile +40 -0
- package/package.json +39 -33
- package/research/INDEX.md +315 -0
- package/research/code-review/README.md +90 -0
- package/research/code-review/cli-error-handling-review.md +979 -0
- package/research/code-review/code-review-validation-report.md +464 -0
- package/research/code-review/main-ts-review.md +1128 -0
- package/research/config-docs/SUMMARY.md +357 -0
- package/research/config-docs/TEST-RESULTS.md +776 -0
- package/research/config-docs/TODO.md +542 -0
- package/research/config-docs/analysis.md +744 -0
- package/research/config-docs/fix-validation.md +502 -0
- package/research/config-docs/help-audit.md +264 -0
- package/research/config-docs/help-system-analysis.md +890 -0
- package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
- package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
- package/research/issue-review.md +603 -0
- package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
- package/research/llm-summarization/alternative-providers-2026.md +1428 -0
- package/research/llm-summarization/anthropic-2026.md +367 -0
- package/research/llm-summarization/claude-cli-integration.md +1706 -0
- package/research/llm-summarization/cli-integration-patterns.md +3155 -0
- package/research/llm-summarization/openai-2026.md +473 -0
- package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
- package/research/llm-summarization/opencode-cli-integration.md +1552 -0
- package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
- package/research/llm-summarization/prototype-results.md +56 -0
- package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
- package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
- package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
- package/research/mdcontext-pudding/01-index-embed.md +956 -0
- package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
- package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
- package/research/mdcontext-pudding/02-search.md +970 -0
- package/research/mdcontext-pudding/03-context.md +779 -0
- package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
- package/research/mdcontext-pudding/04-tree.md +704 -0
- package/research/mdcontext-pudding/05-config.md +1038 -0
- package/research/mdcontext-pudding/06-links-summary.txt +87 -0
- package/research/mdcontext-pudding/06-links.md +679 -0
- package/research/mdcontext-pudding/07-stats.md +693 -0
- package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
- package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
- package/research/mdcontext-pudding/README.md +168 -0
- package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
- package/research/research-quality-review.md +834 -0
- package/research/semantic-search/embedding-text-analysis.md +156 -0
- package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
- package/research/semantic-search/query-processing-analysis.md +207 -0
- package/research/semantic-search/root-cause-and-solution.md +114 -0
- package/research/semantic-search/threshold-validation-report.md +69 -0
- package/research/semantic-search/vector-search-analysis.md +63 -0
- package/research/test-path-issues.md +276 -0
- package/review/ALP-76/1-error-type-design.md +962 -0
- package/review/ALP-76/2-error-handling-patterns.md +906 -0
- package/review/ALP-76/3-error-presentation.md +624 -0
- package/review/ALP-76/4-test-coverage.md +625 -0
- package/review/ALP-76/5-migration-completeness.md +440 -0
- package/review/ALP-76/6-effect-best-practices.md +755 -0
- package/scripts/apply-branch-protection.sh +47 -0
- package/scripts/branch-protection-templates.json +79 -0
- package/scripts/prototype-summarization.ts +346 -0
- package/scripts/rebuild-hnswlib.js +32 -37
- package/scripts/setup-branch-protection.sh +64 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
- package/src/cli/argv-preprocessor.test.ts +2 -2
- package/src/cli/cli.test.ts +230 -33
- package/src/cli/commands/config-cmd.ts +642 -0
- package/src/cli/commands/context.ts +97 -9
- package/src/cli/commands/duplicates.ts +122 -0
- package/src/cli/commands/embeddings.ts +529 -0
- package/src/cli/commands/index-cmd.ts +210 -30
- package/src/cli/commands/index.ts +3 -0
- package/src/cli/commands/search.ts +894 -64
- package/src/cli/commands/stats.ts +3 -0
- package/src/cli/commands/tree.ts +26 -5
- package/src/cli/config-layer.ts +176 -0
- package/src/cli/error-handler.test.ts +235 -0
- package/src/cli/error-handler.ts +655 -0
- package/src/cli/flag-schemas.ts +66 -0
- package/src/cli/help.ts +209 -7
- package/src/cli/main.ts +348 -58
- package/src/cli/options.ts +10 -0
- package/src/cli/shared-error-handling.ts +199 -0
- package/src/cli/utils.ts +150 -17
- package/src/config/file-provider.test.ts +320 -0
- package/src/config/file-provider.ts +273 -0
- package/src/config/index.ts +72 -0
- package/src/config/integration.test.ts +667 -0
- package/src/config/precedence.test.ts +277 -0
- package/src/config/precedence.ts +451 -0
- package/src/config/schema.test.ts +414 -0
- package/src/config/schema.ts +603 -0
- package/src/config/service.test.ts +320 -0
- package/src/config/service.ts +243 -0
- package/src/config/testing.test.ts +264 -0
- package/src/config/testing.ts +110 -0
- package/src/core/types.ts +6 -33
- package/src/duplicates/detector.test.ts +183 -0
- package/src/duplicates/detector.ts +414 -0
- package/src/duplicates/index.ts +18 -0
- package/src/embeddings/embedding-namespace.test.ts +300 -0
- package/src/embeddings/embedding-namespace.ts +947 -0
- package/src/embeddings/heading-boost.test.ts +222 -0
- package/src/embeddings/hnsw-build-options.test.ts +198 -0
- package/src/embeddings/hyde.test.ts +272 -0
- package/src/embeddings/hyde.ts +264 -0
- package/src/embeddings/index.ts +2 -0
- package/src/embeddings/openai-provider.ts +332 -83
- package/src/embeddings/pricing.json +22 -0
- package/src/embeddings/provider-constants.ts +204 -0
- package/src/embeddings/provider-errors.test.ts +967 -0
- package/src/embeddings/provider-errors.ts +565 -0
- package/src/embeddings/provider-factory.test.ts +240 -0
- package/src/embeddings/provider-factory.ts +225 -0
- package/src/embeddings/provider-integration.test.ts +788 -0
- package/src/embeddings/query-preprocessing.test.ts +187 -0
- package/src/embeddings/semantic-search-threshold.test.ts +508 -0
- package/src/embeddings/semantic-search.ts +780 -93
- package/src/embeddings/types.ts +293 -16
- package/src/embeddings/vector-store.ts +486 -77
- package/src/embeddings/voyage-provider.ts +313 -0
- package/src/errors/errors.test.ts +845 -0
- package/src/errors/index.ts +533 -0
- package/src/index/ignore-patterns.test.ts +354 -0
- package/src/index/ignore-patterns.ts +305 -0
- package/src/index/indexer.ts +286 -48
- package/src/index/storage.ts +94 -30
- package/src/index/types.ts +40 -2
- package/src/index/watcher.ts +67 -9
- package/src/index.ts +22 -0
- package/src/integration/search-keyword.test.ts +678 -0
- package/src/mcp/server.ts +135 -6
- package/src/parser/parser.ts +18 -19
- package/src/parser/section-filter.test.ts +277 -0
- package/src/parser/section-filter.ts +125 -3
- package/src/search/__tests__/hybrid-search.test.ts +650 -0
- package/src/search/bm25-store.ts +366 -0
- package/src/search/cross-encoder.test.ts +253 -0
- package/src/search/cross-encoder.ts +406 -0
- package/src/search/fuzzy-search.test.ts +419 -0
- package/src/search/fuzzy-search.ts +273 -0
- package/src/search/hybrid-search.ts +448 -0
- package/src/search/path-matcher.test.ts +276 -0
- package/src/search/path-matcher.ts +33 -0
- package/src/search/searcher.test.ts +99 -1
- package/src/search/searcher.ts +189 -67
- package/src/search/wink-bm25.d.ts +30 -0
- package/src/summarization/cli-providers/claude.ts +202 -0
- package/src/summarization/cli-providers/detection.test.ts +273 -0
- package/src/summarization/cli-providers/detection.ts +118 -0
- package/src/summarization/cli-providers/index.ts +8 -0
- package/src/summarization/cost.test.ts +139 -0
- package/src/summarization/cost.ts +102 -0
- package/src/summarization/error-handler.test.ts +127 -0
- package/src/summarization/error-handler.ts +111 -0
- package/src/summarization/index.ts +102 -0
- package/src/summarization/pipeline.test.ts +498 -0
- package/src/summarization/pipeline.ts +231 -0
- package/src/summarization/prompts.test.ts +269 -0
- package/src/summarization/prompts.ts +133 -0
- package/src/summarization/provider-factory.test.ts +396 -0
- package/src/summarization/provider-factory.ts +178 -0
- package/src/summarization/types.ts +184 -0
- package/src/summarize/summarizer.ts +104 -35
- package/src/types/huggingface-transformers.d.ts +66 -0
- package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/indexes/documents.json +4 -4
- package/tests/fixtures/cli/.mdcontext/indexes/sections.json +14 -0
- package/tests/integration/embed-index.test.ts +712 -0
- package/tests/integration/search-context.test.ts +469 -0
- package/tests/integration/search-semantic.test.ts +522 -0
- package/vitest.config.ts +1 -6
- package/AGENTS.md +0 -46
- package/tests/fixtures/cli/.mdcontext/vectors.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/vectors.meta.json +0 -1264
package/src/search/searcher.ts
CHANGED
|
@@ -6,12 +6,25 @@ import * as fs from 'node:fs/promises'
|
|
|
6
6
|
import * as path from 'node:path'
|
|
7
7
|
import { Effect } from 'effect'
|
|
8
8
|
|
|
9
|
+
import {
|
|
10
|
+
DocumentNotFoundError,
|
|
11
|
+
type FileReadError,
|
|
12
|
+
type IndexCorruptedError,
|
|
13
|
+
IndexNotFoundError,
|
|
14
|
+
} from '../errors/index.js'
|
|
9
15
|
import {
|
|
10
16
|
createStorage,
|
|
11
17
|
loadDocumentIndex,
|
|
12
18
|
loadSectionIndex,
|
|
13
19
|
} from '../index/storage.js'
|
|
14
20
|
import type { DocumentEntry, SectionEntry } from '../index/types.js'
|
|
21
|
+
import {
|
|
22
|
+
buildFuzzyHighlightPattern,
|
|
23
|
+
findMatchesInLine,
|
|
24
|
+
type MatchOptions,
|
|
25
|
+
matchesWithOptions,
|
|
26
|
+
} from './fuzzy-search.js'
|
|
27
|
+
import { matchPath } from './path-matcher.js'
|
|
15
28
|
import {
|
|
16
29
|
buildHighlightPattern,
|
|
17
30
|
evaluateQuery,
|
|
@@ -47,6 +60,12 @@ export interface SearchOptions {
|
|
|
47
60
|
readonly contextBefore?: number | undefined
|
|
48
61
|
/** Lines of context after matches */
|
|
49
62
|
readonly contextAfter?: number | undefined
|
|
63
|
+
/** Enable fuzzy matching with typo tolerance */
|
|
64
|
+
readonly fuzzy?: boolean | undefined
|
|
65
|
+
/** Max edit distance for fuzzy matching (default: 2) */
|
|
66
|
+
readonly fuzzyDistance?: number | undefined
|
|
67
|
+
/** Enable word stemming (fail matches failure, failed, etc.) */
|
|
68
|
+
readonly stem?: boolean | undefined
|
|
50
69
|
}
|
|
51
70
|
|
|
52
71
|
export interface ContentMatch {
|
|
@@ -77,29 +96,27 @@ export interface SearchResult {
|
|
|
77
96
|
readonly matches?: readonly ContentMatch[]
|
|
78
97
|
}
|
|
79
98
|
|
|
80
|
-
// ============================================================================
|
|
81
|
-
// Path Matching
|
|
82
|
-
// ============================================================================
|
|
83
|
-
|
|
84
|
-
const matchPath = (filePath: string, pattern: string): boolean => {
|
|
85
|
-
// Simple glob-like matching
|
|
86
|
-
const regexPattern = pattern
|
|
87
|
-
.replace(/\./g, '\\.')
|
|
88
|
-
.replace(/\*/g, '.*')
|
|
89
|
-
.replace(/\?/g, '.')
|
|
90
|
-
|
|
91
|
-
const regex = new RegExp(`^${regexPattern}$`, 'i')
|
|
92
|
-
return regex.test(filePath)
|
|
93
|
-
}
|
|
94
|
-
|
|
95
99
|
// ============================================================================
|
|
96
100
|
// Search Implementation
|
|
97
101
|
// ============================================================================
|
|
98
102
|
|
|
103
|
+
/**
|
|
104
|
+
* Search for sections by metadata (heading, path, content flags).
|
|
105
|
+
*
|
|
106
|
+
* @param rootPath - Root directory containing indexed markdown files
|
|
107
|
+
* @param options - Search filters (heading, path pattern, code/list/table flags)
|
|
108
|
+
* @returns Matching sections
|
|
109
|
+
*
|
|
110
|
+
* @throws FileReadError - Cannot read index files
|
|
111
|
+
* @throws IndexCorruptedError - Index files are corrupted
|
|
112
|
+
*/
|
|
99
113
|
export const search = (
|
|
100
114
|
rootPath: string,
|
|
101
115
|
options: SearchOptions = {},
|
|
102
|
-
): Effect.Effect<
|
|
116
|
+
): Effect.Effect<
|
|
117
|
+
readonly SearchResult[],
|
|
118
|
+
FileReadError | IndexCorruptedError
|
|
119
|
+
> =>
|
|
103
120
|
Effect.gen(function* () {
|
|
104
121
|
const storage = createStorage(rootPath)
|
|
105
122
|
|
|
@@ -184,11 +201,21 @@ export const search = (
|
|
|
184
201
|
* Search within section content.
|
|
185
202
|
* Supports boolean operators (AND, OR, NOT) and quoted phrases.
|
|
186
203
|
* Falls back to regex for simple patterns.
|
|
204
|
+
*
|
|
205
|
+
* @param rootPath - Root directory containing indexed markdown files
|
|
206
|
+
* @param options - Search options including content pattern
|
|
207
|
+
* @returns Matching sections with match highlights
|
|
208
|
+
*
|
|
209
|
+
* @throws FileReadError - Cannot read index or source files
|
|
210
|
+
* @throws IndexCorruptedError - Index files are corrupted
|
|
187
211
|
*/
|
|
188
212
|
export const searchContent = (
|
|
189
213
|
rootPath: string,
|
|
190
214
|
options: SearchOptions = {},
|
|
191
|
-
): Effect.Effect<
|
|
215
|
+
): Effect.Effect<
|
|
216
|
+
readonly SearchResult[],
|
|
217
|
+
FileReadError | IndexCorruptedError
|
|
218
|
+
> =>
|
|
192
219
|
Effect.gen(function* () {
|
|
193
220
|
const storage = createStorage(rootPath)
|
|
194
221
|
|
|
@@ -204,16 +231,38 @@ export const searchContent = (
|
|
|
204
231
|
let contentRegex: RegExp | null = null
|
|
205
232
|
let highlightRegex: RegExp | null = null
|
|
206
233
|
|
|
234
|
+
// Configure fuzzy/stem matching options
|
|
235
|
+
const matchOptions: MatchOptions = {
|
|
236
|
+
stem: options.stem,
|
|
237
|
+
fuzzyDistance: options.fuzzy ? (options.fuzzyDistance ?? 2) : undefined,
|
|
238
|
+
}
|
|
239
|
+
const useFuzzyOrStem = options.fuzzy || options.stem
|
|
240
|
+
|
|
207
241
|
if (options.content) {
|
|
208
242
|
if (isAdvancedQuery(options.content)) {
|
|
209
243
|
parsedQuery = parseQuery(options.content)
|
|
210
244
|
if (parsedQuery) {
|
|
211
|
-
|
|
245
|
+
if (useFuzzyOrStem) {
|
|
246
|
+
highlightRegex = buildFuzzyHighlightPattern(
|
|
247
|
+
options.content,
|
|
248
|
+
matchOptions,
|
|
249
|
+
)
|
|
250
|
+
} else {
|
|
251
|
+
highlightRegex = buildHighlightPattern(parsedQuery)
|
|
252
|
+
}
|
|
212
253
|
}
|
|
213
254
|
} else {
|
|
214
|
-
// Simple search - use
|
|
215
|
-
|
|
216
|
-
|
|
255
|
+
// Simple search - use regex for exact match, or fuzzy/stem matching
|
|
256
|
+
if (!useFuzzyOrStem) {
|
|
257
|
+
contentRegex = new RegExp(options.content, 'gi')
|
|
258
|
+
highlightRegex = contentRegex
|
|
259
|
+
} else {
|
|
260
|
+
// For fuzzy/stem mode, build a highlight pattern
|
|
261
|
+
highlightRegex = buildFuzzyHighlightPattern(
|
|
262
|
+
options.content,
|
|
263
|
+
matchOptions,
|
|
264
|
+
)
|
|
265
|
+
}
|
|
217
266
|
}
|
|
218
267
|
}
|
|
219
268
|
|
|
@@ -248,7 +297,11 @@ export const searchContent = (
|
|
|
248
297
|
let fileContent: string | null = null
|
|
249
298
|
let fileLines: string[] = []
|
|
250
299
|
|
|
251
|
-
if
|
|
300
|
+
// Need to load file if we have any content matching to do:
|
|
301
|
+
// - parsedQuery: boolean query evaluation
|
|
302
|
+
// - contentRegex: regex matching
|
|
303
|
+
// - useFuzzyOrStem: fuzzy/stem matching
|
|
304
|
+
if (parsedQuery || contentRegex || (useFuzzyOrStem && options.content)) {
|
|
252
305
|
const filePath = path.join(storage.rootPath, docPath)
|
|
253
306
|
try {
|
|
254
307
|
fileContent = yield* Effect.promise(() =>
|
|
@@ -299,7 +352,7 @@ export const searchContent = (
|
|
|
299
352
|
}
|
|
300
353
|
|
|
301
354
|
// Content search
|
|
302
|
-
if ((parsedQuery || contentRegex) && fileContent) {
|
|
355
|
+
if ((parsedQuery || contentRegex || useFuzzyOrStem) && fileContent) {
|
|
303
356
|
const sectionLines = fileLines.slice(
|
|
304
357
|
section.startLine - 1,
|
|
305
358
|
section.endLine,
|
|
@@ -313,6 +366,15 @@ export const searchContent = (
|
|
|
313
366
|
}
|
|
314
367
|
}
|
|
315
368
|
|
|
369
|
+
// For fuzzy/stem mode without boolean query, check section content
|
|
370
|
+
if (useFuzzyOrStem && !parsedQuery && options.content) {
|
|
371
|
+
if (
|
|
372
|
+
!matchesWithOptions(options.content, sectionContent, matchOptions)
|
|
373
|
+
) {
|
|
374
|
+
continue // Section doesn't match with fuzzy/stem
|
|
375
|
+
}
|
|
376
|
+
}
|
|
377
|
+
|
|
316
378
|
// Find individual line matches for highlighting
|
|
317
379
|
const matches: ContentMatch[] = []
|
|
318
380
|
const searchRegex = contentRegex || highlightRegex
|
|
@@ -321,47 +383,72 @@ export const searchContent = (
|
|
|
321
383
|
const contextBefore = options.contextBefore ?? 1
|
|
322
384
|
const contextAfter = options.contextAfter ?? 1
|
|
323
385
|
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
i + contextAfter + 1,
|
|
338
|
-
)
|
|
339
|
-
const snippetLines = sectionLines.slice(
|
|
340
|
-
snippetStart,
|
|
341
|
-
snippetEnd,
|
|
342
|
-
)
|
|
343
|
-
const snippet = snippetLines.join('\n')
|
|
344
|
-
|
|
345
|
-
// Build context lines array for JSON output
|
|
346
|
-
const contextLines: ContextLine[] = []
|
|
347
|
-
for (let j = snippetStart; j < snippetEnd; j++) {
|
|
348
|
-
const ctxLine = sectionLines[j]
|
|
349
|
-
if (ctxLine !== undefined) {
|
|
350
|
-
contextLines.push({
|
|
351
|
-
lineNumber: section.startLine + j,
|
|
352
|
-
line: ctxLine,
|
|
353
|
-
isMatch: j === i,
|
|
354
|
-
})
|
|
355
|
-
}
|
|
356
|
-
}
|
|
386
|
+
// Get query words for fuzzy/stem matching
|
|
387
|
+
const queryWords = options.content
|
|
388
|
+
? options.content
|
|
389
|
+
.toLowerCase()
|
|
390
|
+
.split(/\W+/)
|
|
391
|
+
.filter((w) => w.length > 0)
|
|
392
|
+
: []
|
|
393
|
+
|
|
394
|
+
for (let i = 0; i < sectionLines.length; i++) {
|
|
395
|
+
const line = sectionLines[i]
|
|
396
|
+
if (!line) continue
|
|
397
|
+
|
|
398
|
+
let isMatch = false
|
|
357
399
|
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
contextLines,
|
|
363
|
-
})
|
|
400
|
+
// Check with regex for exact match mode
|
|
401
|
+
if (searchRegex) {
|
|
402
|
+
if (searchRegex.test(line)) {
|
|
403
|
+
isMatch = true
|
|
364
404
|
}
|
|
405
|
+
// Reset regex lastIndex for next test
|
|
406
|
+
searchRegex.lastIndex = 0
|
|
407
|
+
}
|
|
408
|
+
|
|
409
|
+
// Check with fuzzy/stem matching
|
|
410
|
+
if (!isMatch && useFuzzyOrStem && queryWords.length > 0) {
|
|
411
|
+
const lineMatches = findMatchesInLine(
|
|
412
|
+
queryWords,
|
|
413
|
+
line,
|
|
414
|
+
matchOptions,
|
|
415
|
+
)
|
|
416
|
+
if (lineMatches.length > 0) {
|
|
417
|
+
isMatch = true
|
|
418
|
+
}
|
|
419
|
+
}
|
|
420
|
+
|
|
421
|
+
if (isMatch) {
|
|
422
|
+
const absoluteLineNum = section.startLine + i
|
|
423
|
+
|
|
424
|
+
// Create snippet with configurable context
|
|
425
|
+
const snippetStart = Math.max(0, i - contextBefore)
|
|
426
|
+
const snippetEnd = Math.min(
|
|
427
|
+
sectionLines.length,
|
|
428
|
+
i + contextAfter + 1,
|
|
429
|
+
)
|
|
430
|
+
const snippetLines = sectionLines.slice(snippetStart, snippetEnd)
|
|
431
|
+
const snippet = snippetLines.join('\n')
|
|
432
|
+
|
|
433
|
+
// Build context lines array for JSON output
|
|
434
|
+
const contextLines: ContextLine[] = []
|
|
435
|
+
for (let j = snippetStart; j < snippetEnd; j++) {
|
|
436
|
+
const ctxLine = sectionLines[j]
|
|
437
|
+
if (ctxLine !== undefined) {
|
|
438
|
+
contextLines.push({
|
|
439
|
+
lineNumber: section.startLine + j,
|
|
440
|
+
line: ctxLine,
|
|
441
|
+
isMatch: j === i,
|
|
442
|
+
})
|
|
443
|
+
}
|
|
444
|
+
}
|
|
445
|
+
|
|
446
|
+
matches.push({
|
|
447
|
+
lineNumber: absoluteLineNum,
|
|
448
|
+
line: line,
|
|
449
|
+
snippet,
|
|
450
|
+
contextLines,
|
|
451
|
+
})
|
|
365
452
|
}
|
|
366
453
|
}
|
|
367
454
|
|
|
@@ -386,7 +473,7 @@ export const searchContent = (
|
|
|
386
473
|
return results
|
|
387
474
|
}
|
|
388
475
|
}
|
|
389
|
-
} else if (!parsedQuery && !contentRegex) {
|
|
476
|
+
} else if (!parsedQuery && !contentRegex && !useFuzzyOrStem) {
|
|
390
477
|
// No content search, heading-only search
|
|
391
478
|
results.push({ section, document })
|
|
392
479
|
|
|
@@ -404,10 +491,23 @@ export const searchContent = (
|
|
|
404
491
|
// Search with Content (legacy, uses heading-only search)
|
|
405
492
|
// ============================================================================
|
|
406
493
|
|
|
494
|
+
/**
|
|
495
|
+
* Search for sections by metadata and include section content.
|
|
496
|
+
*
|
|
497
|
+
* @param rootPath - Root directory containing indexed markdown files
|
|
498
|
+
* @param options - Search filters
|
|
499
|
+
* @returns Matching sections with content
|
|
500
|
+
*
|
|
501
|
+
* @throws FileReadError - Cannot read index or source files
|
|
502
|
+
* @throws IndexCorruptedError - Index files are corrupted
|
|
503
|
+
*/
|
|
407
504
|
export const searchWithContent = (
|
|
408
505
|
rootPath: string,
|
|
409
506
|
options: SearchOptions = {},
|
|
410
|
-
): Effect.Effect<
|
|
507
|
+
): Effect.Effect<
|
|
508
|
+
readonly SearchResult[],
|
|
509
|
+
FileReadError | IndexCorruptedError
|
|
510
|
+
> =>
|
|
411
511
|
Effect.gen(function* () {
|
|
412
512
|
const storage = createStorage(rootPath)
|
|
413
513
|
const results = yield* search(rootPath, options)
|
|
@@ -471,11 +571,30 @@ export interface SectionContext {
|
|
|
471
571
|
readonly hasTable: boolean
|
|
472
572
|
}
|
|
473
573
|
|
|
574
|
+
/**
|
|
575
|
+
* Get context information for a document.
|
|
576
|
+
*
|
|
577
|
+
* @param rootPath - Root directory containing indexed markdown files
|
|
578
|
+
* @param filePath - Path to the document
|
|
579
|
+
* @param options - Context options (max tokens, include content)
|
|
580
|
+
* @returns Document context with sections
|
|
581
|
+
*
|
|
582
|
+
* @throws IndexNotFoundError - Index doesn't exist
|
|
583
|
+
* @throws DocumentNotFoundError - Document not in index
|
|
584
|
+
* @throws FileReadError - Cannot read index or source files
|
|
585
|
+
* @throws IndexCorruptedError - Index files are corrupted
|
|
586
|
+
*/
|
|
474
587
|
export const getContext = (
|
|
475
588
|
rootPath: string,
|
|
476
589
|
filePath: string,
|
|
477
590
|
options: ContextOptions = {},
|
|
478
|
-
): Effect.Effect<
|
|
591
|
+
): Effect.Effect<
|
|
592
|
+
DocumentContext,
|
|
593
|
+
| IndexNotFoundError
|
|
594
|
+
| DocumentNotFoundError
|
|
595
|
+
| FileReadError
|
|
596
|
+
| IndexCorruptedError
|
|
597
|
+
> =>
|
|
479
598
|
Effect.gen(function* () {
|
|
480
599
|
const storage = createStorage(rootPath)
|
|
481
600
|
const resolvedFile = path.resolve(filePath)
|
|
@@ -486,14 +605,17 @@ export const getContext = (
|
|
|
486
605
|
|
|
487
606
|
if (!docIndex || !sectionIndex) {
|
|
488
607
|
return yield* Effect.fail(
|
|
489
|
-
new
|
|
608
|
+
new IndexNotFoundError({ path: storage.rootPath }),
|
|
490
609
|
)
|
|
491
610
|
}
|
|
492
611
|
|
|
493
612
|
const document = docIndex.documents[relativePath]
|
|
494
613
|
if (!document) {
|
|
495
614
|
return yield* Effect.fail(
|
|
496
|
-
new
|
|
615
|
+
new DocumentNotFoundError({
|
|
616
|
+
path: relativePath,
|
|
617
|
+
indexPath: storage.rootPath,
|
|
618
|
+
}),
|
|
497
619
|
)
|
|
498
620
|
}
|
|
499
621
|
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Type declarations for wink-bm25-text-search
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
declare module 'wink-bm25-text-search' {
  /**
   * A text-preparation step: receives a raw string and returns an array of
   * strings (presumably tokens — confirm against the library documentation).
   */
  type PrepTask = (text: string) => Array<string>

  /**
   * Options accepted by {@link BM25Engine.defineConfig}.
   *
   * NOTE(review): field meanings below are inferred from the option names;
   * verify against the wink-bm25-text-search documentation before relying on
   * them.
   */
  interface BM25Config {
    /** Numeric weight per field name. */
    fldWeights?: Record<string, number>
    /** BM25 tuning constants; both are optional. */
    bm25Params?: {
      k1?: number
      b?: number
    }
    /** List of field names (presumably fields kept as output values). */
    ovFldNames?: Array<string>
  }

  /** Shape of the engine object returned by the default-exported factory. */
  interface BM25Engine {
    /** Applies the engine configuration. */
    defineConfig(config: BM25Config): void
    /** Registers the text-preparation steps. */
    definePrepTasks(tasks: Array<PrepTask>): void
    /** Adds one document (field name -> text) under a numeric id. */
    addDoc(doc: Record<string, string>, id: number): void
    /** Finalizes the added documents (presumably required before searching). */
    consolidate(): void
    /**
     * Runs a query, optionally capped at `limit` results.
     *
     * @returns Pairs of numbers — presumably `[documentId, score]`; confirm.
     */
    search(query: string, limit?: number): Array<[number, number]>
    /** Serializes the engine state to a JSON string. */
    exportJSON(): string
    /** Restores engine state from a JSON string. */
    importJSON(json: string): void
    /** Resets the engine. */
    reset(): void
  }

  /** Factory that creates a new engine instance. */
  function bm25(): BM25Engine
  export default bm25
}
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Claude CLI Summarizer
|
|
3
|
+
*
|
|
4
|
+
* Uses Claude Code CLI for AI summarization.
|
|
5
|
+
* FREE for users with Claude Code subscriptions.
|
|
6
|
+
*
|
|
7
|
+
* SECURITY: Uses spawn() with argument arrays - NEVER exec() with string interpolation.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { spawn } from 'node:child_process'
|
|
11
|
+
import type {
|
|
12
|
+
StreamingSummarizer,
|
|
13
|
+
StreamOptions,
|
|
14
|
+
SummaryResult,
|
|
15
|
+
} from '../types.js'
|
|
16
|
+
import { SummarizationError as SummarizationErrorClass } from '../types.js'
|
|
17
|
+
|
|
18
|
+
/**
 * Claude CLI provider for summarization.
 *
 * Runs the `claude` CLI tool in non-interactive mode (`-p`) with text output
 * and treats everything the process writes to stdout as the summary.
 * Requires Claude Code installation and authentication.
 *
 * @security Uses spawn() with argument arrays to prevent shell injection.
 *           User input is passed as array elements, never interpolated.
 *
 * @cost Free (uses existing Claude subscription)
 *
 * @example
 * ```typescript
 * const summarizer = new ClaudeCLISummarizer()
 *
 * // Check availability
 * if (await summarizer.isAvailable()) {
 *   const result = await summarizer.summarize(searchResults, prompt)
 *   console.log(result.summary)
 *   // result.estimatedCost is always 0 (free)
 * }
 *
 * // Streaming output
 * await summarizer.summarizeStream(searchResults, prompt, {
 *   onChunk: (chunk) => process.stdout.write(chunk),
 *   onComplete: (result) => console.log(`Done in ${result.durationMs}ms`),
 * })
 * ```
 */
export class ClaudeCLISummarizer implements StreamingSummarizer {
  private readonly command = 'claude'

  /**
   * Summarize `input` according to `prompt` and return the complete result.
   *
   * @param input - Text to summarize (appended after the prompt)
   * @param prompt - Instructions placed before the input
   * @returns The trimmed CLI output plus provider metadata and duration
   * @throws SummarizationError (`CLI_EXECUTION_FAILED`) when the CLI cannot be
   *         spawned or exits unsuccessfully
   */
  async summarize(input: string, prompt: string): Promise<SummaryResult> {
    return this.runClaude(`${prompt}\n\n${input}`)
  }

  /**
   * Summarize `input`, forwarding stdout to `options.onChunk` as it arrives.
   *
   * `options.onError` is invoked at most once, before the returned promise
   * rejects. `options.onComplete` is invoked with the final result before the
   * returned promise resolves.
   *
   * @param input - Text to summarize (appended after the prompt)
   * @param prompt - Instructions placed before the input
   * @param options - Streaming callbacks
   * @throws SummarizationError (`CLI_EXECUTION_FAILED`) when the CLI cannot be
   *         spawned or exits unsuccessfully
   */
  async summarizeStream(
    input: string,
    prompt: string,
    options: StreamOptions,
  ): Promise<void> {
    const result = await this.runClaude(`${prompt}\n\n${input}`, {
      onChunk: (chunk) => options.onChunk(chunk),
      onError: (error) => options.onError?.(error),
    })

    options.onComplete?.(result)
  }

  /**
   * Estimate the cost of a request.
   *
   * @returns Always 0 — CLI providers are free with subscription
   */
  estimateCost(_inputTokens: number): number {
    return 0
  }

  /**
   * Check whether the `claude` executable can be found on PATH.
   *
   * Uses `where` on Windows and `which` elsewhere; any spawn failure or
   * non-zero exit is reported as "not available" rather than as an error.
   */
  async isAvailable(): Promise<boolean> {
    return new Promise((resolve) => {
      const checkCommand = process.platform === 'win32' ? 'where' : 'which'
      const proc = spawn(checkCommand, [this.command], {
        stdio: ['ignore', 'ignore', 'ignore'],
      })

      proc.on('close', (code) => {
        resolve(code === 0)
      })

      proc.on('error', () => {
        resolve(false)
      })
    })
  }

  /**
   * Spawn the CLI once, collect its output, and settle exactly once.
   *
   * Shared by `summarize` and `summarizeStream` so both paths have identical
   * process handling.
   *
   * NOTE(review): the whole prompt travels as a single argv element, so very
   * large inputs may hit the OS argument-size limit — confirm whether callers
   * cap the input size.
   */
  private runClaude(
    fullPrompt: string,
    hooks: {
      readonly onChunk?: ((chunk: string) => void) | undefined
      readonly onError?:
        | ((error: SummarizationErrorClass) => void)
        | undefined
    } = {},
  ): Promise<SummaryResult> {
    const startTime = Date.now()

    return new Promise((resolve, reject) => {
      // SECURITY: spawn() with argument array - safe from shell injection
      const proc = spawn(
        this.command,
        ['-p', fullPrompt, '--output-format', 'text'],
        {
          stdio: ['ignore', 'pipe', 'pipe'],
        },
      )

      // Decode at the stream level so a multi-byte UTF-8 character that is
      // split across two chunks is not corrupted by per-chunk toString().
      proc.stdout.setEncoding('utf8')
      proc.stderr.setEncoding('utf8')

      let stdout = ''
      let stderr = ''
      // 'error' and 'close' can both fire for the same child; this guard makes
      // sure callbacks and the promise are only triggered by the first one.
      let settled = false

      const fail = (error: SummarizationErrorClass): void => {
        if (settled) return
        settled = true
        hooks.onError?.(error)
        reject(error)
      }

      proc.stdout.on('data', (chunk: string) => {
        stdout += chunk
        hooks.onChunk?.(chunk)
      })

      proc.stderr.on('data', (chunk: string) => {
        stderr += chunk
      })

      proc.on('close', (code, signal) => {
        if (settled) return

        if (code !== 0) {
          // A null exit code means the child was killed by a signal.
          const status =
            code === null && signal !== null
              ? `was terminated by signal ${signal}`
              : `exited with code ${code}`
          fail(
            new SummarizationErrorClass(
              `Claude CLI ${status}: ${stderr}`,
              'CLI_EXECUTION_FAILED',
              'claude',
            ),
          )
          return
        }

        settled = true
        resolve({
          summary: stdout.trim(),
          provider: 'claude',
          mode: 'cli',
          estimatedCost: 0,
          durationMs: Date.now() - startTime,
        })
      })

      proc.on('error', (error: Error) => {
        fail(
          new SummarizationErrorClass(
            `Failed to spawn Claude CLI: ${error.message}`,
            'CLI_EXECUTION_FAILED',
            'claude',
            error,
          ),
        )
      })
    })
  }
}
|