mdcontext 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.changeset/config.json +9 -9
- package/.claude/settings.local.json +25 -0
- package/.github/workflows/claude-code-review.yml +44 -0
- package/.github/workflows/claude.yml +85 -0
- package/CONTRIBUTING.md +186 -0
- package/NOTES/NOTES +44 -0
- package/README.md +206 -3
- package/biome.json +1 -1
- package/dist/chunk-23UPXDNL.js +3044 -0
- package/dist/chunk-2W7MO2DL.js +1366 -0
- package/dist/chunk-3NUAZGMA.js +1689 -0
- package/dist/chunk-7TOWB2XB.js +366 -0
- package/dist/chunk-7XOTOADQ.js +3065 -0
- package/dist/chunk-AH2PDM2K.js +3042 -0
- package/dist/chunk-BNXWSZ63.js +3742 -0
- package/dist/chunk-BTL5DJVU.js +3222 -0
- package/dist/chunk-HDHYG7E4.js +104 -0
- package/dist/chunk-HLR4KZBP.js +3234 -0
- package/dist/chunk-IP3FRFEB.js +1045 -0
- package/dist/chunk-KHU56VDO.js +3042 -0
- package/dist/chunk-KRYIFLQR.js +85 -89
- package/dist/chunk-LBSDNLEM.js +287 -0
- package/dist/chunk-MNTQ7HCP.js +2643 -0
- package/dist/chunk-MUJELQQ6.js +1387 -0
- package/dist/chunk-MXJGMSLV.js +2199 -0
- package/dist/chunk-N6QJGC3Z.js +2636 -0
- package/dist/chunk-OBELGBPM.js +1713 -0
- package/dist/chunk-OT7R5XTA.js +3192 -0
- package/dist/chunk-P7X4RA2T.js +106 -0
- package/dist/chunk-PIDUQNC2.js +3185 -0
- package/dist/chunk-POGCDIH4.js +3187 -0
- package/dist/chunk-PSIEOQGZ.js +3043 -0
- package/dist/chunk-PVRT3IHA.js +3238 -0
- package/dist/chunk-QNN4TT23.js +1430 -0
- package/dist/chunk-RE3R45RJ.js +3042 -0
- package/dist/chunk-S7E6TFX6.js +718 -657
- package/dist/chunk-SG6GLU4U.js +1378 -0
- package/dist/chunk-SJCDV2ST.js +274 -0
- package/dist/chunk-SYE5XLF3.js +104 -0
- package/dist/chunk-T5VLYBZD.js +103 -0
- package/dist/chunk-TOQB7VWU.js +3238 -0
- package/dist/chunk-VFNMZ4ZQ.js +3228 -0
- package/dist/chunk-VVTGZNBT.js +1533 -1423
- package/dist/chunk-W7Q4RFEV.js +104 -0
- package/dist/chunk-XTYYVRLO.js +3190 -0
- package/dist/chunk-Y6MDYVJD.js +3063 -0
- package/dist/cli/main.js +4072 -629
- package/dist/index.d.ts +420 -33
- package/dist/index.js +8 -15
- package/dist/mcp/server.js +103 -7
- package/dist/schema-BAWSG7KY.js +22 -0
- package/dist/schema-E3QUPL26.js +20 -0
- package/dist/schema-EHL7WUT6.js +20 -0
- package/docs/019-USAGE.md +44 -5
- package/docs/020-current-implementation.md +8 -8
- package/docs/021-DOGFOODING-FINDINGS.md +1 -1
- package/docs/CONFIG.md +1123 -0
- package/docs/ERRORS.md +383 -0
- package/docs/summarization.md +320 -0
- package/justfile +40 -0
- package/package.json +39 -33
- package/research/INDEX.md +315 -0
- package/research/code-review/README.md +90 -0
- package/research/code-review/cli-error-handling-review.md +979 -0
- package/research/code-review/code-review-validation-report.md +464 -0
- package/research/code-review/main-ts-review.md +1128 -0
- package/research/config-docs/SUMMARY.md +357 -0
- package/research/config-docs/TEST-RESULTS.md +776 -0
- package/research/config-docs/TODO.md +542 -0
- package/research/config-docs/analysis.md +744 -0
- package/research/config-docs/fix-validation.md +502 -0
- package/research/config-docs/help-audit.md +264 -0
- package/research/config-docs/help-system-analysis.md +890 -0
- package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
- package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
- package/research/issue-review.md +603 -0
- package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
- package/research/llm-summarization/alternative-providers-2026.md +1428 -0
- package/research/llm-summarization/anthropic-2026.md +367 -0
- package/research/llm-summarization/claude-cli-integration.md +1706 -0
- package/research/llm-summarization/cli-integration-patterns.md +3155 -0
- package/research/llm-summarization/openai-2026.md +473 -0
- package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
- package/research/llm-summarization/opencode-cli-integration.md +1552 -0
- package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
- package/research/llm-summarization/prototype-results.md +56 -0
- package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
- package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
- package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
- package/research/mdcontext-pudding/01-index-embed.md +956 -0
- package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
- package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
- package/research/mdcontext-pudding/02-search.md +970 -0
- package/research/mdcontext-pudding/03-context.md +779 -0
- package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
- package/research/mdcontext-pudding/04-tree.md +704 -0
- package/research/mdcontext-pudding/05-config.md +1038 -0
- package/research/mdcontext-pudding/06-links-summary.txt +87 -0
- package/research/mdcontext-pudding/06-links.md +679 -0
- package/research/mdcontext-pudding/07-stats.md +693 -0
- package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
- package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
- package/research/mdcontext-pudding/README.md +168 -0
- package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
- package/research/research-quality-review.md +834 -0
- package/research/semantic-search/embedding-text-analysis.md +156 -0
- package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
- package/research/semantic-search/query-processing-analysis.md +207 -0
- package/research/semantic-search/root-cause-and-solution.md +114 -0
- package/research/semantic-search/threshold-validation-report.md +69 -0
- package/research/semantic-search/vector-search-analysis.md +63 -0
- package/research/test-path-issues.md +276 -0
- package/review/ALP-76/1-error-type-design.md +962 -0
- package/review/ALP-76/2-error-handling-patterns.md +906 -0
- package/review/ALP-76/3-error-presentation.md +624 -0
- package/review/ALP-76/4-test-coverage.md +625 -0
- package/review/ALP-76/5-migration-completeness.md +440 -0
- package/review/ALP-76/6-effect-best-practices.md +755 -0
- package/scripts/apply-branch-protection.sh +47 -0
- package/scripts/branch-protection-templates.json +79 -0
- package/scripts/prototype-summarization.ts +346 -0
- package/scripts/rebuild-hnswlib.js +32 -37
- package/scripts/setup-branch-protection.sh +64 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
- package/src/cli/argv-preprocessor.test.ts +2 -2
- package/src/cli/cli.test.ts +230 -33
- package/src/cli/commands/config-cmd.ts +642 -0
- package/src/cli/commands/context.ts +97 -9
- package/src/cli/commands/duplicates.ts +122 -0
- package/src/cli/commands/embeddings.ts +529 -0
- package/src/cli/commands/index-cmd.ts +210 -30
- package/src/cli/commands/index.ts +3 -0
- package/src/cli/commands/search.ts +894 -64
- package/src/cli/commands/stats.ts +3 -0
- package/src/cli/commands/tree.ts +26 -5
- package/src/cli/config-layer.ts +176 -0
- package/src/cli/error-handler.test.ts +235 -0
- package/src/cli/error-handler.ts +655 -0
- package/src/cli/flag-schemas.ts +66 -0
- package/src/cli/help.ts +209 -7
- package/src/cli/main.ts +348 -58
- package/src/cli/options.ts +10 -0
- package/src/cli/shared-error-handling.ts +199 -0
- package/src/cli/utils.ts +150 -17
- package/src/config/file-provider.test.ts +320 -0
- package/src/config/file-provider.ts +273 -0
- package/src/config/index.ts +72 -0
- package/src/config/integration.test.ts +667 -0
- package/src/config/precedence.test.ts +277 -0
- package/src/config/precedence.ts +451 -0
- package/src/config/schema.test.ts +414 -0
- package/src/config/schema.ts +603 -0
- package/src/config/service.test.ts +320 -0
- package/src/config/service.ts +243 -0
- package/src/config/testing.test.ts +264 -0
- package/src/config/testing.ts +110 -0
- package/src/core/types.ts +6 -33
- package/src/duplicates/detector.test.ts +183 -0
- package/src/duplicates/detector.ts +414 -0
- package/src/duplicates/index.ts +18 -0
- package/src/embeddings/embedding-namespace.test.ts +300 -0
- package/src/embeddings/embedding-namespace.ts +947 -0
- package/src/embeddings/heading-boost.test.ts +222 -0
- package/src/embeddings/hnsw-build-options.test.ts +198 -0
- package/src/embeddings/hyde.test.ts +272 -0
- package/src/embeddings/hyde.ts +264 -0
- package/src/embeddings/index.ts +2 -0
- package/src/embeddings/openai-provider.ts +332 -83
- package/src/embeddings/pricing.json +22 -0
- package/src/embeddings/provider-constants.ts +204 -0
- package/src/embeddings/provider-errors.test.ts +967 -0
- package/src/embeddings/provider-errors.ts +565 -0
- package/src/embeddings/provider-factory.test.ts +240 -0
- package/src/embeddings/provider-factory.ts +225 -0
- package/src/embeddings/provider-integration.test.ts +788 -0
- package/src/embeddings/query-preprocessing.test.ts +187 -0
- package/src/embeddings/semantic-search-threshold.test.ts +508 -0
- package/src/embeddings/semantic-search.ts +780 -93
- package/src/embeddings/types.ts +293 -16
- package/src/embeddings/vector-store.ts +486 -77
- package/src/embeddings/voyage-provider.ts +313 -0
- package/src/errors/errors.test.ts +845 -0
- package/src/errors/index.ts +533 -0
- package/src/index/ignore-patterns.test.ts +354 -0
- package/src/index/ignore-patterns.ts +305 -0
- package/src/index/indexer.ts +286 -48
- package/src/index/storage.ts +94 -30
- package/src/index/types.ts +40 -2
- package/src/index/watcher.ts +67 -9
- package/src/index.ts +22 -0
- package/src/integration/search-keyword.test.ts +678 -0
- package/src/mcp/server.ts +135 -6
- package/src/parser/parser.ts +18 -19
- package/src/parser/section-filter.test.ts +277 -0
- package/src/parser/section-filter.ts +125 -3
- package/src/search/__tests__/hybrid-search.test.ts +650 -0
- package/src/search/bm25-store.ts +366 -0
- package/src/search/cross-encoder.test.ts +253 -0
- package/src/search/cross-encoder.ts +406 -0
- package/src/search/fuzzy-search.test.ts +419 -0
- package/src/search/fuzzy-search.ts +273 -0
- package/src/search/hybrid-search.ts +448 -0
- package/src/search/path-matcher.test.ts +276 -0
- package/src/search/path-matcher.ts +33 -0
- package/src/search/searcher.test.ts +99 -1
- package/src/search/searcher.ts +189 -67
- package/src/search/wink-bm25.d.ts +30 -0
- package/src/summarization/cli-providers/claude.ts +202 -0
- package/src/summarization/cli-providers/detection.test.ts +273 -0
- package/src/summarization/cli-providers/detection.ts +118 -0
- package/src/summarization/cli-providers/index.ts +8 -0
- package/src/summarization/cost.test.ts +139 -0
- package/src/summarization/cost.ts +102 -0
- package/src/summarization/error-handler.test.ts +127 -0
- package/src/summarization/error-handler.ts +111 -0
- package/src/summarization/index.ts +102 -0
- package/src/summarization/pipeline.test.ts +498 -0
- package/src/summarization/pipeline.ts +231 -0
- package/src/summarization/prompts.test.ts +269 -0
- package/src/summarization/prompts.ts +133 -0
- package/src/summarization/provider-factory.test.ts +396 -0
- package/src/summarization/provider-factory.ts +178 -0
- package/src/summarization/types.ts +184 -0
- package/src/summarize/summarizer.ts +104 -35
- package/src/types/huggingface-transformers.d.ts +66 -0
- package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/indexes/documents.json +4 -4
- package/tests/fixtures/cli/.mdcontext/indexes/sections.json +14 -0
- package/tests/integration/embed-index.test.ts +712 -0
- package/tests/integration/search-context.test.ts +469 -0
- package/tests/integration/search-semantic.test.ts +522 -0
- package/vitest.config.ts +1 -6
- package/AGENTS.md +0 -46
- package/tests/fixtures/cli/.mdcontext/vectors.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/vectors.meta.json +0 -1264
package/src/index/indexer.ts
CHANGED
|
@@ -5,8 +5,18 @@
|
|
|
5
5
|
import * as fs from 'node:fs/promises'
|
|
6
6
|
import * as path from 'node:path'
|
|
7
7
|
import { Effect } from 'effect'
|
|
8
|
+
import type { Ignore } from 'ignore'
|
|
8
9
|
import type { MdSection } from '../core/types.js'
|
|
10
|
+
import {
|
|
11
|
+
type DirectoryCreateError,
|
|
12
|
+
DirectoryWalkError,
|
|
13
|
+
type FileReadError,
|
|
14
|
+
type FileWriteError,
|
|
15
|
+
type IndexCorruptedError,
|
|
16
|
+
ParseError,
|
|
17
|
+
} from '../errors/index.js'
|
|
9
18
|
import { parse } from '../parser/parser.js'
|
|
19
|
+
import { createIgnoreFilter, shouldIgnore } from './ignore-patterns.js'
|
|
10
20
|
import {
|
|
11
21
|
computeHash,
|
|
12
22
|
createEmptyDocumentIndex,
|
|
@@ -24,9 +34,10 @@ import {
|
|
|
24
34
|
import type {
|
|
25
35
|
DocumentEntry,
|
|
26
36
|
DocumentIndex,
|
|
27
|
-
|
|
37
|
+
FileProcessingError,
|
|
28
38
|
IndexResult,
|
|
29
39
|
SectionEntry,
|
|
40
|
+
SkipSummary,
|
|
30
41
|
} from './types.js'
|
|
31
42
|
|
|
32
43
|
// ============================================================================
|
|
@@ -36,52 +47,68 @@ import type {
|
|
|
36
47
|
/**
 * Tell whether a file name carries a markdown extension.
 *
 * The comparison is a case-sensitive suffix check against `.md` and `.mdx`.
 *
 * @param filename - File name (or path) to inspect
 * @returns `true` when the name ends with `.md` or `.mdx`
 */
const isMarkdownFile = (filename: string): boolean =>
  ['.md', '.mdx'].some((extension) => filename.endsWith(extension))
|
|
38
49
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
normalized.includes('node_modules')
|
|
48
|
-
) {
|
|
49
|
-
return true
|
|
50
|
-
}
|
|
51
|
-
if (pattern.startsWith('**/.*') && normalized.includes('/.')) {
|
|
52
|
-
return true
|
|
53
|
-
}
|
|
50
|
+
/**
 * Outcome of a directory walk: the markdown files that were found together
 * with a tally of the entries that were passed over.
 */
interface WalkResult {
  /** Paths of the markdown files collected by the walk. */
  readonly files: string[]
  /** Number of entries the walk skipped, grouped by reason. */
  readonly skipped: {
    /** Dot-prefixed directories that were not descended into. */
    hidden: number
    /** Entries (files or directories) rejected by the ignore filter. */
    excluded: number
  }
}
|
|
57
60
|
|
|
61
|
+
/**
 * Recursively collect markdown files under `dir`, honoring the ignore filter.
 *
 * Entries whose name starts with `.` are never returned or descended into.
 * Only hidden *directories* are added to `skipped.hidden`; hidden files are
 * dropped without being counted. Every remaining entry is tested against
 * `filter` using its path relative to `rootPath`, and a match adds one to
 * `skipped.excluded` regardless of whether it is a file or a directory.
 * Counts from sub-directories are folded into the totals of the caller.
 *
 * @param dir - Directory to walk
 * @param rootPath - Root path for computing relative paths
 * @param filter - Ignore filter instance
 * @returns Walk result with files and skip counts
 */
const walkDirectory = async (
  dir: string,
  rootPath: string,
  filter: Ignore,
): Promise<WalkResult> => {
  const files: string[] = []
  let hiddenCount = 0
  let excludedCount = 0
  const entries = await fs.readdir(dir, { withFileTypes: true })

  for (const entry of entries) {
    const fullPath = path.join(dir, entry.name)
    const relativePath = path.relative(rootPath, fullPath)

    // Skip hidden files/directories (starting with .); only directories
    // contribute to the hidden tally.
    if (entry.name.startsWith('.')) {
      if (entry.isDirectory()) {
        hiddenCount++
      }
      continue
    }

    // Check ignore filter for both files and directories. An excluded entry
    // counts once whatever its type, so no per-type branching is needed.
    if (shouldIgnore(relativePath, filter)) {
      excludedCount++
      continue
    }

    if (entry.isDirectory()) {
      const subResult = await walkDirectory(fullPath, rootPath, filter)
      files.push(...subResult.files)
      hiddenCount += subResult.skipped.hidden
      excludedCount += subResult.skipped.excluded
    } else if (entry.isFile() && isMarkdownFile(entry.name)) {
      files.push(fullPath)
    }
  }

  return { files, skipped: { hidden: hiddenCount, excluded: excludedCount } }
}
|
|
86
113
|
|
|
87
114
|
// ============================================================================
|
|
@@ -156,19 +183,39 @@ const resolveInternalLink = (
|
|
|
156
183
|
// Index Building
|
|
157
184
|
// ============================================================================
|
|
158
185
|
|
|
186
|
+
/**
 * Progress snapshot handed to `IndexOptions.onProgress` while files are
 * being indexed.
 */
export interface IndexProgress {
  /** 1-based position of the file about to be processed. */
  readonly current: number
  /** Number of files discovered for this indexing run. */
  readonly total: number
  /** Path of the file, relative to the index root. */
  readonly filePath: string
}
|
|
191
|
+
|
|
159
192
|
/**
 * Options accepted by `buildIndex`. Every member is optional and may be
 * passed explicitly as `undefined`.
 */
export interface IndexOptions {
  /**
   * Force flag for the index build.
   * NOTE(review): presumably bypasses the unchanged-file check — confirm
   * against the body of `buildIndex`.
   */
  readonly force?: boolean | undefined
  /** Exclude patterns from the CLI/config; these override ignore files. */
  readonly exclude?: readonly string[] | undefined
  /** Honor `.gitignore` when discovering files (treated as `true` if unset). */
  readonly honorGitignore?: boolean | undefined
  /** Honor `.mdcontextignore` when discovering files (treated as `true` if unset). */
  readonly honorMdcontextignore?: boolean | undefined
  /** Invoked once per discovered file with its position, the total and its path. */
  readonly onProgress?: ((progress: IndexProgress) => void) | undefined
}
|
|
163
203
|
|
|
164
204
|
export const buildIndex = (
|
|
165
205
|
rootPath: string,
|
|
166
206
|
options: IndexOptions = {},
|
|
167
|
-
): Effect.Effect<
|
|
207
|
+
): Effect.Effect<
|
|
208
|
+
IndexResult,
|
|
209
|
+
| DirectoryWalkError
|
|
210
|
+
| DirectoryCreateError
|
|
211
|
+
| FileReadError
|
|
212
|
+
| FileWriteError
|
|
213
|
+
| IndexCorruptedError
|
|
214
|
+
> =>
|
|
168
215
|
Effect.gen(function* () {
|
|
169
216
|
const startTime = Date.now()
|
|
170
217
|
const storage = createStorage(rootPath)
|
|
171
|
-
const errors:
|
|
218
|
+
const errors: FileProcessingError[] = []
|
|
172
219
|
|
|
173
220
|
// Initialize storage
|
|
174
221
|
yield* initializeIndex(storage)
|
|
@@ -186,42 +233,82 @@ export const buildIndex = (
|
|
|
186
233
|
const sectionIndex = existingSectionIndex ?? createEmptySectionIndex()
|
|
187
234
|
const linkIndex = existingLinkIndex ?? createEmptyLinkIndex()
|
|
188
235
|
|
|
189
|
-
//
|
|
190
|
-
|
|
191
|
-
const
|
|
192
|
-
|
|
193
|
-
|
|
236
|
+
// Build ignore filter with proper precedence:
|
|
237
|
+
// CLI/config patterns > .mdcontextignore > .gitignore > defaults
|
|
238
|
+
const ignoreResult = yield* createIgnoreFilter({
|
|
239
|
+
rootPath: storage.rootPath,
|
|
240
|
+
cliPatterns: options.exclude,
|
|
241
|
+
honorGitignore: options.honorGitignore ?? true,
|
|
242
|
+
honorMdcontextignore: options.honorMdcontextignore ?? true,
|
|
243
|
+
})
|
|
244
|
+
|
|
245
|
+
// Discover files using the ignore filter
|
|
246
|
+
const walkResult = yield* Effect.tryPromise({
|
|
247
|
+
try: () =>
|
|
248
|
+
walkDirectory(storage.rootPath, storage.rootPath, ignoreResult.filter),
|
|
249
|
+
catch: (e) =>
|
|
250
|
+
new DirectoryWalkError({
|
|
251
|
+
path: storage.rootPath,
|
|
252
|
+
message: `Failed to traverse directory: ${e instanceof Error ? e.message : String(e)}`,
|
|
253
|
+
cause: e,
|
|
254
|
+
}),
|
|
194
255
|
})
|
|
195
256
|
|
|
257
|
+
const { files, skipped: walkSkipped } = walkResult
|
|
258
|
+
|
|
196
259
|
// Process each file
|
|
197
260
|
let documentsIndexed = 0
|
|
198
261
|
let sectionsIndexed = 0
|
|
199
262
|
let linksIndexed = 0
|
|
263
|
+
let unchangedCount = 0
|
|
200
264
|
|
|
201
265
|
const mutableDocuments: Record<string, DocumentEntry> = {
|
|
202
266
|
...docIndex.documents,
|
|
203
267
|
}
|
|
204
|
-
// Initialize with existing data to preserve sections/links for unchanged files
|
|
205
268
|
const mutableSections: Record<string, SectionEntry> = {
|
|
206
269
|
...sectionIndex.sections,
|
|
207
270
|
}
|
|
208
|
-
|
|
209
|
-
|
|
271
|
+
|
|
272
|
+
const mutableByHeading: Record<string, string[]> = Object.assign(
|
|
273
|
+
Object.create(null),
|
|
274
|
+
Object.fromEntries(
|
|
275
|
+
Object.entries(sectionIndex.byHeading).map(([k, v]) => [k, [...v]]),
|
|
276
|
+
),
|
|
210
277
|
)
|
|
211
|
-
const mutableByDocument: Record<string, string[]> = Object.
|
|
212
|
-
Object.
|
|
278
|
+
const mutableByDocument: Record<string, string[]> = Object.assign(
|
|
279
|
+
Object.create(null),
|
|
280
|
+
Object.fromEntries(
|
|
281
|
+
Object.entries(sectionIndex.byDocument).map(([k, v]) => [k, [...v]]),
|
|
282
|
+
),
|
|
213
283
|
)
|
|
214
|
-
const mutableForward: Record<string, string[]> = Object.
|
|
215
|
-
Object.
|
|
284
|
+
const mutableForward: Record<string, string[]> = Object.assign(
|
|
285
|
+
Object.create(null),
|
|
286
|
+
Object.fromEntries(
|
|
287
|
+
Object.entries(linkIndex.forward).map(([k, v]) => [k, [...v]]),
|
|
288
|
+
),
|
|
216
289
|
)
|
|
217
|
-
const mutableBackward: Record<string, string[]> = Object.
|
|
218
|
-
Object.
|
|
290
|
+
const mutableBackward: Record<string, string[]> = Object.assign(
|
|
291
|
+
Object.create(null),
|
|
292
|
+
Object.fromEntries(
|
|
293
|
+
Object.entries(linkIndex.backward).map(([k, v]) => [k, [...v]]),
|
|
294
|
+
),
|
|
219
295
|
)
|
|
220
296
|
const brokenLinks: string[] = [...linkIndex.broken]
|
|
297
|
+
const totalFiles = files.length
|
|
221
298
|
|
|
222
|
-
for (
|
|
299
|
+
for (let fileIndex = 0; fileIndex < files.length; fileIndex++) {
|
|
300
|
+
const filePath = files[fileIndex]!
|
|
223
301
|
const relativePath = path.relative(storage.rootPath, filePath)
|
|
224
302
|
|
|
303
|
+
// Report progress
|
|
304
|
+
if (options.onProgress) {
|
|
305
|
+
options.onProgress({
|
|
306
|
+
current: fileIndex + 1,
|
|
307
|
+
total: totalFiles,
|
|
308
|
+
filePath: relativePath,
|
|
309
|
+
})
|
|
310
|
+
}
|
|
311
|
+
|
|
225
312
|
// Process each file, collecting errors instead of failing
|
|
226
313
|
const processFile = Effect.gen(function* () {
|
|
227
314
|
// Read file content and stats
|
|
@@ -239,6 +326,7 @@ export const buildIndex = (
|
|
|
239
326
|
existingEntry.hash === hash &&
|
|
240
327
|
existingEntry.mtime === stats.mtime.getTime()
|
|
241
328
|
) {
|
|
329
|
+
unchangedCount++
|
|
242
330
|
return // File unchanged, skip processing
|
|
243
331
|
}
|
|
244
332
|
|
|
@@ -248,7 +336,13 @@ export const buildIndex = (
|
|
|
248
336
|
lastModified: stats.mtime,
|
|
249
337
|
}).pipe(
|
|
250
338
|
Effect.mapError(
|
|
251
|
-
(e) =>
|
|
339
|
+
(e) =>
|
|
340
|
+
new ParseError({
|
|
341
|
+
message: e.message,
|
|
342
|
+
path: relativePath,
|
|
343
|
+
...(e.line !== undefined && { line: e.line }),
|
|
344
|
+
...(e.column !== undefined && { column: e.column }),
|
|
345
|
+
}),
|
|
252
346
|
),
|
|
253
347
|
)
|
|
254
348
|
|
|
@@ -333,10 +427,18 @@ export const buildIndex = (
|
|
|
333
427
|
|
|
334
428
|
mutableForward[relativePath] = outgoingLinks
|
|
335
429
|
}).pipe(
|
|
430
|
+
// Note: catchAll is intentional for batch file processing.
|
|
431
|
+
// Individual file failures should be collected in errors array
|
|
432
|
+
// rather than stopping the entire index build operation.
|
|
336
433
|
Effect.catchAll((error) => {
|
|
434
|
+
// Extract message from typed errors or generic errors
|
|
435
|
+
const message =
|
|
436
|
+
'message' in error && typeof error.message === 'string'
|
|
437
|
+
? error.message
|
|
438
|
+
: String(error)
|
|
337
439
|
errors.push({
|
|
338
440
|
path: relativePath,
|
|
339
|
-
message
|
|
441
|
+
message,
|
|
340
442
|
})
|
|
341
443
|
return Effect.void
|
|
342
444
|
}),
|
|
@@ -383,6 +485,14 @@ export const buildIndex = (
|
|
|
383
485
|
0,
|
|
384
486
|
)
|
|
385
487
|
|
|
488
|
+
// Build skip summary
|
|
489
|
+
const skipped: SkipSummary = {
|
|
490
|
+
unchanged: unchangedCount,
|
|
491
|
+
excluded: walkSkipped.excluded,
|
|
492
|
+
hidden: walkSkipped.hidden,
|
|
493
|
+
total: unchangedCount + walkSkipped.excluded + walkSkipped.hidden,
|
|
494
|
+
}
|
|
495
|
+
|
|
386
496
|
return {
|
|
387
497
|
documentsIndexed,
|
|
388
498
|
sectionsIndexed,
|
|
@@ -392,6 +502,7 @@ export const buildIndex = (
|
|
|
392
502
|
totalLinks,
|
|
393
503
|
duration,
|
|
394
504
|
errors,
|
|
505
|
+
skipped,
|
|
395
506
|
}
|
|
396
507
|
})
|
|
397
508
|
|
|
@@ -402,7 +513,7 @@ export const buildIndex = (
|
|
|
402
513
|
export const getOutgoingLinks = (
|
|
403
514
|
rootPath: string,
|
|
404
515
|
filePath: string,
|
|
405
|
-
): Effect.Effect<readonly string[],
|
|
516
|
+
): Effect.Effect<readonly string[], FileReadError | IndexCorruptedError> =>
|
|
406
517
|
Effect.gen(function* () {
|
|
407
518
|
const storage = createStorage(rootPath)
|
|
408
519
|
const linkIndex = yield* loadLinkIndex(storage)
|
|
@@ -418,7 +529,7 @@ export const getOutgoingLinks = (
|
|
|
418
529
|
export const getIncomingLinks = (
|
|
419
530
|
rootPath: string,
|
|
420
531
|
filePath: string,
|
|
421
|
-
): Effect.Effect<readonly string[],
|
|
532
|
+
): Effect.Effect<readonly string[], FileReadError | IndexCorruptedError> =>
|
|
422
533
|
Effect.gen(function* () {
|
|
423
534
|
const storage = createStorage(rootPath)
|
|
424
535
|
const linkIndex = yield* loadLinkIndex(storage)
|
|
@@ -433,7 +544,7 @@ export const getIncomingLinks = (
|
|
|
433
544
|
|
|
434
545
|
export const getBrokenLinks = (
|
|
435
546
|
rootPath: string,
|
|
436
|
-
): Effect.Effect<readonly string[],
|
|
547
|
+
): Effect.Effect<readonly string[], FileReadError | IndexCorruptedError> =>
|
|
437
548
|
Effect.gen(function* () {
|
|
438
549
|
const storage = createStorage(rootPath)
|
|
439
550
|
const linkIndex = yield* loadLinkIndex(storage)
|
|
@@ -444,3 +555,130 @@ export const getBrokenLinks = (
|
|
|
444
555
|
|
|
445
556
|
return linkIndex.broken
|
|
446
557
|
})
|
|
558
|
+
|
|
559
|
+
// ============================================================================
|
|
560
|
+
// BM25 Index Building
|
|
561
|
+
// ============================================================================
|
|
562
|
+
|
|
563
|
+
import { type BM25Document, createBM25Store } from '../search/bm25-store.js'
|
|
564
|
+
|
|
565
|
+
/**
 * Options accepted by `buildBM25Index`.
 */
export interface BuildBM25Options {
  /**
   * Rebuild even if a stored BM25 index already exists. When unset, a store
   * that loads successfully with a non-zero entry count is reused as-is.
   */
  readonly force?: boolean
  /**
   * Progress callback; receives the number of documents handled so far and
   * the number of documents that contribute sections to the index.
   */
  readonly onProgress?: (progress: { current: number; total: number }) => void
}
|
|
569
|
+
|
|
570
|
+
/**
 * Summary returned by `buildBM25Index`.
 */
export interface BuildBM25Result {
  /**
   * Sections added to the BM25 store during this call; `0` when the build
   * was skipped or the document/section indexes were unavailable.
   */
  readonly sectionsIndexed: number
  /** Elapsed time in milliseconds (`0` when the indexes were unavailable). */
  readonly duration: number
}
|
|
574
|
+
|
|
575
|
+
/**
 * Build BM25 keyword index for all sections.
 *
 * Steps:
 * 1. Load the document and section indexes; if either is missing, return
 *    `{ sectionsIndexed: 0, duration: 0 }` without touching the store.
 * 2. Unless `options.force` is set, try to load the existing BM25 store and
 *    return early (with `sectionsIndexed: 0`) when it already has entries.
 * 3. Otherwise clear the store, group eligible sections by document, read
 *    each document once, slice out every section's lines and add them.
 * 4. Consolidate and save the store.
 *
 * A document that can no longer be read is skipped rather than failing the
 * build. Progress is reported for every document, including skipped ones,
 * so `current` always reaches `total`.
 *
 * @param rootPath - Root directory containing indexed markdown files
 * @param options - Build options (force rebuild, progress callback)
 * @returns Result with section count and timing
 */
export const buildBM25Index = (
  rootPath: string,
  options: BuildBM25Options = {},
): Effect.Effect<
  BuildBM25Result,
  FileReadError | IndexCorruptedError | FileWriteError
> =>
  Effect.gen(function* () {
    // Sections with fewer tokens than this are left out of the keyword index.
    // NOTE(review): presumably filters out near-empty sections — confirm the
    // threshold matches the one used elsewhere in the project.
    const MIN_SECTION_TOKENS = 10

    const startTime = Date.now()
    const storage = createStorage(rootPath)

    // Load section index
    const docIndex = yield* loadDocumentIndex(storage)
    const sectionIndex = yield* loadSectionIndex(storage)

    if (!docIndex || !sectionIndex) {
      return { sectionsIndexed: 0, duration: 0 }
    }

    // Create BM25 store
    const bm25Store = createBM25Store(storage.rootPath)

    // Reuse an existing, non-empty store unless a rebuild was requested
    if (!options.force) {
      const loaded = yield* bm25Store.load()
      if (loaded && bm25Store.getStats().count > 0) {
        return { sectionsIndexed: 0, duration: Date.now() - startTime }
      }
    }

    // Clear and rebuild
    bm25Store.clear()

    // Group sections by document so each file is read only once
    const sectionsByDoc = new Map<string, SectionEntry[]>()
    for (const section of Object.values(sectionIndex.sections)) {
      if (section.tokenCount < MIN_SECTION_TOKENS) continue
      const existing = sectionsByDoc.get(section.documentPath)
      if (existing) {
        existing.push(section)
      } else {
        sectionsByDoc.set(section.documentPath, [section])
      }
    }

    const totalDocs = sectionsByDoc.size
    let processedDocs = 0
    let sectionsIndexed = 0

    // Process each document
    for (const [docPath, sections] of sectionsByDoc) {
      const filePath = path.join(storage.rootPath, docPath)

      // Effect.tryPromise turns a rejected read into a recoverable failure.
      // Effect.promise would turn it into a defect, which catchAll does not
      // intercept, so a single unreadable file would abort the whole build.
      const fileContent = yield* Effect.tryPromise({
        try: () => fs.readFile(filePath, 'utf-8'),
        catch: (cause) => cause,
      }).pipe(Effect.catchAll(() => Effect.succeed(null)))

      if (fileContent !== null) {
        const lines = fileContent.split('\n')
        const docs: BM25Document[] = sections.map((section) => ({
          id: section.id,
          sectionId: section.id,
          documentPath: section.documentPath,
          heading: section.heading,
          // assumes startLine/endLine are 1-based and inclusive — TODO confirm
          content: lines
            .slice(section.startLine - 1, section.endLine)
            .join('\n'),
        }))

        yield* bm25Store.add(docs)
        sectionsIndexed += docs.length
      }

      // Count the document whether or not it could be read, so progress
      // cannot stall below `total`.
      processedDocs++
      if (options.onProgress) {
        options.onProgress({ current: processedDocs, total: totalDocs })
      }
    }

    // Consolidate and save
    yield* bm25Store.consolidate()
    yield* bm25Store.save()

    return {
      sectionsIndexed,
      duration: Date.now() - startTime,
    }
  })
|