mdcontext 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.changeset/config.json +9 -9
- package/.claude/settings.local.json +25 -0
- package/.github/workflows/claude-code-review.yml +44 -0
- package/.github/workflows/claude.yml +85 -0
- package/CONTRIBUTING.md +186 -0
- package/NOTES/NOTES +44 -0
- package/README.md +206 -3
- package/biome.json +1 -1
- package/dist/chunk-23UPXDNL.js +3044 -0
- package/dist/chunk-2W7MO2DL.js +1366 -0
- package/dist/chunk-3NUAZGMA.js +1689 -0
- package/dist/chunk-7TOWB2XB.js +366 -0
- package/dist/chunk-7XOTOADQ.js +3065 -0
- package/dist/chunk-AH2PDM2K.js +3042 -0
- package/dist/chunk-BNXWSZ63.js +3742 -0
- package/dist/chunk-BTL5DJVU.js +3222 -0
- package/dist/chunk-HDHYG7E4.js +104 -0
- package/dist/chunk-HLR4KZBP.js +3234 -0
- package/dist/chunk-IP3FRFEB.js +1045 -0
- package/dist/chunk-KHU56VDO.js +3042 -0
- package/dist/chunk-KRYIFLQR.js +85 -89
- package/dist/chunk-LBSDNLEM.js +287 -0
- package/dist/chunk-MNTQ7HCP.js +2643 -0
- package/dist/chunk-MUJELQQ6.js +1387 -0
- package/dist/chunk-MXJGMSLV.js +2199 -0
- package/dist/chunk-N6QJGC3Z.js +2636 -0
- package/dist/chunk-OBELGBPM.js +1713 -0
- package/dist/chunk-OT7R5XTA.js +3192 -0
- package/dist/chunk-P7X4RA2T.js +106 -0
- package/dist/chunk-PIDUQNC2.js +3185 -0
- package/dist/chunk-POGCDIH4.js +3187 -0
- package/dist/chunk-PSIEOQGZ.js +3043 -0
- package/dist/chunk-PVRT3IHA.js +3238 -0
- package/dist/chunk-QNN4TT23.js +1430 -0
- package/dist/chunk-RE3R45RJ.js +3042 -0
- package/dist/chunk-S7E6TFX6.js +718 -657
- package/dist/chunk-SG6GLU4U.js +1378 -0
- package/dist/chunk-SJCDV2ST.js +274 -0
- package/dist/chunk-SYE5XLF3.js +104 -0
- package/dist/chunk-T5VLYBZD.js +103 -0
- package/dist/chunk-TOQB7VWU.js +3238 -0
- package/dist/chunk-VFNMZ4ZQ.js +3228 -0
- package/dist/chunk-VVTGZNBT.js +1533 -1423
- package/dist/chunk-W7Q4RFEV.js +104 -0
- package/dist/chunk-XTYYVRLO.js +3190 -0
- package/dist/chunk-Y6MDYVJD.js +3063 -0
- package/dist/cli/main.js +4072 -629
- package/dist/index.d.ts +420 -33
- package/dist/index.js +8 -15
- package/dist/mcp/server.js +103 -7
- package/dist/schema-BAWSG7KY.js +22 -0
- package/dist/schema-E3QUPL26.js +20 -0
- package/dist/schema-EHL7WUT6.js +20 -0
- package/docs/019-USAGE.md +44 -5
- package/docs/020-current-implementation.md +8 -8
- package/docs/021-DOGFOODING-FINDINGS.md +1 -1
- package/docs/CONFIG.md +1123 -0
- package/docs/ERRORS.md +383 -0
- package/docs/summarization.md +320 -0
- package/justfile +40 -0
- package/package.json +39 -33
- package/research/INDEX.md +315 -0
- package/research/code-review/README.md +90 -0
- package/research/code-review/cli-error-handling-review.md +979 -0
- package/research/code-review/code-review-validation-report.md +464 -0
- package/research/code-review/main-ts-review.md +1128 -0
- package/research/config-docs/SUMMARY.md +357 -0
- package/research/config-docs/TEST-RESULTS.md +776 -0
- package/research/config-docs/TODO.md +542 -0
- package/research/config-docs/analysis.md +744 -0
- package/research/config-docs/fix-validation.md +502 -0
- package/research/config-docs/help-audit.md +264 -0
- package/research/config-docs/help-system-analysis.md +890 -0
- package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
- package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
- package/research/issue-review.md +603 -0
- package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
- package/research/llm-summarization/alternative-providers-2026.md +1428 -0
- package/research/llm-summarization/anthropic-2026.md +367 -0
- package/research/llm-summarization/claude-cli-integration.md +1706 -0
- package/research/llm-summarization/cli-integration-patterns.md +3155 -0
- package/research/llm-summarization/openai-2026.md +473 -0
- package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
- package/research/llm-summarization/opencode-cli-integration.md +1552 -0
- package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
- package/research/llm-summarization/prototype-results.md +56 -0
- package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
- package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
- package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
- package/research/mdcontext-pudding/01-index-embed.md +956 -0
- package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
- package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
- package/research/mdcontext-pudding/02-search.md +970 -0
- package/research/mdcontext-pudding/03-context.md +779 -0
- package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
- package/research/mdcontext-pudding/04-tree.md +704 -0
- package/research/mdcontext-pudding/05-config.md +1038 -0
- package/research/mdcontext-pudding/06-links-summary.txt +87 -0
- package/research/mdcontext-pudding/06-links.md +679 -0
- package/research/mdcontext-pudding/07-stats.md +693 -0
- package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
- package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
- package/research/mdcontext-pudding/README.md +168 -0
- package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
- package/research/research-quality-review.md +834 -0
- package/research/semantic-search/embedding-text-analysis.md +156 -0
- package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
- package/research/semantic-search/query-processing-analysis.md +207 -0
- package/research/semantic-search/root-cause-and-solution.md +114 -0
- package/research/semantic-search/threshold-validation-report.md +69 -0
- package/research/semantic-search/vector-search-analysis.md +63 -0
- package/research/test-path-issues.md +276 -0
- package/review/ALP-76/1-error-type-design.md +962 -0
- package/review/ALP-76/2-error-handling-patterns.md +906 -0
- package/review/ALP-76/3-error-presentation.md +624 -0
- package/review/ALP-76/4-test-coverage.md +625 -0
- package/review/ALP-76/5-migration-completeness.md +440 -0
- package/review/ALP-76/6-effect-best-practices.md +755 -0
- package/scripts/apply-branch-protection.sh +47 -0
- package/scripts/branch-protection-templates.json +79 -0
- package/scripts/prototype-summarization.ts +346 -0
- package/scripts/rebuild-hnswlib.js +32 -37
- package/scripts/setup-branch-protection.sh +64 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
- package/src/cli/argv-preprocessor.test.ts +2 -2
- package/src/cli/cli.test.ts +230 -33
- package/src/cli/commands/config-cmd.ts +642 -0
- package/src/cli/commands/context.ts +97 -9
- package/src/cli/commands/duplicates.ts +122 -0
- package/src/cli/commands/embeddings.ts +529 -0
- package/src/cli/commands/index-cmd.ts +210 -30
- package/src/cli/commands/index.ts +3 -0
- package/src/cli/commands/search.ts +894 -64
- package/src/cli/commands/stats.ts +3 -0
- package/src/cli/commands/tree.ts +26 -5
- package/src/cli/config-layer.ts +176 -0
- package/src/cli/error-handler.test.ts +235 -0
- package/src/cli/error-handler.ts +655 -0
- package/src/cli/flag-schemas.ts +66 -0
- package/src/cli/help.ts +209 -7
- package/src/cli/main.ts +348 -58
- package/src/cli/options.ts +10 -0
- package/src/cli/shared-error-handling.ts +199 -0
- package/src/cli/utils.ts +150 -17
- package/src/config/file-provider.test.ts +320 -0
- package/src/config/file-provider.ts +273 -0
- package/src/config/index.ts +72 -0
- package/src/config/integration.test.ts +667 -0
- package/src/config/precedence.test.ts +277 -0
- package/src/config/precedence.ts +451 -0
- package/src/config/schema.test.ts +414 -0
- package/src/config/schema.ts +603 -0
- package/src/config/service.test.ts +320 -0
- package/src/config/service.ts +243 -0
- package/src/config/testing.test.ts +264 -0
- package/src/config/testing.ts +110 -0
- package/src/core/types.ts +6 -33
- package/src/duplicates/detector.test.ts +183 -0
- package/src/duplicates/detector.ts +414 -0
- package/src/duplicates/index.ts +18 -0
- package/src/embeddings/embedding-namespace.test.ts +300 -0
- package/src/embeddings/embedding-namespace.ts +947 -0
- package/src/embeddings/heading-boost.test.ts +222 -0
- package/src/embeddings/hnsw-build-options.test.ts +198 -0
- package/src/embeddings/hyde.test.ts +272 -0
- package/src/embeddings/hyde.ts +264 -0
- package/src/embeddings/index.ts +2 -0
- package/src/embeddings/openai-provider.ts +332 -83
- package/src/embeddings/pricing.json +22 -0
- package/src/embeddings/provider-constants.ts +204 -0
- package/src/embeddings/provider-errors.test.ts +967 -0
- package/src/embeddings/provider-errors.ts +565 -0
- package/src/embeddings/provider-factory.test.ts +240 -0
- package/src/embeddings/provider-factory.ts +225 -0
- package/src/embeddings/provider-integration.test.ts +788 -0
- package/src/embeddings/query-preprocessing.test.ts +187 -0
- package/src/embeddings/semantic-search-threshold.test.ts +508 -0
- package/src/embeddings/semantic-search.ts +780 -93
- package/src/embeddings/types.ts +293 -16
- package/src/embeddings/vector-store.ts +486 -77
- package/src/embeddings/voyage-provider.ts +313 -0
- package/src/errors/errors.test.ts +845 -0
- package/src/errors/index.ts +533 -0
- package/src/index/ignore-patterns.test.ts +354 -0
- package/src/index/ignore-patterns.ts +305 -0
- package/src/index/indexer.ts +286 -48
- package/src/index/storage.ts +94 -30
- package/src/index/types.ts +40 -2
- package/src/index/watcher.ts +67 -9
- package/src/index.ts +22 -0
- package/src/integration/search-keyword.test.ts +678 -0
- package/src/mcp/server.ts +135 -6
- package/src/parser/parser.ts +18 -19
- package/src/parser/section-filter.test.ts +277 -0
- package/src/parser/section-filter.ts +125 -3
- package/src/search/__tests__/hybrid-search.test.ts +650 -0
- package/src/search/bm25-store.ts +366 -0
- package/src/search/cross-encoder.test.ts +253 -0
- package/src/search/cross-encoder.ts +406 -0
- package/src/search/fuzzy-search.test.ts +419 -0
- package/src/search/fuzzy-search.ts +273 -0
- package/src/search/hybrid-search.ts +448 -0
- package/src/search/path-matcher.test.ts +276 -0
- package/src/search/path-matcher.ts +33 -0
- package/src/search/searcher.test.ts +99 -1
- package/src/search/searcher.ts +189 -67
- package/src/search/wink-bm25.d.ts +30 -0
- package/src/summarization/cli-providers/claude.ts +202 -0
- package/src/summarization/cli-providers/detection.test.ts +273 -0
- package/src/summarization/cli-providers/detection.ts +118 -0
- package/src/summarization/cli-providers/index.ts +8 -0
- package/src/summarization/cost.test.ts +139 -0
- package/src/summarization/cost.ts +102 -0
- package/src/summarization/error-handler.test.ts +127 -0
- package/src/summarization/error-handler.ts +111 -0
- package/src/summarization/index.ts +102 -0
- package/src/summarization/pipeline.test.ts +498 -0
- package/src/summarization/pipeline.ts +231 -0
- package/src/summarization/prompts.test.ts +269 -0
- package/src/summarization/prompts.ts +133 -0
- package/src/summarization/provider-factory.test.ts +396 -0
- package/src/summarization/provider-factory.ts +178 -0
- package/src/summarization/types.ts +184 -0
- package/src/summarize/summarizer.ts +104 -35
- package/src/types/huggingface-transformers.d.ts +66 -0
- package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/indexes/documents.json +4 -4
- package/tests/fixtures/cli/.mdcontext/indexes/sections.json +14 -0
- package/tests/integration/embed-index.test.ts +712 -0
- package/tests/integration/search-context.test.ts +469 -0
- package/tests/integration/search-semantic.test.ts +522 -0
- package/vitest.config.ts +1 -6
- package/AGENTS.md +0 -46
- package/tests/fixtures/cli/.mdcontext/vectors.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/vectors.meta.json +0 -1264
package/dist/chunk-VVTGZNBT.js
CHANGED
|
@@ -1,271 +1,305 @@
|
|
|
1
1
|
import {
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
2
|
+
countTokensApprox,
|
|
3
|
+
createStorage,
|
|
4
|
+
INDEX_DIR,
|
|
5
|
+
loadDocumentIndex,
|
|
6
|
+
loadSectionIndex,
|
|
7
|
+
parseFile,
|
|
8
8
|
} from "./chunk-S7E6TFX6.js";
|
|
9
9
|
|
|
10
10
|
// src/summarize/formatters.ts
|
|
11
11
|
var formatSummary = (summary, options = {}) => {
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
12
|
+
const maxTokens = options.maxTokens;
|
|
13
|
+
const flatSections = [];
|
|
14
|
+
const collectSections = (
|
|
15
|
+
section,
|
|
16
|
+
depth = 0,
|
|
17
|
+
parentNumber = "",
|
|
18
|
+
index = 0,
|
|
19
|
+
) => {
|
|
20
|
+
const number = parentNumber
|
|
21
|
+
? `${parentNumber}.${index + 1}`
|
|
22
|
+
: `${index + 1}`;
|
|
23
|
+
flatSections.push({ section, depth, number });
|
|
24
|
+
section.children.forEach((child, i) => {
|
|
25
|
+
collectSections(child, depth + 1, number, i);
|
|
26
|
+
});
|
|
27
|
+
};
|
|
28
|
+
summary.sections.forEach((section, i) => {
|
|
29
|
+
collectSections(section, 0, "", i);
|
|
30
|
+
});
|
|
31
|
+
const buildOutput = (
|
|
32
|
+
includedSectionIndices,
|
|
33
|
+
truncationInfo,
|
|
34
|
+
includeTopics2,
|
|
35
|
+
) => {
|
|
36
|
+
const lines = [];
|
|
37
|
+
if (
|
|
38
|
+
truncationInfo.showWarning &&
|
|
39
|
+
truncationInfo.truncatedCount > 0 &&
|
|
40
|
+
truncationInfo.tokensTotal > 0
|
|
41
|
+
) {
|
|
42
|
+
const pct = Math.round(
|
|
43
|
+
(truncationInfo.tokensShown / truncationInfo.tokensTotal) * 100,
|
|
44
|
+
);
|
|
45
|
+
lines.push(
|
|
46
|
+
`\u26A0\uFE0F Truncated: Showing ~${truncationInfo.tokensShown}/${truncationInfo.tokensTotal} tokens (${pct}%)`,
|
|
47
|
+
);
|
|
48
|
+
if (truncationInfo.includedNumbers.length > 0) {
|
|
49
|
+
const includedDisplay =
|
|
50
|
+
truncationInfo.includedNumbers.length <= 6
|
|
51
|
+
? truncationInfo.includedNumbers.join(", ")
|
|
52
|
+
: truncationInfo.includedNumbers.slice(0, 5).join(", ") +
|
|
53
|
+
`, ... (+${truncationInfo.includedNumbers.length - 5} more)`;
|
|
54
|
+
lines.push(`Sections included: ${includedDisplay}`);
|
|
55
|
+
}
|
|
56
|
+
if (truncationInfo.excludedNumbers.length > 0) {
|
|
57
|
+
const excludedDisplay =
|
|
58
|
+
truncationInfo.excludedNumbers.length <= 6
|
|
59
|
+
? truncationInfo.excludedNumbers.join(", ")
|
|
60
|
+
: truncationInfo.excludedNumbers.slice(0, 5).join(", ") +
|
|
61
|
+
`, ... (+${truncationInfo.excludedNumbers.length - 5} more)`;
|
|
62
|
+
lines.push(`Sections excluded: ${excludedDisplay}`);
|
|
63
|
+
}
|
|
64
|
+
lines.push(
|
|
65
|
+
"Use --full for complete content or --section to target specific sections.",
|
|
66
|
+
);
|
|
67
|
+
lines.push("");
|
|
68
|
+
}
|
|
69
|
+
lines.push(`# ${summary.title}`);
|
|
70
|
+
lines.push(`Path: ${summary.path}`);
|
|
71
|
+
const tokenLineIndex = lines.length;
|
|
72
|
+
lines.push("PLACEHOLDER");
|
|
73
|
+
lines.push("");
|
|
74
|
+
const fullTopicsLine2 =
|
|
75
|
+
summary.keyTopics.length > 0
|
|
76
|
+
? `**Topics:** ${summary.keyTopics.join(", ")}`
|
|
77
|
+
: "";
|
|
78
|
+
if (includeTopics2 && fullTopicsLine2) {
|
|
79
|
+
lines.push(fullTopicsLine2);
|
|
80
|
+
lines.push("");
|
|
81
|
+
}
|
|
82
|
+
const sectionLines = [];
|
|
83
|
+
for (let i = 0; i < flatSections.length; i++) {
|
|
84
|
+
if (!includedSectionIndices.has(i)) continue;
|
|
85
|
+
const { section, depth } = flatSections[i];
|
|
86
|
+
const indent = " ".repeat(depth);
|
|
87
|
+
const prefix = "#".repeat(section.level);
|
|
88
|
+
sectionLines.push(`${indent}${prefix} ${section.heading}`);
|
|
89
|
+
if (section.summary) {
|
|
90
|
+
sectionLines.push(`${indent}${section.summary}`);
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
lines.push(sectionLines.join("\n"));
|
|
94
|
+
const tempOutput = lines.join("\n");
|
|
95
|
+
const tokensWithoutLine = countTokensApprox(
|
|
96
|
+
tempOutput.replace("PLACEHOLDER", ""),
|
|
97
|
+
);
|
|
98
|
+
let estimatedTotal = tokensWithoutLine + 8;
|
|
99
|
+
for (let iter = 0; iter < 3; iter++) {
|
|
100
|
+
const testTokenLine = `Tokens: ${estimatedTotal} (${(summary.compressionRatio * 100).toFixed(0)}% reduction from ${summary.originalTokens})`;
|
|
101
|
+
const testOutput = tempOutput.replace("PLACEHOLDER", testTokenLine);
|
|
102
|
+
const actualTotal = countTokensApprox(testOutput);
|
|
103
|
+
if (actualTotal === estimatedTotal) break;
|
|
104
|
+
estimatedTotal = actualTotal;
|
|
105
|
+
}
|
|
106
|
+
const finalTokenLine = `Tokens: ${estimatedTotal} (${(summary.compressionRatio * 100).toFixed(0)}% reduction from ${summary.originalTokens})`;
|
|
107
|
+
lines[tokenLineIndex] = finalTokenLine;
|
|
108
|
+
return lines.join("\n");
|
|
109
|
+
};
|
|
110
|
+
if (maxTokens === void 0) {
|
|
111
|
+
const allIndices = new Set(flatSections.map((_, i) => i));
|
|
112
|
+
const hasPriorTruncation = summary.truncated && summary.truncatedCount;
|
|
113
|
+
return buildOutput(
|
|
114
|
+
allIndices,
|
|
115
|
+
{
|
|
116
|
+
showWarning: !!hasPriorTruncation,
|
|
117
|
+
truncatedCount: summary.truncatedCount ?? 0,
|
|
118
|
+
includedNumbers: flatSections.map((s) => s.number),
|
|
119
|
+
excludedNumbers: [],
|
|
120
|
+
tokensShown: summary.summaryTokens,
|
|
121
|
+
tokensTotal: summary.originalTokens,
|
|
122
|
+
},
|
|
123
|
+
true,
|
|
124
|
+
);
|
|
125
|
+
}
|
|
126
|
+
const includedIndices = /* @__PURE__ */ new Set();
|
|
127
|
+
let truncatedCount = 0;
|
|
128
|
+
let includeTopics = true;
|
|
129
|
+
const SAFETY_MARGIN = 1.15;
|
|
130
|
+
const minHeaderTemplate = [
|
|
131
|
+
`# ${summary.title}`,
|
|
132
|
+
`Path: ${summary.path}`,
|
|
133
|
+
`Tokens: 9999 (${(summary.compressionRatio * 100).toFixed(0)}% reduction from ${summary.originalTokens})`,
|
|
134
|
+
"",
|
|
135
|
+
"",
|
|
136
|
+
].join("\n");
|
|
137
|
+
const minHeaderTokens = Math.ceil(
|
|
138
|
+
countTokensApprox(minHeaderTemplate) * SAFETY_MARGIN,
|
|
139
|
+
);
|
|
140
|
+
const fullTopicsLine =
|
|
141
|
+
summary.keyTopics.length > 0
|
|
142
|
+
? `**Topics:** ${summary.keyTopics.join(", ")}
|
|
143
|
+
`
|
|
144
|
+
: "";
|
|
145
|
+
const topicsTokens = fullTopicsLine
|
|
146
|
+
? Math.ceil(countTokensApprox(fullTopicsLine) * SAFETY_MARGIN)
|
|
147
|
+
: 0;
|
|
148
|
+
const truncationWarningTokens = Math.ceil(
|
|
149
|
+
countTokensApprox(
|
|
150
|
+
`\u26A0\uFE0F Truncated: Showing ~9999/9999 tokens (99%)
|
|
120
151
|
Sections included: 1, 2, 3, 4, 5, ... (+99 more)
|
|
121
152
|
Sections excluded: 6, 7, 8, 9, 10, ... (+99 more)
|
|
122
153
|
Use --full for complete content or --section to target specific sections.
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
154
|
+
`,
|
|
155
|
+
) * SAFETY_MARGIN,
|
|
156
|
+
);
|
|
157
|
+
let headerTokens = minHeaderTokens + topicsTokens;
|
|
158
|
+
if (headerTokens >= maxTokens) {
|
|
159
|
+
includeTopics = false;
|
|
160
|
+
headerTokens = minHeaderTokens;
|
|
161
|
+
}
|
|
162
|
+
let contentBudget = maxTokens - headerTokens - truncationWarningTokens;
|
|
163
|
+
let tokensUsed = 0;
|
|
164
|
+
for (let i = 0; i < flatSections.length; i++) {
|
|
165
|
+
const { section, depth } = flatSections[i];
|
|
166
|
+
const indent = " ".repeat(depth);
|
|
167
|
+
const prefix = "#".repeat(section.level);
|
|
168
|
+
const sectionContent = section.summary
|
|
169
|
+
? `${indent}${prefix} ${section.heading}
|
|
170
|
+
${indent}${section.summary}`
|
|
171
|
+
: `${indent}${prefix} ${section.heading}`;
|
|
172
|
+
const sectionTokens = Math.ceil(
|
|
173
|
+
countTokensApprox(sectionContent) * SAFETY_MARGIN,
|
|
174
|
+
);
|
|
175
|
+
if (tokensUsed + sectionTokens <= contentBudget) {
|
|
176
|
+
includedIndices.add(i);
|
|
177
|
+
tokensUsed += sectionTokens;
|
|
178
|
+
} else {
|
|
179
|
+
truncatedCount++;
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
if (truncatedCount === 0) {
|
|
183
|
+
contentBudget += truncationWarningTokens;
|
|
184
|
+
}
|
|
185
|
+
const includedNumbers = [];
|
|
186
|
+
const excludedNumbers = [];
|
|
187
|
+
for (let i = 0; i < flatSections.length; i++) {
|
|
188
|
+
if (includedIndices.has(i)) {
|
|
189
|
+
includedNumbers.push(flatSections[i].number);
|
|
190
|
+
} else {
|
|
191
|
+
excludedNumbers.push(flatSections[i].number);
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
let tokensShown = 0;
|
|
195
|
+
for (const idx of includedIndices) {
|
|
196
|
+
tokensShown += flatSections[idx].section.summaryTokens;
|
|
197
|
+
}
|
|
198
|
+
let output = buildOutput(
|
|
199
|
+
includedIndices,
|
|
200
|
+
{
|
|
201
|
+
showWarning: truncatedCount > 0,
|
|
202
|
+
truncatedCount,
|
|
203
|
+
includedNumbers,
|
|
204
|
+
excludedNumbers,
|
|
205
|
+
tokensShown,
|
|
206
|
+
tokensTotal: summary.originalTokens,
|
|
207
|
+
},
|
|
208
|
+
includeTopics,
|
|
209
|
+
);
|
|
210
|
+
let actualTokens = countTokensApprox(output);
|
|
211
|
+
const sortedIndices = Array.from(includedIndices).sort((a, b) => b - a);
|
|
212
|
+
let removalIndex = 0;
|
|
213
|
+
while (actualTokens > maxTokens && removalIndex < sortedIndices.length) {
|
|
214
|
+
const indexToRemove = sortedIndices[removalIndex];
|
|
215
|
+
includedIndices.delete(indexToRemove);
|
|
216
|
+
truncatedCount++;
|
|
217
|
+
removalIndex++;
|
|
218
|
+
const removedNumber = flatSections[indexToRemove].number;
|
|
219
|
+
const includedIdx = includedNumbers.indexOf(removedNumber);
|
|
220
|
+
if (includedIdx !== -1) {
|
|
221
|
+
includedNumbers.splice(includedIdx, 1);
|
|
222
|
+
excludedNumbers.push(removedNumber);
|
|
223
|
+
}
|
|
224
|
+
tokensShown -= flatSections[indexToRemove].section.summaryTokens;
|
|
225
|
+
output = buildOutput(
|
|
226
|
+
includedIndices,
|
|
227
|
+
{
|
|
228
|
+
showWarning: true,
|
|
229
|
+
truncatedCount,
|
|
230
|
+
includedNumbers,
|
|
231
|
+
excludedNumbers,
|
|
232
|
+
tokensShown,
|
|
233
|
+
tokensTotal: summary.originalTokens,
|
|
234
|
+
},
|
|
235
|
+
includeTopics,
|
|
236
|
+
);
|
|
237
|
+
actualTokens = countTokensApprox(output);
|
|
238
|
+
}
|
|
239
|
+
if (actualTokens > maxTokens && includeTopics) {
|
|
240
|
+
includeTopics = false;
|
|
241
|
+
output = buildOutput(
|
|
242
|
+
includedIndices,
|
|
243
|
+
{
|
|
244
|
+
showWarning: truncatedCount > 0,
|
|
245
|
+
truncatedCount,
|
|
246
|
+
includedNumbers,
|
|
247
|
+
excludedNumbers,
|
|
248
|
+
tokensShown,
|
|
249
|
+
tokensTotal: summary.originalTokens,
|
|
250
|
+
},
|
|
251
|
+
includeTopics,
|
|
252
|
+
);
|
|
253
|
+
actualTokens = countTokensApprox(output);
|
|
254
|
+
}
|
|
255
|
+
if (actualTokens > maxTokens && truncatedCount > 0) {
|
|
256
|
+
output = buildOutput(
|
|
257
|
+
includedIndices,
|
|
258
|
+
{
|
|
259
|
+
showWarning: false,
|
|
260
|
+
truncatedCount,
|
|
261
|
+
includedNumbers,
|
|
262
|
+
excludedNumbers,
|
|
263
|
+
tokensShown,
|
|
264
|
+
tokensTotal: summary.originalTokens,
|
|
265
|
+
},
|
|
266
|
+
includeTopics,
|
|
267
|
+
);
|
|
268
|
+
actualTokens = countTokensApprox(output);
|
|
269
|
+
}
|
|
270
|
+
return output;
|
|
238
271
|
};
|
|
239
272
|
var formatAssembledContext = (context) => {
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
273
|
+
const lines = [];
|
|
274
|
+
lines.push("# Context Assembly");
|
|
275
|
+
lines.push(`Total tokens: ${context.totalTokens}/${context.budget}`);
|
|
276
|
+
lines.push(`Sources: ${context.sources.length}`);
|
|
277
|
+
lines.push("");
|
|
278
|
+
for (const source of context.sources) {
|
|
279
|
+
lines.push("---");
|
|
280
|
+
lines.push("");
|
|
281
|
+
lines.push(source.content);
|
|
282
|
+
}
|
|
283
|
+
if (context.overflow.length > 0) {
|
|
284
|
+
lines.push("---");
|
|
285
|
+
lines.push("");
|
|
286
|
+
lines.push("## Overflow (not included due to budget)");
|
|
287
|
+
for (const overflowPath of context.overflow) {
|
|
288
|
+
lines.push(`- ${overflowPath}`);
|
|
289
|
+
}
|
|
290
|
+
}
|
|
291
|
+
return lines.join("\n");
|
|
259
292
|
};
|
|
260
293
|
|
|
294
|
+
import { Effect } from "effect";
|
|
261
295
|
// src/summarize/summarizer.ts
|
|
262
296
|
import * as fs from "fs/promises";
|
|
263
297
|
import * as path from "path";
|
|
264
|
-
|
|
298
|
+
|
|
265
299
|
var TOKEN_BUDGETS = {
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
300
|
+
brief: 100,
|
|
301
|
+
summary: 500,
|
|
302
|
+
full: Infinity,
|
|
269
303
|
};
|
|
270
304
|
var MIN_SENTENCE_LENGTH = 10;
|
|
271
305
|
var SENTENCE_SCORE_DEFINITION = 2;
|
|
@@ -283,1237 +317,1313 @@ var MAX_TOPIC_LENGTH = 50;
|
|
|
283
317
|
var MAX_TOPICS = 10;
|
|
284
318
|
var MIN_PARTIAL_BUDGET = 50;
|
|
285
319
|
var extractKeyPoints = (content, maxSentences) => {
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
320
|
+
const sentences = content
|
|
321
|
+
.replace(/\n+/g, " ")
|
|
322
|
+
.split(/(?<=[.!?])\s+/)
|
|
323
|
+
.filter((s) => s.trim().length > MIN_SENTENCE_LENGTH);
|
|
324
|
+
if (sentences.length <= maxSentences) {
|
|
325
|
+
return sentences;
|
|
326
|
+
}
|
|
327
|
+
const scored = sentences.map((s) => {
|
|
328
|
+
let score = 0;
|
|
329
|
+
if (s.includes(":")) score += SENTENCE_SCORE_DEFINITION;
|
|
330
|
+
if (/^[A-Z]/.test(s)) score += SENTENCE_SCORE_PROPER_START;
|
|
331
|
+
if (s.length > SENTENCE_LENGTH_MIN && s.length < SENTENCE_LENGTH_MAX)
|
|
332
|
+
score += SENTENCE_SCORE_MEDIUM_LENGTH;
|
|
333
|
+
if (/\*\*|`/.test(s)) score += SENTENCE_SCORE_EMPHASIS;
|
|
334
|
+
return { sentence: s, score };
|
|
335
|
+
});
|
|
336
|
+
scored.sort((a, b) => b.score - a.score);
|
|
337
|
+
return scored.slice(0, maxSentences).map((s) => s.sentence);
|
|
301
338
|
};
|
|
302
339
|
/**
 * Recursively summarize one parsed section and all of its children.
 *
 * The token target is the level's budget capped from above, and the larger of
 * the compressed original size and MIN_SECTION_TOKENS from below. Depending on
 * the level the summary is the full plain text, a structural tag list
 * (e.g. "[code, list]"), or the top-scoring sentences of the section.
 *
 * @param section Parsed section (heading, level, plainText, metadata, children).
 * @param level   Compression level key into TOKEN_BUDGETS ("full", "brief", ...).
 * @returns Summary node mirroring the section tree.
 */
var summarizeSection = (section, level) => {
  const { metadata, plainText } = section;
  const originalTokens = metadata.tokenCount;
  const children = section.children.map((child) =>
    summarizeSection(child, level),
  );

  const compressedSize = Math.max(
    originalTokens * SUMMARY_COMPRESSION_RATIO,
    MIN_SECTION_TOKENS,
  );
  const targetTokens = Math.min(TOKEN_BUDGETS[level], compressedSize);

  // "brief": only advertise which kinds of content the section contains.
  const describeStructure = () => {
    const labels = [
      metadata.hasCode && "code",
      metadata.hasList && "list",
      metadata.hasTable && "table",
    ].filter(Boolean);
    return labels.length > 0 ? `[${labels.join(", ")}]` : "";
  };

  // Default: key sentences, falling back to a word-truncated prefix.
  const condense = () => {
    const sentenceCap = Math.max(
      MIN_SUMMARY_SENTENCES,
      Math.floor(targetTokens / TOKENS_PER_SENTENCE_ESTIMATE),
    );
    const keyPoints = extractKeyPoints(plainText, sentenceCap);
    if (keyPoints.length > 0) {
      return keyPoints.join(" ");
    }
    const allWords = plainText.split(/\s+/);
    const kept = allWords.slice(0, targetTokens);
    return kept.join(" ") + (kept.length < allWords.length ? "..." : "");
  };

  let summary;
  if (level === "full" || originalTokens <= targetTokens) {
    // Already small enough (or no compression requested): keep verbatim.
    summary = plainText;
  } else if (level === "brief") {
    summary = describeStructure();
  } else {
    summary = condense();
  }

  return {
    heading: section.heading,
    level: section.level,
    originalTokens,
    summaryTokens: countTokensApprox(summary),
    summary,
    children,
    hasCode: metadata.hasCode,
    hasList: metadata.hasList,
    hasTable: metadata.hasTable,
  };
};
|
|
346
385
|
/**
 * Collect up to MAX_TOPICS distinct topic strings for a document.
 *
 * Topics come from section headings (punctuation `: # - _` replaced by
 * spaces, trimmed, lower-cased, and kept only when their length lies strictly
 * between MIN_TOPIC_LENGTH and MAX_TOPIC_LENGTH), visited depth-first in
 * document order, followed by any string entries of `frontmatter.tags`.
 *
 * @param document Parsed document with `sections` and `frontmatter`.
 * @returns Array of unique lower-cased topics in first-seen order.
 */
var extractTopics = (document) => {
  const seen = new Set();

  const visit = (sectionList) => {
    for (const node of sectionList) {
      const topic = node.heading
        .replace(/[:#\-_]/g, " ")
        .trim()
        .toLowerCase();
      const size = topic.length;
      if (size > MIN_TOPIC_LENGTH && size < MAX_TOPIC_LENGTH) {
        seen.add(topic);
      }
      visit(node.children);
    }
  };
  visit(document.sections);

  const { tags } = document.frontmatter;
  if (Array.isArray(tags)) {
    tags
      .filter((tag) => typeof tag === "string")
      .forEach((tag) => seen.add(tag.toLowerCase()));
  }

  return [...seen].slice(0, MAX_TOPICS);
};
|
|
370
415
|
/**
 * Summarize a parsed document and trim the result to a token budget.
 *
 * Every section is summarized at `options.level` (default "summary"). If the
 * combined summary tokens exceed the content budget (max tokens minus an
 * estimated formatting overhead), sections are dropped in document order
 * until the rest fits; children of a dropped section are still considered
 * and, when they fit, are hoisted into the dropped parent's place.
 *
 * @param document Parsed document (title, path, sections, metadata, frontmatter).
 * @param options  `{ level?, maxTokens? }`; maxTokens defaults to TOKEN_BUDGETS[level].
 * @returns Summary object; `truncated` and `truncatedCount` are only present
 *          when at least one section was dropped.
 */
var summarizeDocument = (document, options = {}) => {
  const level = options.level ?? "summary";
  const maxTokens = options.maxTokens ?? TOKEN_BUDGETS[level];
  const allSections = document.sections.map((s) => summarizeSection(s, level));
  const originalTokens = document.metadata.tokenCount;

  // Sum summary tokens over the whole section tree. (The previous version
  // also collected every node into a flat array that was never read.)
  let totalSummaryTokens = 0;
  const accumulateTokens = (section) => {
    totalSummaryTokens += section.summaryTokens;
    for (const child of section.children) {
      accumulateTokens(child);
    }
  };
  for (const section of allSections) {
    accumulateTokens(section);
  }

  const topics = extractTopics(document);

  // The strings below are only used to estimate formatting overhead; the
  // numbers 9999 / 99 / 999 are width placeholders, not real values.
  const headerTemplate = `# ${document.title}
Path: ${document.path}
Tokens: 9999 (99% reduction from ${document.metadata.tokenCount})
`;
  const topicsLine =
    topics.length > 0
      ? `
**Topics:** ${topics.join(", ")}
`
      : "";
  const truncationWarning =
    "\n\u26A0\uFE0F TRUNCATED: 999 sections omitted to fit token budget";
  const baseOverhead = countTokensApprox(
    headerTemplate + topicsLine + truncationWarning,
  );
  // 20% safety margin plus a fixed cushion of 20 tokens.
  const formattingOverhead = Math.ceil(baseOverhead * 1.2) + 20;
  const contentBudget = maxTokens - formattingOverhead;

  let truncated = false;
  let truncatedCount = 0;
  let sections;
  let summaryTokens;
  // NOTE(review): when contentBudget <= 0 nothing is truncated and the full
  // summary is returned even though it exceeds maxTokens; confirm intended.
  if (totalSummaryTokens > contentBudget && contentBudget > 0) {
    let tokensUsed = 0;
    const truncateSections = (sectionList) => {
      const kept = [];
      for (const section of sectionList) {
        const sectionOwnTokens = section.summaryTokens;
        const fitsInBudget = tokensUsed + sectionOwnTokens <= contentBudget;
        if (fitsInBudget) {
          // Reserve the parent's tokens before descending so children
          // compete only for what is left.
          tokensUsed += sectionOwnTokens;
          const truncatedChildren = truncateSections(section.children);
          kept.push({
            ...section,
            children: truncatedChildren,
          });
        } else {
          // Parent is dropped, but smaller descendants may still fit.
          truncatedCount++;
          const rescuedChildren = truncateSections(section.children);
          kept.push(...rescuedChildren);
        }
      }
      return kept;
    };
    sections = truncateSections(allSections);
    summaryTokens = tokensUsed;
    truncated = truncatedCount > 0;
  } else {
    sections = allSections;
    summaryTokens = totalSummaryTokens;
  }

  const compressionRatio =
    originalTokens > 0 ? 1 - summaryTokens / originalTokens : 0;
  const result = {
    path: document.path,
    title: document.title,
    originalTokens,
    summaryTokens,
    compressionRatio,
    sections,
    keyTopics: topics,
  };
  if (truncated) {
    return {
      ...result,
      truncated: true,
      truncatedCount,
    };
  }
  return result;
};
|
|
453
|
-
var summarizeFile = (filePath, options = {}) =>
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
503
|
+
/**
 * Parse a markdown file and summarize it.
 *
 * Parse failures are re-raised as plain `Error`s whose message is
 * "<tag>: <original message>".
 *
 * @param filePath Path handed to `parseFile`.
 * @param options  Forwarded unchanged to `summarizeDocument`.
 * @returns Effect yielding the document summary.
 */
var summarizeFile = (filePath, options = {}) => {
  const toPlainError = (e) => new Error(`${e._tag}: ${e.message}`);
  return Effect.gen(function* () {
    const parsing = parseFile(filePath).pipe(Effect.mapError(toPlainError));
    const document = yield* parsing;
    return summarizeDocument(document, options);
  });
};
|
|
510
|
+
/**
 * Summarize several source files into one token-budgeted context bundle.
 *
 * The budget is split evenly across sources. A source whose formatted
 * summary still fits the remaining overall budget is added as-is; otherwise,
 * if more than MIN_PARTIAL_BUDGET tokens remain, a "brief" summary is added
 * instead; otherwise the source is listed in `overflow`.
 *
 * Sources that cannot be summarized also go to `overflow`. Effect failures
 * are not thrown into the generator, so a surrounding `try/catch` alone never
 * sees them; they are captured with `Effect.either`. The `try/catch` is kept
 * for synchronous exceptions thrown while formatting or counting tokens.
 *
 * @param rootPath    Base directory for relative source paths and output paths.
 * @param sourcePaths Absolute or root-relative file paths.
 * @param options     `{ budget, level? }`; level defaults to "summary".
 * @returns Effect yielding `{ sources, totalTokens, budget, overflow }`.
 */
var assembleContext = (rootPath, sourcePaths, options) =>
  Effect.gen(function* () {
    const budget = options.budget;
    const level = options.level ?? "summary";
    const sources = [];
    const overflow = [];
    let totalTokens = 0;
    const perSourceBudget = Math.floor(budget / sourcePaths.length);
    for (const sourcePath of sourcePaths) {
      const resolvedPath = path.isAbsolute(sourcePath)
        ? sourcePath
        : path.join(rootPath, sourcePath);
      try {
        const attempt = yield* Effect.either(
          summarizeFile(resolvedPath, {
            level,
            maxTokens: perSourceBudget,
          }),
        );
        if (attempt._tag === "Left") {
          overflow.push(sourcePath);
          continue;
        }
        const summary = attempt.right;
        const content = formatSummary(summary);
        const tokens = countTokensApprox(content);
        if (totalTokens + tokens <= budget) {
          sources.push({
            path: path.relative(rootPath, resolvedPath),
            title: summary.title,
            tokens,
            content,
          });
          totalTokens += tokens;
        } else {
          const remaining = budget - totalTokens;
          if (remaining > MIN_PARTIAL_BUDGET) {
            const briefAttempt = yield* Effect.either(
              summarizeFile(resolvedPath, {
                level: "brief",
                maxTokens: remaining,
              }),
            );
            if (briefAttempt._tag === "Left") {
              overflow.push(sourcePath);
              continue;
            }
            const briefSummary = briefAttempt.right;
            const briefContent = formatSummary(briefSummary);
            const briefTokens = countTokensApprox(briefContent);
            // NOTE(review): briefTokens is not re-checked against
            // `remaining`, so totalTokens can end slightly above budget.
            sources.push({
              path: path.relative(rootPath, resolvedPath),
              title: briefSummary.title,
              tokens: briefTokens,
              content: briefContent,
            });
            totalTokens += briefTokens;
          } else {
            overflow.push(path.relative(rootPath, resolvedPath));
          }
        }
      } catch (_e) {
        overflow.push(sourcePath);
      }
    }
    return {
      sources,
      totalTokens,
      budget,
      overflow,
    };
  });
|
|
514
568
|
|
|
569
|
+
// src/embeddings/openai-provider.ts
|
|
570
|
+
import { Console, Effect as Effect2, Effect as Effect4 } from "effect";
|
|
515
571
|
// src/embeddings/semantic-search.ts
|
|
516
572
|
import * as fs3 from "fs/promises";
|
|
573
|
+
import OpenAI from "openai";
|
|
517
574
|
import * as path3 from "path";
|
|
518
|
-
import { Effect as Effect4 } from "effect";
|
|
519
575
|
|
|
520
|
-
// src/embeddings/openai-provider.ts
|
|
521
|
-
import { Console, Effect as Effect2 } from "effect";
|
|
522
|
-
import OpenAI from "openai";
|
|
523
576
|
// Price per one million tokens for each supported OpenAI embedding model
// (presumably USD; confirm against current OpenAI pricing). Models missing
// from this table are billed at the 0.02 fallback used in OpenAIProvider.embed.
var PRICING = {
  "text-embedding-3-small": 0.02,
  "text-embedding-3-large": 0.13,
  "text-embedding-ada-002": 0.1,
};
|
|
528
581
|
/**
 * Raised when no OpenAI API key is available (neither passed explicitly nor
 * present in the environment).
 */
var MissingApiKeyError = class extends Error {
  name = "MissingApiKeyError";

  constructor() {
    super("OPENAI_API_KEY not set");
  }
};
|
|
534
587
|
/**
 * Raised when OpenAI rejects the supplied API key.
 *
 * @param message Optional detail; null/undefined falls back to a generic text.
 */
var InvalidApiKeyError = class extends Error {
  name = "InvalidApiKeyError";

  constructor(message) {
    const text = message ?? "Invalid OPENAI_API_KEY";
    super(text);
  }
};
|
|
540
593
|
/**
 * Embedding provider backed by the OpenAI embeddings API.
 *
 * Vectors are requested at 512 dimensions for models that accept the
 * `dimensions` request parameter. `text-embedding-ada-002` does not accept
 * that parameter and always returns 1536-dimensional vectors, so for that
 * model the parameter is omitted and `dimensions` reports 1536.
 */
var OpenAIProvider = class {
  name;
  dimensions;
  client;
  model;
  batchSize;

  /**
   * @param options `{ apiKey?, model?, batchSize? }`. The key falls back to
   *                `process.env.OPENAI_API_KEY`; model defaults to
   *                "text-embedding-3-small"; batchSize defaults to 100.
   * @throws MissingApiKeyError when no API key can be found.
   * @throws RangeError when batchSize is not a positive number.
   */
  constructor(options = {}) {
    const apiKey = options.apiKey ?? process.env.OPENAI_API_KEY;
    if (!apiKey) {
      throw new MissingApiKeyError();
    }
    this.client = new OpenAI({ apiKey });
    this.model = options.model ?? "text-embedding-3-small";
    this.batchSize = options.batchSize ?? 100;
    // A zero, negative or NaN step would make the batching loop in embed()
    // spin forever.
    if (!(this.batchSize > 0)) {
      throw new RangeError(
        `batchSize must be a positive number, got ${this.batchSize}`,
      );
    }
    this.name = `openai:${this.model}`;
    this.dimensions = this.usesFixedDimensions() ? 1536 : 512;
  }

  /** True for models that reject the `dimensions` request parameter. */
  usesFixedDimensions() {
    return this.model === "text-embedding-ada-002";
  }

  /**
   * Embed a list of texts, issuing one API request per batch.
   *
   * @param texts Array of input strings.
   * @returns `{ embeddings, tokensUsed, cost }`; embeddings are in input order.
   * @throws InvalidApiKeyError when OpenAI reports an authentication error;
   *         any other error is rethrown unchanged.
   */
  async embed(texts) {
    if (texts.length === 0) {
      return { embeddings: [], tokensUsed: 0, cost: 0 };
    }
    const allEmbeddings = [];
    let totalTokens = 0;
    try {
      for (let i = 0; i < texts.length; i += this.batchSize) {
        const batch = texts.slice(i, i + this.batchSize);
        const request = {
          model: this.model,
          input: batch,
        };
        if (!this.usesFixedDimensions()) {
          // Ensure consistent dimensions
          request.dimensions = this.dimensions;
        }
        const response = await this.client.embeddings.create(request);
        for (const item of response.data) {
          allEmbeddings.push(item.embedding);
        }
        totalTokens += response.usage?.total_tokens ?? 0;
      }
    } catch (error) {
      if (error instanceof OpenAI.AuthenticationError) {
        throw new InvalidApiKeyError(error.message);
      }
      throw error;
    }
    // Unknown models are priced like text-embedding-3-small.
    const pricePerMillion = PRICING[this.model] ?? 0.02;
    const cost = (totalTokens / 1e6) * pricePerMillion;
    return {
      embeddings: allEmbeddings,
      tokensUsed: totalTokens,
      cost,
    };
  }
};
|
|
592
645
|
/**
 * Factory for OpenAIProvider; `options` is passed straight to the
 * constructor, which throws MissingApiKeyError when no API key is available.
 */
var createOpenAIProvider = (options) => new OpenAIProvider(options);
|
|
593
|
-
var handleApiKeyError = (effect) =>
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
);
|
|
646
|
+
/**
 * Wrap an effect so that API-key failures print setup guidance to stderr.
 *
 * MissingApiKeyError and InvalidApiKeyError are each replaced, after the
 * guidance is printed, by a generic `Error` ("Missing API key" /
 * "Invalid API key"). All other failures pass through untouched.
 *
 * @param effect Effect that may fail with one of the API-key errors.
 * @returns The same effect with the two handlers attached.
 */
var handleApiKeyError = (effect) => {
  // Print each line via Console.error, then fail with a generic error.
  const report = (lines, failureMessage) =>
    Effect2.gen(function* () {
      for (const line of lines) {
        yield* Console.error(line);
      }
      return yield* Effect2.fail(new Error(failureMessage));
    });

  const onMissingKey = () =>
    report(
      [
        "",
        "Error: OPENAI_API_KEY not set",
        "",
        "To use semantic search, set your OpenAI API key:",
        " export OPENAI_API_KEY=sk-...",
        "",
        "Or add to .env file in project root.",
      ],
      "Missing API key",
    );

  const onInvalidKey = (e) =>
    report(
      [
        "",
        "Error: Invalid OPENAI_API_KEY",
        "",
        "The provided API key was rejected by OpenAI.",
        "Please check your API key is correct:",
        " export OPENAI_API_KEY=sk-...",
        "",
        `Details: ${e.message}`,
      ],
      "Invalid API key",
    );

  return effect.pipe(
    Effect2.catchIf((e) => e instanceof MissingApiKeyError, onMissingKey),
    Effect2.catchIf((e) => e instanceof InvalidApiKeyError, onInvalidKey),
  );
};
|
|
624
680
|
|
|
681
|
+
import { Effect as Effect3 } from "effect";
|
|
625
682
|
// src/embeddings/vector-store.ts
|
|
626
683
|
import * as fs2 from "fs/promises";
|
|
627
|
-
import * as path2 from "path";
|
|
628
|
-
import { Effect as Effect3 } from "effect";
|
|
629
684
|
import HierarchicalNSW from "hnswlib-node";
|
|
685
|
+
import * as path2 from "path";
|
|
686
|
+
|
|
630
687
|
// File names used inside the index directory by HnswVectorStore.
var VECTOR_INDEX_FILE = "vectors.bin"; // HNSW index written by writeIndex()
var VECTOR_META_FILE = "vectors.meta.json"; // entries + cost metadata (JSON)
// Written as `version` into the metadata file on save.
var INDEX_VERSION = 1;
|
|
633
690
|
/**
 * On-disk vector store built on an hnswlib cosine index.
 *
 * The HNSW index holds the vectors under integer labels; `entries` maps each
 * label to its entry and `idToIndex` maps entry ids back to labels. Both the
 * binary index and a JSON metadata file live in the index directory under
 * `rootPath`.
 */
var HnswVectorStore = class {
  rootPath;
  dimensions;
  index = null;
  entries = new Map();
  idToIndex = new Map();
  nextIndex = 0;
  provider = "unknown";
  totalCost = 0;
  totalTokens = 0;
  // ISO timestamp of the first save of this index; restored by load() so
  // later saves no longer overwrite the creation time.
  createdAt = null;

  /**
   * @param rootPath   Project root; resolved to an absolute path.
   * @param dimensions Vector length every stored embedding must have.
   */
  constructor(rootPath, dimensions) {
    this.rootPath = path2.resolve(rootPath);
    this.dimensions = dimensions;
  }

  getIndexDir() {
    return path2.join(this.rootPath, INDEX_DIR);
  }

  getVectorPath() {
    return path2.join(this.getIndexDir(), VECTOR_INDEX_FILE);
  }

  getMetaPath() {
    return path2.join(this.getIndexDir(), VECTOR_META_FILE);
  }

  /** Lazily create the in-memory index (initial capacity 10,000 points). */
  ensureIndex() {
    if (!this.index) {
      this.index = new HierarchicalNSW.HierarchicalNSW(
        "cosine",
        this.dimensions,
      );
      this.index.initIndex(1e4, 16, 200, 100);
    }
    return this.index;
  }

  /**
   * Add entries to the index; entries whose id is already stored are skipped.
   *
   * @param entries Iterable of `{ id, embedding, ... }`.
   * @returns Effect that performs the insertion synchronously.
   */
  add(entries) {
    return Effect3.sync(() => {
      const index = this.ensureIndex();
      for (const entry of entries) {
        if (this.idToIndex.has(entry.id)) {
          continue;
        }
        const idx = this.nextIndex++;
        // Double the capacity when the next label would not fit.
        if (idx >= index.getMaxElements()) {
          index.resizeIndex(index.getMaxElements() * 2);
        }
        index.addPoint(entry.embedding, idx);
        this.entries.set(idx, entry);
        this.idToIndex.set(entry.id, idx);
      }
    });
  }

  /**
   * Nearest-neighbour search.
   *
   * @param vector    Query embedding.
   * @param limit     Maximum number of neighbours requested.
   * @param threshold Minimum similarity (1 - cosine distance) to keep a hit.
   * @returns Effect yielding hits `{ id, sectionId, documentPath, heading, similarity }`.
   */
  search(vector, limit, threshold = 0) {
    return Effect3.sync(() => {
      if (!this.index || this.entries.size === 0) {
        return [];
      }
      const result = this.index.searchKnn(
        vector,
        Math.min(limit, this.entries.size),
      );
      const results = [];
      for (let i = 0; i < result.neighbors.length; i++) {
        const idx = result.neighbors[i];
        const distance = result.distances[i];
        if (idx === void 0 || distance === void 0) {
          continue;
        }
        const similarity = 1 - distance;
        if (similarity < threshold) {
          continue;
        }
        const entry = this.entries.get(idx);
        if (entry) {
          results.push({
            id: entry.id,
            sectionId: entry.sectionId,
            documentPath: entry.documentPath,
            heading: entry.heading,
            similarity,
          });
        }
      }
      return results;
    });
  }

  /**
   * Persist the index and its metadata. Does nothing when no index exists.
   *
   * `createdAt` is fixed at the first save (or restored by load());
   * `updatedAt` is refreshed on every save.
   */
  save() {
    return Effect3.gen(
      function* () {
        if (!this.index) {
          return;
        }
        const indexDir = this.getIndexDir();
        yield* Effect3.promise(() => fs2.mkdir(indexDir, { recursive: true }));
        yield* Effect3.promise(() =>
          this.index.writeIndex(this.getVectorPath()),
        );
        const now = new Date().toISOString();
        this.createdAt = this.createdAt ?? now;
        const meta = {
          version: INDEX_VERSION,
          provider: this.provider,
          dimensions: this.dimensions,
          entries: Object.fromEntries(
            Array.from(this.entries.entries()).map(([idx, entry]) => [
              idx.toString(),
              entry,
            ]),
          ),
          totalCost: this.totalCost,
          totalTokens: this.totalTokens,
          createdAt: this.createdAt,
          updatedAt: now,
        };
        yield* Effect3.promise(() =>
          fs2.writeFile(this.getMetaPath(), JSON.stringify(meta, null, 2)),
        );
      }.bind(this),
    );
  }

  /**
   * Load a previously saved index.
   *
   * @returns Effect yielding `true` when the index was loaded, `false` when
   *          either file is missing or the stored dimensions differ from
   *          this store's dimensions.
   */
  load() {
    return Effect3.gen(
      function* () {
        const vectorPath = this.getVectorPath();
        const metaPath = this.getMetaPath();
        const filesExist = yield* Effect3.tryPromise({
          try: async () => {
            await fs2.access(vectorPath);
            await fs2.access(metaPath);
            return true;
          },
          catch: () => false,
        }).pipe(Effect3.catchAll(() => Effect3.succeed(false)));
        if (!filesExist) {
          return false;
        }
        const metaContent = yield* Effect3.promise(() =>
          fs2.readFile(metaPath, "utf-8"),
        );
        const meta = JSON.parse(metaContent);
        if (meta.dimensions !== this.dimensions) {
          return false;
        }
        this.index = new HierarchicalNSW.HierarchicalNSW(
          "cosine",
          this.dimensions,
        );
        yield* Effect3.promise(() => this.index.readIndex(vectorPath));
        this.entries.clear();
        this.idToIndex.clear();
        this.nextIndex = 0;
        for (const [idxStr, entry] of Object.entries(meta.entries)) {
          const idx = parseInt(idxStr, 10);
          this.entries.set(idx, entry);
          this.idToIndex.set(entry.id, idx);
          // Next free label is one past the highest label on disk.
          this.nextIndex = Math.max(this.nextIndex, idx + 1);
        }
        this.provider = meta.provider;
        this.totalCost = meta.totalCost;
        this.totalTokens = meta.totalTokens;
        this.createdAt = meta.createdAt ?? null;
        return true;
      }.bind(this),
    );
  }

  getStats() {
    return {
      count: this.entries.size,
      dimensions: this.dimensions,
      provider: this.provider,
      totalCost: this.totalCost,
      totalTokens: this.totalTokens,
    };
  }

  setProvider(name) {
    this.provider = name;
  }

  /** Add to the running cost and token totals stored in the metadata. */
  addCost(cost, tokens) {
    this.totalCost += cost;
    this.totalTokens += tokens;
  }
};
|
|
810
|
-
var createVectorStore = (rootPath, dimensions) =>
|
|
867
|
+
/**
 * Factory for HnswVectorStore.
 *
 * @param rootPath   Project root under which the index directory lives.
 * @param dimensions Embedding vector length.
 */
var createVectorStore = (rootPath, dimensions) => {
  return new HnswVectorStore(rootPath, dimensions);
};
|
|
811
869
|
|
|
812
870
|
// src/embeddings/semantic-search.ts
|
|
813
|
-
var generateEmbeddingText = (
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
871
|
+
/**
 * Build the text that is sent to the embedding model for one section.
 *
 * Layout (newline separated): "# <heading>", an optional
 * "Parent section: <parent>" line, "Document: <title>", a blank line, then
 * the section content.
 *
 * @param section       Object with a `heading` property.
 * @param content       Section body text.
 * @param documentTitle Title of the containing document.
 * @param parentHeading Heading of the parent section; the line is omitted
 *                      when this is falsy.
 * @returns The assembled text.
 */
var generateEmbeddingText = (
  section,
  content,
  documentTitle,
  parentHeading,
) =>
  [
    `# ${section.heading}`,
    ...(parentHeading ? [`Parent section: ${parentHeading}`] : []),
    `Document: ${documentTitle}`,
    "",
    content,
  ].join("\n");
|
|
824
887
|
// Price per one million tokens used for cost estimates in this module; the
// value matches the "text-embedding-3-small" entry of PRICING.
var EMBEDDING_PRICE_PER_MILLION = 0.02;
|
|
825
|
-
var estimateEmbeddingCost = (rootPath, options = {}) =>
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
}
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
888
|
+
/**
 * Estimate what embedding the indexed sections would cost, grouped by
 * directory.
 *
 * Sections with fewer than 10 tokens are ignored, as are sections whose
 * document path matches one of `options.excludePatterns`. Patterns are simple
 * globs: `*` matches any run of characters, `?` matches one character, and
 * every other character is matched literally. Regex metacharacters such as
 * `.` or `(` are escaped, so "notes.md" no longer matches "notesXmd" and a
 * pattern containing "(" no longer throws.
 *
 * NOTE(review): buildEmbeddings carries its own copy of the glob conversion;
 * it should receive the same escaping so estimate and build agree.
 *
 * @param rootPath Project root that holds the index.
 * @param options  `{ excludePatterns? }`.
 * @returns Effect yielding totals, a per-directory breakdown sorted by
 *          directory name, and a rough time estimate; fails with an Error
 *          when the index has not been built.
 */
var estimateEmbeddingCost = (rootPath, options = {}) =>
  Effect4.gen(function* () {
    const resolvedRoot = path3.resolve(rootPath);
    const storage = createStorage(resolvedRoot);
    const docIndex = yield* loadDocumentIndex(storage);
    const sectionIndex = yield* loadSectionIndex(storage);
    if (!docIndex || !sectionIndex) {
      return yield* Effect4.fail(
        new Error("Index not found. Run 'mdcontext index' first."),
      );
    }

    // Compile each glob once instead of once per section.
    const excludeMatchers = (options.excludePatterns ?? []).map((pattern) => {
      const source = pattern
        .replace(/[.+^${}()|[\]\\]/g, "\\$&")
        .replace(/\*/g, ".*")
        .replace(/\?/g, ".");
      return new RegExp(`^${source}$`);
    });
    const isExcluded = (docPath) =>
      excludeMatchers.some((regex) => regex.test(docPath));

    const byDir = new Map();
    for (const section of Object.values(sectionIndex.sections)) {
      if (section.tokenCount < 10) continue;
      if (isExcluded(section.documentPath)) continue;
      const dir = path3.dirname(section.documentPath) || ".";
      if (!byDir.has(dir)) {
        byDir.set(dir, {
          files: new Set(),
          sections: 0,
          tokens: 0,
        });
      }
      const entry = byDir.get(dir);
      entry.files.add(section.documentPath);
      entry.sections++;
      entry.tokens += section.tokenCount;
    }

    const directoryEstimates = [];
    let totalFiles = 0;
    let totalSections = 0;
    let totalTokens = 0;
    for (const [dir, data] of byDir) {
      directoryEstimates.push({
        directory: dir,
        fileCount: data.files.size,
        sectionCount: data.sections,
        estimatedTokens: data.tokens,
        estimatedCost: (data.tokens / 1e6) * EMBEDDING_PRICE_PER_MILLION,
      });
      totalFiles += data.files.size;
      totalSections += data.sections;
      totalTokens += data.tokens;
    }
    directoryEstimates.sort((a, b) => a.directory.localeCompare(b.directory));

    // 1.5 seconds per group of 100 sections.
    const estimatedTimeSeconds = Math.ceil(totalSections / 100) * 1.5;
    return {
      totalFiles,
      totalSections,
      totalTokens,
      totalCost: (totalTokens / 1e6) * EMBEDDING_PRICE_PER_MILLION,
      estimatedTimeSeconds,
      byDirectory: directoryEstimates,
    };
  });
|
|
951
|
+
/**
 * Build vector embeddings for the indexed sections under `rootPath`.
 *
 * Flow, as implemented below:
 *  1. Load the document and section indexes; fail if either is missing.
 *  2. Unless `options.force` is set, reuse an existing vector store that
 *     already holds vectors and return a cache-hit summary.
 *  3. Collect sections that belong to a known document, have at least 10
 *     tokens and are not excluded by `options.excludePatterns`.
 *  4. Read each document once, slice out every section's lines and build
 *     its embedding text.
 *  5. Embed all texts in a single provider call, store and save the vectors.
 *
 * @param {string} rootPath - Project root; resolved to an absolute path.
 * @param {object} [options]
 * @param {object} [options.provider] - Embedding provider; defaults to `createOpenAIProvider()`.
 * @param {boolean} [options.force] - Skip the "already embedded" shortcut.
 * @param {string[]} [options.excludePatterns] - Globs (`*`, `?`) matched against the whole document path.
 * @param {Function} [options.onFileProgress] - Called once per document before it is read.
 * @returns An Effect yielding `{ sectionsEmbedded, tokensUsed, cost, duration, filesProcessed }`
 *          (plus `cacheHit`, `existingVectors`, `estimatedSavings` on a cache hit).
 */
var buildEmbeddings = (rootPath, options = {}) =>
  Effect4.gen(function* () {
    const startTime = Date.now();
    const resolvedRoot = path3.resolve(rootPath);
    const storage = createStorage(resolvedRoot);
    const docIndex = yield* loadDocumentIndex(storage);
    const sectionIndex = yield* loadSectionIndex(storage);
    if (!docIndex || !sectionIndex) {
      return yield* Effect4.fail(
        new Error("Index not found. Run 'mdcontext index' first."),
      );
    }
    const provider =
      options.provider ??
      (yield* Effect4.try({
        try: () => createOpenAIProvider(),
        catch: (e) => e,
      }));
    const dimensions = provider.dimensions;
    const vectorStore = createVectorStore(resolvedRoot, dimensions);
    vectorStore.setProvider(provider.name);
    if (!options.force) {
      const loaded = yield* vectorStore.load();
      if (loaded) {
        const stats = vectorStore.getStats();
        if (stats.count > 0) {
          const duration2 = Date.now() - startTime;
          const estimatedSavings =
            (stats.totalTokens / 1e6) * EMBEDDING_PRICE_PER_MILLION;
          return {
            sectionsEmbedded: 0,
            tokensUsed: 0,
            cost: 0,
            duration: duration2,
            filesProcessed: 0,
            cacheHit: true,
            existingVectors: stats.count,
            estimatedSavings,
          };
        }
      }
    }
    // Compile the exclude globs once. Every regex metacharacter is escaped
    // first so that only `*` and `?` act as wildcards; previously "." matched
    // any character and characters such as "(" could throw a SyntaxError.
    const excludeRegexes = (options.excludePatterns ?? []).map((pattern) => {
      const source = pattern
        .replace(/[.+^${}()|[\]\\]/g, "\\$&")
        .replace(/\*/g, ".*")
        .replace(/\?/g, ".");
      return new RegExp(`^${source}$`);
    });
    const isExcluded = (docPath) =>
      excludeRegexes.some((regex) => regex.test(docPath));
    const sectionsByDoc = /* @__PURE__ */ new Map();
    for (const section of Object.values(sectionIndex.sections)) {
      const document = docIndex.documents[section.documentPath];
      if (!document) continue;
      if (section.tokenCount < 10) continue;
      if (isExcluded(section.documentPath)) continue;
      // Parent heading: the last section (in byDocument order) that is one
      // level shallower and starts before this section.
      let parentHeading;
      if (section.level > 1) {
        const docSections = sectionIndex.byDocument[document.id] ?? [];
        for (const sibId of docSections) {
          const sib = sectionIndex.sections[sibId];
          if (
            sib &&
            sib.level === section.level - 1 &&
            sib.startLine < section.startLine
          ) {
            parentHeading = sib.heading;
          }
        }
      }
      const docPath = section.documentPath;
      if (!sectionsByDoc.has(docPath)) {
        sectionsByDoc.set(docPath, []);
      }
      sectionsByDoc.get(docPath).push({ section, parentHeading });
    }
    if (sectionsByDoc.size === 0) {
      const duration2 = Date.now() - startTime;
      return {
        sectionsEmbedded: 0,
        tokensUsed: 0,
        cost: 0,
        duration: duration2,
        filesProcessed: 0,
      };
    }
    const sectionsToEmbed = [];
    const docPaths = Array.from(sectionsByDoc.keys());
    let filesProcessed = 0;
    for (let fileIndex = 0; fileIndex < docPaths.length; fileIndex++) {
      const docPath = docPaths[fileIndex];
      const sections = sectionsByDoc.get(docPath);
      const document = docIndex.documents[docPath];
      if (!document) continue;
      if (options.onFileProgress) {
        options.onFileProgress({
          fileIndex: fileIndex + 1,
          totalFiles: docPaths.length,
          filePath: docPath,
          sectionCount: sections.length,
        });
      }
      const filePath = path3.join(resolvedRoot, docPath);
      // A failing effect is never thrown into the generator, so try/catch
      // around `yield*` cannot skip unreadable files. Recover inside the
      // effect instead and treat `null` as "could not read".
      const fileContent = yield* Effect4.tryPromise(() =>
        fs3.readFile(filePath, "utf-8"),
      ).pipe(Effect4.catchAll(() => Effect4.succeed(null)));
      if (fileContent === null) continue;
      filesProcessed++;
      const lines = fileContent.split("\n");
      for (const { section, parentHeading } of sections) {
        // startLine is used as 1-based and endLine as inclusive.
        const content = lines
          .slice(section.startLine - 1, section.endLine)
          .join("\n");
        const text = generateEmbeddingText(
          section,
          content,
          document.title,
          parentHeading,
        );
        sectionsToEmbed.push({ section, text });
      }
    }
    if (sectionsToEmbed.length === 0) {
      const duration2 = Date.now() - startTime;
      return {
        sectionsEmbedded: 0,
        tokensUsed: 0,
        cost: 0,
        duration: duration2,
        filesProcessed,
      };
    }
    const texts = sectionsToEmbed.map((s) => s.text);
    const result = yield* Effect4.tryPromise({
      try: () => provider.embed(texts),
      catch: (e) => {
        // Keep InvalidApiKeyError as-is; wrap everything else.
        if (e instanceof InvalidApiKeyError) return e;
        return new Error(
          `Embedding failed: ${e instanceof Error ? e.message : String(e)}`,
        );
      },
    });
    const entries = [];
    for (let i = 0; i < sectionsToEmbed.length; i++) {
      const { section } = sectionsToEmbed[i] ?? { section: null };
      const embedding = result.embeddings[i];
      if (!section || !embedding) continue;
      entries.push({
        id: section.id,
        sectionId: section.id,
        documentPath: section.documentPath,
        heading: section.heading,
        embedding,
      });
    }
    yield* vectorStore.add(entries);
    vectorStore.addCost(result.cost, result.tokensUsed);
    yield* vectorStore.save();
    const duration = Date.now() - startTime;
    return {
      sectionsEmbedded: entries.length,
      tokensUsed: result.tokensUsed,
      cost: result.cost,
      duration,
      filesProcessed,
    };
  });
|
|
1123
|
+
/**
 * Run a semantic (embedding-based) search against the stored vectors.
 *
 * The query is embedded with the OpenAI provider, the vector store is asked
 * for up to `limit * 2` candidates, and the candidates are optionally
 * filtered by `options.pathPattern` before being cut down to `limit`.
 *
 * @param {string} rootPath - Project root; resolved to an absolute path.
 * @param {string} query - Natural-language query text.
 * @param {object} [options]
 * @param {number} [options.limit=10] - Maximum number of results returned.
 * @param {number} [options.threshold=0] - Threshold forwarded to `vectorStore.search`.
 * @param {string} [options.pathPattern] - Case-insensitive glob (`*`, `?`) matched against the whole document path.
 * @returns An Effect yielding `{ sectionId, documentPath, heading, similarity }[]`;
 *          fails when no embeddings exist or the query cannot be embedded.
 */
var semanticSearch = (rootPath, query, options = {}) =>
  Effect4.gen(function* () {
    const resolvedRoot = path3.resolve(rootPath);
    const provider = yield* Effect4.try({
      try: () => createOpenAIProvider(),
      catch: (e) => e,
    });
    const dimensions = provider.dimensions;
    const vectorStore = createVectorStore(resolvedRoot, dimensions);
    const loaded = yield* vectorStore.load();
    if (!loaded) {
      return yield* Effect4.fail(
        new Error("Embeddings not found. Run 'mdcontext embed' first."),
      );
    }
    const queryResult = yield* Effect4.tryPromise({
      try: () => provider.embed([query]),
      catch: (e) =>
        new Error(
          `Query embedding failed: ${e instanceof Error ? e.message : String(e)}`,
        ),
    });
    const queryVector = queryResult.embeddings[0];
    if (!queryVector) {
      return yield* Effect4.fail(
        new Error("Failed to generate query embedding"),
      );
    }
    const limit = options.limit ?? 10;
    const threshold = options.threshold ?? 0;
    // Over-fetch so the path filter below still has candidates to keep.
    const searchResults = yield* vectorStore.search(
      queryVector,
      limit * 2,
      threshold,
    );
    let filteredResults = searchResults;
    if (options.pathPattern) {
      // Escape every regex metacharacter, then translate the glob wildcards.
      // Previously only "." was escaped, so "?" acted as a regex quantifier
      // and characters such as "(" could throw a SyntaxError.
      const pattern = options.pathPattern
        .replace(/[.+^${}()|[\]\\]/g, "\\$&")
        .replace(/\*/g, ".*")
        .replace(/\?/g, ".");
      const regex = new RegExp(`^${pattern}$`, "i");
      filteredResults = searchResults.filter((r) => regex.test(r.documentPath));
    }
    const results = filteredResults.slice(0, limit).map((r) => ({
      sectionId: r.sectionId,
      documentPath: r.documentPath,
      heading: r.heading,
      similarity: r.similarity,
    }));
    return results;
  });
|
|
1174
|
+
/**
 * Report what the vector store under `rootPath` currently contains.
 *
 * The store is opened with a fixed dimension of 1536. When nothing can be
 * loaded, a zeroed summary with provider "none" is returned.
 *
 * @param {string} rootPath - Project root; resolved to an absolute path.
 * @returns An Effect yielding
 *          `{ hasEmbeddings, count, provider, dimensions, totalCost, totalTokens }`.
 */
var getEmbeddingStats = (rootPath) =>
  Effect4.gen(function* () {
    const store = createVectorStore(path3.resolve(rootPath), 1536);
    const found = yield* store.load();
    if (found) {
      const snapshot = store.getStats();
      return {
        hasEmbeddings: true,
        count: snapshot.count,
        provider: snapshot.provider,
        dimensions: snapshot.dimensions,
        totalCost: snapshot.totalCost,
        totalTokens: snapshot.totalTokens,
      };
    }
    return {
      hasEmbeddings: false,
      count: 0,
      provider: "none",
      dimensions: 0,
      totalCost: 0,
      totalTokens: 0,
    };
  });
|
|
1117
1199
|
|
|
1200
|
+
import { Effect as Effect5 } from "effect";
|
|
1118
1201
|
// src/search/searcher.ts
|
|
1119
1202
|
import * as fs4 from "fs/promises";
|
|
1120
1203
|
import * as path4 from "path";
|
|
1121
|
-
import { Effect as Effect5 } from "effect";
|
|
1122
1204
|
|
|
1123
1205
|
// src/search/query-parser.ts
|
|
1124
1206
|
/**
 * Split a search query into tokens for the boolean query parser.
 *
 * Token types:
 *  - `PHRASE`  text between double quotes (an unterminated quote runs to the end)
 *  - `LPAREN` / `RPAREN`  the characters "(" and ")"
 *  - `AND` / `OR` / `NOT`  keywords, matched case-insensitively
 *  - `TERM`  any other run of characters that are not whitespace, parentheses or quotes
 *
 * @param {string} query - Raw query text.
 * @returns {{ type: string, value: string }[]} Tokens in source order.
 */
var tokenize = (query) => {
  const keywords = new Set(["AND", "OR", "NOT"]);
  const whitespace = /\s/;
  // Sticky so each word is matched in place at `i`; slicing the remainder of
  // the query for every word made tokenization quadratic in query length.
  const wordPattern = /[^\s()"]+/y;
  const tokens = [];
  let i = 0;
  while (i < query.length) {
    const ch = query[i];
    if (whitespace.test(ch)) {
      i++;
      continue;
    }
    if (ch === '"') {
      const closing = query.indexOf('"', i + 1);
      const end = closing === -1 ? query.length : closing;
      tokens.push({ type: "PHRASE", value: query.slice(i + 1, end) });
      i = end + 1; // step over the closing quote (or past the end)
      continue;
    }
    if (ch === "(") {
      tokens.push({ type: "LPAREN", value: "(" });
      i++;
      continue;
    }
    if (ch === ")") {
      tokens.push({ type: "RPAREN", value: ")" });
      i++;
      continue;
    }
    wordPattern.lastIndex = i;
    const wordMatch = wordPattern.exec(query);
    if (!wordMatch) {
      // Not reachable given the checks above; kept so the loop always advances.
      i++;
      continue;
    }
    const word = wordMatch[0];
    const upperWord = word.toUpperCase();
    if (keywords.has(upperWord)) {
      tokens.push({ type: upperWord, value: upperWord });
    } else {
      tokens.push({ type: "TERM", value: word });
    }
    i += word.length;
  }
  return tokens;
};
|
|
1173
1255
|
/**
 * Recursive-descent parser for boolean search queries.
 *
 * Grammar (lowest to highest precedence):
 *   expr    := andExpr ("OR" andExpr)*
 *   andExpr := notExpr (("AND" | <implicit>) notExpr)*
 *   notExpr := "NOT" notExpr | primary
 *   primary := "(" expr ")" | PHRASE | TERM
 *
 * Two adjacent operands with no operator between them are joined by AND.
 * While parsing, every TERM and PHRASE value is also collected into
 * `terms` / `phrases`.
 */
var Parser = class {
  tokens;
  pos = 0;
  terms = [];
  phrases = [];
  /** @param {{ type: string, value: string }[]} tokens - Tokens to parse. */
  constructor(tokens) {
    this.tokens = tokens;
  }
  /** Token at the cursor, or `undefined` past the end. */
  current() {
    return this.tokens[this.pos];
  }
  /** Return the token at the cursor and move the cursor forward. */
  advance() {
    return this.tokens[this.pos++];
  }
  /** Consume the current token if it has the given type. */
  match(type) {
    if (this.current()?.type === type) {
      this.advance();
      return true;
    }
    return false;
  }
  /**
   * Parse the whole token list.
   *
   * @returns The AST root, or `null` when there are no tokens.
   */
  parse() {
    if (this.tokens.length === 0) {
      return null;
    }
    let ast = this.parseExpr();
    // parseExpr stops at an unmatched ")". Previously everything after it was
    // silently dropped ("a ) b" parsed as "a"). Skip stray ")" tokens and AND
    // the remainder into the result instead.
    while (this.pos < this.tokens.length) {
      if (this.match("RPAREN")) {
        continue;
      }
      const before = this.pos;
      const rest = this.parseExpr();
      if (this.pos === before) {
        // Guard against a token nothing consumes, so the loop terminates.
        this.advance();
        continue;
      }
      ast = { type: "and", left: ast, right: rest };
    }
    return ast;
  }
  parseExpr() {
    let left = this.parseAndExpr();
    while (this.match("OR")) {
      const right = this.parseAndExpr();
      left = { type: "or", left, right };
    }
    return left;
  }
  parseAndExpr() {
    let left = this.parseNotExpr();
    while (this.match("AND") || this.isImplicitAnd()) {
      const right = this.parseNotExpr();
      left = { type: "and", left, right };
    }
    return left;
  }
  /** True when the current token can start an operand (implicit AND). */
  isImplicitAnd() {
    const tok = this.current();
    return (
      tok?.type === "TERM" ||
      tok?.type === "PHRASE" ||
      tok?.type === "NOT" ||
      tok?.type === "LPAREN"
    );
  }
  parseNotExpr() {
    if (this.match("NOT")) {
      const operand = this.parseNotExpr();
      return { type: "not", operand };
    }
    return this.parsePrimary();
  }
  parsePrimary() {
    const tok = this.current();
    if (this.match("LPAREN")) {
      const expr = this.parseExpr();
      this.match("RPAREN"); // a missing ")" is tolerated
      return expr;
    }
    if (tok?.type === "PHRASE") {
      this.advance();
      this.phrases.push(tok.value);
      return { type: "phrase", value: tok.value };
    }
    if (tok?.type === "TERM") {
      this.advance();
      this.terms.push(tok.value);
      return { type: "term", value: tok.value };
    }
    // Missing operand (e.g. a trailing operator): empty placeholder term.
    return { type: "term", value: "" };
  }
};
|
|
1247
1334
|
/**
 * Tokenize and parse a boolean search query.
 *
 * @param {string} query - Raw query text.
 * @returns {{ ast: object, terms: string[], phrases: string[] } | null}
 *          The AST plus the terms and phrases collected by the parser, or
 *          `null` when the query produces no tokens or no AST.
 */
var parseQuery = (query) => {
  const tokenList = tokenize(query);
  if (tokenList.length === 0) {
    return null;
  }
  const queryParser = new Parser(tokenList);
  const tree = queryParser.parse();
  return tree
    ? { ast: tree, terms: queryParser.terms, phrases: queryParser.phrases }
    : null;
};
|
|
1263
1350
|
/**
 * Decide whether a query uses boolean-query syntax.
 *
 * @param {string} query - Raw query text.
 * @returns {boolean} True when any token is AND, OR, NOT, a quoted phrase,
 *          or an opening parenthesis.
 */
var isAdvancedQuery = (query) => {
  const advancedTypes = ["AND", "OR", "NOT", "PHRASE", "LPAREN"];
  for (const token of tokenize(query)) {
    if (advancedTypes.includes(token.type)) {
      return true;
    }
  }
  return false;
};
|
|
1269
1361
|
/**
 * Evaluate a parsed boolean query against a piece of text.
 *
 * Matching is a case-insensitive substring test. A term node with an empty
 * value counts as a match.
 *
 * @param {object} ast - Root node (`term`, `phrase`, `and`, `or` or `not`).
 * @param {string} text - Text to test.
 * @returns {boolean} Whether the text satisfies the query.
 */
var evaluateQuery = (ast, text) => {
  const haystack = text.toLowerCase();
  const contains = (needle) => haystack.includes(needle.toLowerCase());
  const visit = (node) => {
    const kind = node.type;
    if (kind === "term") {
      return node.value ? contains(node.value) : true;
    }
    if (kind === "phrase") {
      return contains(node.value);
    }
    if (kind === "and") {
      return visit(node.left) && visit(node.right);
    }
    if (kind === "or") {
      return visit(node.left) || visit(node.right);
    }
    if (kind === "not") {
      return !visit(node.operand);
    }
    return undefined;
  };
  return visit(ast);
};
|
|
1293
1385
|
/**
 * Build a regex that finds the terms and phrases of a parsed query in text.
 *
 * Terms are matched as whole words, phrases as literal substrings; all
 * alternatives are joined with `|` under the `g` and `i` flags. When there
 * is nothing to highlight, a regex that can never match is returned.
 *
 * @param {{ terms: string[], phrases: string[] }} parsed - Result of `parseQuery`.
 * @returns {RegExp} Highlight pattern.
 */
var buildHighlightPattern = (parsed) => {
  const patterns = [];
  const escapeChars = (s) => s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const isWordChar = (ch) => /\w/.test(ch);
  for (const term of parsed.terms) {
    if (term) {
      // `\b` only matches next to a word character. Anchoring a term that
      // starts or ends with punctuation ("c++", ".env") made it unmatchable,
      // so anchor only the edges that are word characters.
      const lead = isWordChar(term[0]) ? "\\b" : "";
      const trail = isWordChar(term[term.length - 1]) ? "\\b" : "";
      patterns.push(`${lead}${escapeChars(term)}${trail}`);
    }
  }
  for (const phrase of parsed.phrases) {
    if (phrase) {
      patterns.push(escapeChars(phrase));
    }
  }
  if (patterns.length === 0) {
    return /.^/; // never matches
  }
  return new RegExp(patterns.join("|"), "gi");
};
|
|
1311
1403
|
|
|
1312
1404
|
// src/search/searcher.ts
|
|
1313
1405
|
/**
 * Test a file path against a simple glob pattern.
 *
 * `*` matches any run of characters (including "/") and `?` matches exactly
 * one character. Every other character is matched literally. The comparison
 * is case-insensitive and must cover the whole path.
 *
 * @param {string} filePath - Path to test.
 * @param {string} pattern - Glob pattern.
 * @returns {boolean} True when the whole path matches the pattern.
 */
var matchPath = (filePath, pattern) => {
  // Escape every regex metacharacter except the two glob wildcards.
  // Previously only "." was escaped, so "+", "(", "[" etc. were read as regex
  // syntax and could mis-match or throw a SyntaxError.
  const regexPattern = pattern
    .replace(/[.+^${}()|[\]\\]/g, "\\$&")
    .replace(/\*/g, ".*")
    .replace(/\?/g, ".");
  const regex = new RegExp(`^${regexPattern}$`, "i");
  return regex.test(filePath);
};
|
|
1318
|
-
var search = (rootPath, options = {}) =>
|
|
1319
|
-
|
|
1320
|
-
|
|
1321
|
-
|
|
1322
|
-
|
|
1323
|
-
|
|
1324
|
-
|
|
1325
|
-
|
|
1326
|
-
|
|
1327
|
-
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
|
|
1346
|
-
|
|
1347
|
-
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1351
|
-
|
|
1352
|
-
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
|
|
1360
|
-
|
|
1361
|
-
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
|
|
1365
|
-
|
|
1366
|
-
|
|
1367
|
-
|
|
1368
|
-
|
|
1369
|
-
|
|
1370
|
-
|
|
1371
|
-
|
|
1372
|
-
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
1376
|
-
|
|
1377
|
-
|
|
1378
|
-
|
|
1379
|
-
|
|
1380
|
-
|
|
1381
|
-
|
|
1382
|
-
|
|
1383
|
-
|
|
1384
|
-
|
|
1385
|
-
|
|
1386
|
-
|
|
1387
|
-
|
|
1388
|
-
|
|
1389
|
-
|
|
1390
|
-
|
|
1391
|
-
|
|
1392
|
-
|
|
1393
|
-
|
|
1394
|
-
|
|
1395
|
-
|
|
1396
|
-
|
|
1397
|
-
|
|
1398
|
-
|
|
1399
|
-
|
|
1400
|
-
|
|
1401
|
-
|
|
1402
|
-
|
|
1403
|
-
|
|
1404
|
-
|
|
1405
|
-
|
|
1406
|
-
|
|
1407
|
-
|
|
1408
|
-
|
|
1409
|
-
|
|
1410
|
-
|
|
1411
|
-
|
|
1412
|
-
|
|
1413
|
-
|
|
1414
|
-
|
|
1415
|
-
|
|
1416
|
-
|
|
1417
|
-
|
|
1418
|
-
|
|
1419
|
-
|
|
1420
|
-
|
|
1421
|
-
|
|
1422
|
-
|
|
1423
|
-
|
|
1424
|
-
|
|
1425
|
-
|
|
1426
|
-
|
|
1427
|
-
|
|
1428
|
-
|
|
1429
|
-
|
|
1430
|
-
|
|
1431
|
-
|
|
1432
|
-
|
|
1433
|
-
|
|
1434
|
-
|
|
1435
|
-
|
|
1436
|
-
|
|
1437
|
-
|
|
1438
|
-
|
|
1439
|
-
|
|
1440
|
-
|
|
1441
|
-
|
|
1442
|
-
|
|
1443
|
-
|
|
1444
|
-
|
|
1445
|
-
|
|
1446
|
-
|
|
1447
|
-
|
|
1448
|
-
|
|
1449
|
-
|
|
1450
|
-
|
|
1451
|
-
|
|
1452
|
-
|
|
1453
|
-
|
|
1454
|
-
|
|
1455
|
-
|
|
1456
|
-
|
|
1457
|
-
|
|
1458
|
-
|
|
1459
|
-
|
|
1460
|
-
|
|
1461
|
-
|
|
1462
|
-
|
|
1463
|
-
|
|
1464
|
-
|
|
1465
|
-
|
|
1466
|
-
|
|
1467
|
-
|
|
1468
|
-
|
|
1469
|
-
|
|
1470
|
-
|
|
1471
|
-
|
|
1472
|
-
|
|
1473
|
-
|
|
1474
|
-
|
|
1475
|
-
|
|
1476
|
-
|
|
1477
|
-
|
|
1478
|
-
|
|
1479
|
-
|
|
1480
|
-
|
|
1481
|
-
|
|
1482
|
-
|
|
1483
|
-
|
|
1484
|
-
|
|
1485
|
-
|
|
1486
|
-
|
|
1487
|
-
|
|
1488
|
-
|
|
1489
|
-
|
|
1490
|
-
|
|
1491
|
-
|
|
1492
|
-
|
|
1493
|
-
|
|
1494
|
-
|
|
1495
|
-
|
|
1496
|
-
|
|
1497
|
-
|
|
1498
|
-
|
|
1499
|
-
|
|
1500
|
-
|
|
1501
|
-
|
|
1502
|
-
|
|
1503
|
-
|
|
1504
|
-
|
|
1413
|
+
/**
 * Search the section index by metadata only (no file contents are read).
 *
 * A section is kept when it passes every filter that is set: heading regex,
 * path glob, hasCode / hasList / hasTable flags and the minLevel / maxLevel
 * bounds. Sections whose document is missing from the document index are
 * not returned.
 *
 * @param {string} rootPath - Project root handed to `createStorage`.
 * @param {object} [options] - Filters plus an optional `limit`.
 * @returns An Effect yielding `{ section, document }[]`; an empty array when
 *          either index is missing.
 */
var search = (rootPath, options = {}) =>
  Effect5.gen(function* () {
    const storage = createStorage(rootPath);
    const docIndex = yield* loadDocumentIndex(storage);
    const sectionIndex = yield* loadSectionIndex(storage);
    if (!(docIndex && sectionIndex)) {
      return [];
    }
    const found = [];
    const headingRegex = options.heading
      ? new RegExp(options.heading, "i")
      : null;
    // True when any active filter rejects the section.
    const isFilteredOut = (section) => {
      if (headingRegex && !headingRegex.test(section.heading)) return true;
      if (
        options.pathPattern &&
        !matchPath(section.documentPath, options.pathPattern)
      ) {
        return true;
      }
      if (options.hasCode !== undefined && section.hasCode !== options.hasCode) {
        return true;
      }
      if (options.hasList !== undefined && section.hasList !== options.hasList) {
        return true;
      }
      if (
        options.hasTable !== undefined &&
        section.hasTable !== options.hasTable
      ) {
        return true;
      }
      if (options.minLevel !== undefined && section.level < options.minLevel) {
        return true;
      }
      if (options.maxLevel !== undefined && section.level > options.maxLevel) {
        return true;
      }
      return false;
    };
    for (const section of Object.values(sectionIndex.sections)) {
      if (isFilteredOut(section)) {
        continue;
      }
      const owner = docIndex.documents[section.documentPath];
      if (owner) {
        found.push({ section, document: owner });
      }
      // Checked after every section that passes the filters.
      const limitReached =
        options.limit !== undefined && found.length >= options.limit;
      if (limitReached) {
        break;
      }
    }
    return found;
  });
|
|
1463
|
+
/**
 * Search sections by metadata and, optionally, by their text content.
 *
 * `options.content` is treated as a boolean query when `isAdvancedQuery`
 * says so, otherwise as a regular expression (flags `gi`). Files are read
 * only when a content query is present. For every matching line a snippet
 * with `contextBefore` / `contextAfter` surrounding lines (default 1 each)
 * is recorded.
 *
 * @param {string} rootPath - Project root handed to `createStorage`.
 * @param {object} [options] - `content`, `heading`, `pathPattern`, `hasCode`,
 *        `hasList`, `hasTable`, `minLevel`, `maxLevel`, `contextBefore`,
 *        `contextAfter`, `limit`.
 * @returns An Effect yielding result objects `{ section, document }`, with
 *          `sectionContent` and (when lines matched) `matches` added for
 *          content searches; an empty array when either index is missing.
 */
var searchContent = (rootPath, options = {}) =>
  Effect5.gen(function* () {
    const storage = createStorage(rootPath);
    const docIndex = yield* loadDocumentIndex(storage);
    const sectionIndex = yield* loadSectionIndex(storage);
    if (!docIndex || !sectionIndex) {
      return [];
    }
    let parsedQuery = null;
    let contentRegex = null;
    let highlightRegex = null;
    if (options.content) {
      if (isAdvancedQuery(options.content)) {
        parsedQuery = parseQuery(options.content);
        if (parsedQuery) {
          highlightRegex = buildHighlightPattern(parsedQuery);
        }
      } else {
        contentRegex = new RegExp(options.content, "gi");
        highlightRegex = contentRegex;
      }
    }
    const headingRegex = options.heading
      ? new RegExp(options.heading, "i")
      : null;
    const results = [];
    // Null-prototype object: a document path such as "constructor" must not
    // resolve to an inherited Object.prototype member.
    const sectionsByDoc = Object.create(null);
    for (const section of Object.values(sectionIndex.sections)) {
      const docSections = sectionsByDoc[section.documentPath];
      if (docSections) {
        docSections.push(section);
      } else {
        sectionsByDoc[section.documentPath] = [section];
      }
    }
    for (const [docPath, sections] of Object.entries(sectionsByDoc)) {
      if (options.pathPattern && !matchPath(docPath, options.pathPattern)) {
        continue;
      }
      const document = docIndex.documents[docPath];
      if (!document) continue;
      let fileContent = null;
      let fileLines = [];
      if (parsedQuery || contentRegex) {
        const filePath = path4.join(storage.rootPath, docPath);
        // A failing effect is never thrown into the generator, so try/catch
        // around `yield*` cannot skip unreadable files. Recover inside the
        // effect instead and treat `null` as "could not read".
        fileContent = yield* Effect5.tryPromise(() =>
          fs4.readFile(filePath, "utf-8"),
        ).pipe(Effect5.catchAll(() => Effect5.succeed(null)));
        if (fileContent === null) {
          continue;
        }
        fileLines = fileContent.split("\n");
      }
      for (const section of sections) {
        if (headingRegex && !headingRegex.test(section.heading)) {
          continue;
        }
        if (options.hasCode !== void 0 && section.hasCode !== options.hasCode) {
          continue;
        }
        if (options.hasList !== void 0 && section.hasList !== options.hasList) {
          continue;
        }
        if (
          options.hasTable !== void 0 &&
          section.hasTable !== options.hasTable
        ) {
          continue;
        }
        if (options.minLevel !== void 0 && section.level < options.minLevel) {
          continue;
        }
        if (options.maxLevel !== void 0 && section.level > options.maxLevel) {
          continue;
        }
        if ((parsedQuery || contentRegex) && fileContent) {
          // startLine is used as 1-based and endLine as inclusive.
          const sectionLines = fileLines.slice(
            section.startLine - 1,
            section.endLine,
          );
          const sectionContent = sectionLines.join("\n");
          if (parsedQuery) {
            if (!evaluateQuery(parsedQuery.ast, sectionContent)) {
              continue;
            }
          }
          const matches = [];
          const searchRegex = contentRegex || highlightRegex;
          const contextBefore = options.contextBefore ?? 1;
          const contextAfter = options.contextAfter ?? 1;
          if (searchRegex) {
            for (let i = 0; i < sectionLines.length; i++) {
              const line = sectionLines[i];
              if (line && searchRegex.test(line)) {
                // The regex is global: reset it so the next line is tested
                // from its first character.
                searchRegex.lastIndex = 0;
                const absoluteLineNum = section.startLine + i;
                const snippetStart = Math.max(0, i - contextBefore);
                const snippetEnd = Math.min(
                  sectionLines.length,
                  i + contextAfter + 1,
                );
                const snippetLines = sectionLines.slice(
                  snippetStart,
                  snippetEnd,
                );
                const snippet = snippetLines.join("\n");
                const contextLines = [];
                for (let j = snippetStart; j < snippetEnd; j++) {
                  const ctxLine = sectionLines[j];
                  if (ctxLine !== void 0) {
                    contextLines.push({
                      lineNumber: section.startLine + j,
                      line: ctxLine,
                      isMatch: j === i,
                    });
                  }
                }
                matches.push({
                  lineNumber: absoluteLineNum,
                  line,
                  snippet,
                  contextLines,
                });
              }
            }
          }
          // A boolean query that evaluated true is reported even when no
          // single line matched the highlight pattern.
          if (parsedQuery || matches.length > 0) {
            const result = {
              section,
              document,
              sectionContent,
            };
            if (matches.length > 0) {
              results.push({ ...result, matches });
            } else {
              results.push(result);
            }
            if (options.limit !== void 0 && results.length >= options.limit) {
              return results;
            }
          }
        } else if (!parsedQuery && !contentRegex) {
          // Metadata-only search: no content query was given.
          results.push({ section, document });
          if (options.limit !== void 0 && results.length >= options.limit) {
            return results;
          }
        }
      }
    }
    return results;
  });
|
|
1505
1615
|
|
|
1506
1616
|
export {
|
|
1507
|
-
|
|
1508
|
-
|
|
1509
|
-
|
|
1510
|
-
|
|
1511
|
-
|
|
1512
|
-
|
|
1513
|
-
|
|
1514
|
-
|
|
1515
|
-
|
|
1516
|
-
|
|
1517
|
-
|
|
1518
|
-
|
|
1617
|
+
formatSummary,
|
|
1618
|
+
formatAssembledContext,
|
|
1619
|
+
summarizeFile,
|
|
1620
|
+
assembleContext,
|
|
1621
|
+
handleApiKeyError,
|
|
1622
|
+
estimateEmbeddingCost,
|
|
1623
|
+
buildEmbeddings,
|
|
1624
|
+
semanticSearch,
|
|
1625
|
+
getEmbeddingStats,
|
|
1626
|
+
isAdvancedQuery,
|
|
1627
|
+
search,
|
|
1628
|
+
searchContent,
|
|
1519
1629
|
};
|