mdcontext 0.0.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.changeset/README.md +28 -0
- package/.changeset/config.json +11 -0
- package/.claude/settings.local.json +25 -0
- package/.github/workflows/ci.yml +83 -0
- package/.github/workflows/claude-code-review.yml +44 -0
- package/.github/workflows/claude.yml +85 -0
- package/.github/workflows/release.yml +113 -0
- package/.tldrignore +112 -0
- package/BACKLOG.md +338 -0
- package/CONTRIBUTING.md +186 -0
- package/NOTES/NOTES +44 -0
- package/README.md +434 -11
- package/biome.json +36 -0
- package/cspell.config.yaml +14 -0
- package/dist/chunk-23UPXDNL.js +3044 -0
- package/dist/chunk-2W7MO2DL.js +1366 -0
- package/dist/chunk-3NUAZGMA.js +1689 -0
- package/dist/chunk-7TOWB2XB.js +366 -0
- package/dist/chunk-7XOTOADQ.js +3065 -0
- package/dist/chunk-AH2PDM2K.js +3042 -0
- package/dist/chunk-BNXWSZ63.js +3742 -0
- package/dist/chunk-BTL5DJVU.js +3222 -0
- package/dist/chunk-HDHYG7E4.js +104 -0
- package/dist/chunk-HLR4KZBP.js +3234 -0
- package/dist/chunk-IP3FRFEB.js +1045 -0
- package/dist/chunk-KHU56VDO.js +3042 -0
- package/dist/chunk-KRYIFLQR.js +88 -0
- package/dist/chunk-LBSDNLEM.js +287 -0
- package/dist/chunk-MNTQ7HCP.js +2643 -0
- package/dist/chunk-MUJELQQ6.js +1387 -0
- package/dist/chunk-MXJGMSLV.js +2199 -0
- package/dist/chunk-N6QJGC3Z.js +2636 -0
- package/dist/chunk-OBELGBPM.js +1713 -0
- package/dist/chunk-OT7R5XTA.js +3192 -0
- package/dist/chunk-P7X4RA2T.js +106 -0
- package/dist/chunk-PIDUQNC2.js +3185 -0
- package/dist/chunk-POGCDIH4.js +3187 -0
- package/dist/chunk-PSIEOQGZ.js +3043 -0
- package/dist/chunk-PVRT3IHA.js +3238 -0
- package/dist/chunk-QNN4TT23.js +1430 -0
- package/dist/chunk-RE3R45RJ.js +3042 -0
- package/dist/chunk-S7E6TFX6.js +803 -0
- package/dist/chunk-SG6GLU4U.js +1378 -0
- package/dist/chunk-SJCDV2ST.js +274 -0
- package/dist/chunk-SYE5XLF3.js +104 -0
- package/dist/chunk-T5VLYBZD.js +103 -0
- package/dist/chunk-TOQB7VWU.js +3238 -0
- package/dist/chunk-VFNMZ4ZQ.js +3228 -0
- package/dist/chunk-VVTGZNBT.js +1629 -0
- package/dist/chunk-W7Q4RFEV.js +104 -0
- package/dist/chunk-XTYYVRLO.js +3190 -0
- package/dist/chunk-Y6MDYVJD.js +3063 -0
- package/dist/cli/main.d.ts +1 -0
- package/dist/cli/main.js +5458 -0
- package/dist/index.d.ts +653 -0
- package/dist/index.js +79 -0
- package/dist/mcp/server.d.ts +1 -0
- package/dist/mcp/server.js +472 -0
- package/dist/schema-BAWSG7KY.js +22 -0
- package/dist/schema-E3QUPL26.js +20 -0
- package/dist/schema-EHL7WUT6.js +20 -0
- package/docs/019-USAGE.md +625 -0
- package/docs/020-current-implementation.md +364 -0
- package/docs/021-DOGFOODING-FINDINGS.md +175 -0
- package/docs/BACKLOG.md +80 -0
- package/docs/CONFIG.md +1123 -0
- package/docs/DESIGN.md +439 -0
- package/docs/ERRORS.md +383 -0
- package/docs/PROJECT.md +88 -0
- package/docs/ROADMAP.md +407 -0
- package/docs/summarization.md +320 -0
- package/docs/test-links.md +9 -0
- package/justfile +40 -0
- package/package.json +74 -9
- package/pnpm-workspace.yaml +5 -0
- package/research/INDEX.md +315 -0
- package/research/code-review/README.md +90 -0
- package/research/code-review/cli-error-handling-review.md +979 -0
- package/research/code-review/code-review-validation-report.md +464 -0
- package/research/code-review/main-ts-review.md +1128 -0
- package/research/config-analysis/01-current-implementation.md +470 -0
- package/research/config-analysis/02-strategy-recommendation.md +428 -0
- package/research/config-analysis/03-task-candidates.md +715 -0
- package/research/config-analysis/033-research-configuration-management.md +828 -0
- package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
- package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
- package/research/config-docs/SUMMARY.md +357 -0
- package/research/config-docs/TEST-RESULTS.md +776 -0
- package/research/config-docs/TODO.md +542 -0
- package/research/config-docs/analysis.md +744 -0
- package/research/config-docs/fix-validation.md +502 -0
- package/research/config-docs/help-audit.md +264 -0
- package/research/config-docs/help-system-analysis.md +890 -0
- package/research/dogfood/consolidated-tool-evaluation.md +373 -0
- package/research/dogfood/strategy-a/a-synthesis.md +184 -0
- package/research/dogfood/strategy-a/a1-docs.md +226 -0
- package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
- package/research/dogfood/strategy-a/a3-llm.md +164 -0
- package/research/dogfood/strategy-b/b-synthesis.md +228 -0
- package/research/dogfood/strategy-b/b1-architecture.md +207 -0
- package/research/dogfood/strategy-b/b2-gaps.md +258 -0
- package/research/dogfood/strategy-b/b3-workflows.md +250 -0
- package/research/dogfood/strategy-c/c-synthesis.md +451 -0
- package/research/dogfood/strategy-c/c1-explorer.md +192 -0
- package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
- package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
- package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
- package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
- package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
- package/research/effect-cli-error-handling.md +845 -0
- package/research/effect-errors-as-values.md +943 -0
- package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
- package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
- package/research/errors-task-analysis/embeddings-analysis.md +709 -0
- package/research/errors-task-analysis/index-search-analysis.md +812 -0
- package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
- package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
- package/research/issue-review.md +603 -0
- package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
- package/research/llm-summarization/alternative-providers-2026.md +1428 -0
- package/research/llm-summarization/anthropic-2026.md +367 -0
- package/research/llm-summarization/claude-cli-integration.md +1706 -0
- package/research/llm-summarization/cli-integration-patterns.md +3155 -0
- package/research/llm-summarization/openai-2026.md +473 -0
- package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
- package/research/llm-summarization/opencode-cli-integration.md +1552 -0
- package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
- package/research/llm-summarization/prototype-results.md +56 -0
- package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
- package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
- package/research/mdcontext-error-analysis.md +521 -0
- package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
- package/research/mdcontext-pudding/01-index-embed.md +956 -0
- package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
- package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
- package/research/mdcontext-pudding/02-search.md +970 -0
- package/research/mdcontext-pudding/03-context.md +779 -0
- package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
- package/research/mdcontext-pudding/04-tree.md +704 -0
- package/research/mdcontext-pudding/05-config.md +1038 -0
- package/research/mdcontext-pudding/06-links-summary.txt +87 -0
- package/research/mdcontext-pudding/06-links.md +679 -0
- package/research/mdcontext-pudding/07-stats.md +693 -0
- package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
- package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
- package/research/mdcontext-pudding/README.md +168 -0
- package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
- package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
- package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
- package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
- package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
- package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
- package/research/research-quality-review.md +834 -0
- package/research/semantic-search/002-research-embedding-models.md +490 -0
- package/research/semantic-search/003-research-rag-alternatives.md +523 -0
- package/research/semantic-search/004-research-vector-search.md +841 -0
- package/research/semantic-search/032-research-semantic-search.md +427 -0
- package/research/semantic-search/embedding-text-analysis.md +156 -0
- package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
- package/research/semantic-search/query-processing-analysis.md +207 -0
- package/research/semantic-search/root-cause-and-solution.md +114 -0
- package/research/semantic-search/threshold-validation-report.md +69 -0
- package/research/semantic-search/vector-search-analysis.md +63 -0
- package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
- package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
- package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
- package/research/task-management-2026/03-lightweight-file-based.md +567 -0
- package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
- package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
- package/research/task-management-2026/linear/02-api-integrations.md +930 -0
- package/research/task-management-2026/linear/03-ai-features.md +368 -0
- package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
- package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
- package/research/test-path-issues.md +276 -0
- package/review/ALP-76/1-error-type-design.md +962 -0
- package/review/ALP-76/2-error-handling-patterns.md +906 -0
- package/review/ALP-76/3-error-presentation.md +624 -0
- package/review/ALP-76/4-test-coverage.md +625 -0
- package/review/ALP-76/5-migration-completeness.md +440 -0
- package/review/ALP-76/6-effect-best-practices.md +755 -0
- package/scripts/apply-branch-protection.sh +47 -0
- package/scripts/branch-protection-templates.json +79 -0
- package/scripts/prototype-summarization.ts +346 -0
- package/scripts/rebuild-hnswlib.js +58 -0
- package/scripts/setup-branch-protection.sh +64 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
- package/src/cli/argv-preprocessor.test.ts +210 -0
- package/src/cli/argv-preprocessor.ts +202 -0
- package/src/cli/cli.test.ts +627 -0
- package/src/cli/commands/backlinks.ts +54 -0
- package/src/cli/commands/config-cmd.ts +642 -0
- package/src/cli/commands/context.ts +285 -0
- package/src/cli/commands/duplicates.ts +122 -0
- package/src/cli/commands/embeddings.ts +529 -0
- package/src/cli/commands/index-cmd.ts +480 -0
- package/src/cli/commands/index.ts +16 -0
- package/src/cli/commands/links.ts +52 -0
- package/src/cli/commands/search.ts +1281 -0
- package/src/cli/commands/stats.ts +149 -0
- package/src/cli/commands/tree.ts +128 -0
- package/src/cli/config-layer.ts +176 -0
- package/src/cli/error-handler.test.ts +235 -0
- package/src/cli/error-handler.ts +655 -0
- package/src/cli/flag-schemas.ts +341 -0
- package/src/cli/help.ts +588 -0
- package/src/cli/index.ts +9 -0
- package/src/cli/main.ts +435 -0
- package/src/cli/options.ts +41 -0
- package/src/cli/shared-error-handling.ts +199 -0
- package/src/cli/typo-suggester.test.ts +105 -0
- package/src/cli/typo-suggester.ts +130 -0
- package/src/cli/utils.ts +259 -0
- package/src/config/file-provider.test.ts +320 -0
- package/src/config/file-provider.ts +273 -0
- package/src/config/index.ts +72 -0
- package/src/config/integration.test.ts +667 -0
- package/src/config/precedence.test.ts +277 -0
- package/src/config/precedence.ts +451 -0
- package/src/config/schema.test.ts +414 -0
- package/src/config/schema.ts +603 -0
- package/src/config/service.test.ts +320 -0
- package/src/config/service.ts +243 -0
- package/src/config/testing.test.ts +264 -0
- package/src/config/testing.ts +110 -0
- package/src/core/index.ts +1 -0
- package/src/core/types.ts +113 -0
- package/src/duplicates/detector.test.ts +183 -0
- package/src/duplicates/detector.ts +414 -0
- package/src/duplicates/index.ts +18 -0
- package/src/embeddings/embedding-namespace.test.ts +300 -0
- package/src/embeddings/embedding-namespace.ts +947 -0
- package/src/embeddings/heading-boost.test.ts +222 -0
- package/src/embeddings/hnsw-build-options.test.ts +198 -0
- package/src/embeddings/hyde.test.ts +272 -0
- package/src/embeddings/hyde.ts +264 -0
- package/src/embeddings/index.ts +10 -0
- package/src/embeddings/openai-provider.ts +414 -0
- package/src/embeddings/pricing.json +22 -0
- package/src/embeddings/provider-constants.ts +204 -0
- package/src/embeddings/provider-errors.test.ts +967 -0
- package/src/embeddings/provider-errors.ts +565 -0
- package/src/embeddings/provider-factory.test.ts +240 -0
- package/src/embeddings/provider-factory.ts +225 -0
- package/src/embeddings/provider-integration.test.ts +788 -0
- package/src/embeddings/query-preprocessing.test.ts +187 -0
- package/src/embeddings/semantic-search-threshold.test.ts +508 -0
- package/src/embeddings/semantic-search.ts +1270 -0
- package/src/embeddings/types.ts +359 -0
- package/src/embeddings/vector-store.ts +708 -0
- package/src/embeddings/voyage-provider.ts +313 -0
- package/src/errors/errors.test.ts +845 -0
- package/src/errors/index.ts +533 -0
- package/src/index/ignore-patterns.test.ts +354 -0
- package/src/index/ignore-patterns.ts +305 -0
- package/src/index/index.ts +4 -0
- package/src/index/indexer.ts +684 -0
- package/src/index/storage.ts +260 -0
- package/src/index/types.ts +147 -0
- package/src/index/watcher.ts +189 -0
- package/src/index.ts +30 -0
- package/src/integration/search-keyword.test.ts +678 -0
- package/src/mcp/server.ts +612 -0
- package/src/parser/index.ts +1 -0
- package/src/parser/parser.test.ts +291 -0
- package/src/parser/parser.ts +394 -0
- package/src/parser/section-filter.test.ts +277 -0
- package/src/parser/section-filter.ts +392 -0
- package/src/search/__tests__/hybrid-search.test.ts +650 -0
- package/src/search/bm25-store.ts +366 -0
- package/src/search/cross-encoder.test.ts +253 -0
- package/src/search/cross-encoder.ts +406 -0
- package/src/search/fuzzy-search.test.ts +419 -0
- package/src/search/fuzzy-search.ts +273 -0
- package/src/search/hybrid-search.ts +448 -0
- package/src/search/path-matcher.test.ts +276 -0
- package/src/search/path-matcher.ts +33 -0
- package/src/search/query-parser.test.ts +260 -0
- package/src/search/query-parser.ts +319 -0
- package/src/search/searcher.test.ts +280 -0
- package/src/search/searcher.ts +724 -0
- package/src/search/wink-bm25.d.ts +30 -0
- package/src/summarization/cli-providers/claude.ts +202 -0
- package/src/summarization/cli-providers/detection.test.ts +273 -0
- package/src/summarization/cli-providers/detection.ts +118 -0
- package/src/summarization/cli-providers/index.ts +8 -0
- package/src/summarization/cost.test.ts +139 -0
- package/src/summarization/cost.ts +102 -0
- package/src/summarization/error-handler.test.ts +127 -0
- package/src/summarization/error-handler.ts +111 -0
- package/src/summarization/index.ts +102 -0
- package/src/summarization/pipeline.test.ts +498 -0
- package/src/summarization/pipeline.ts +231 -0
- package/src/summarization/prompts.test.ts +269 -0
- package/src/summarization/prompts.ts +133 -0
- package/src/summarization/provider-factory.test.ts +396 -0
- package/src/summarization/provider-factory.ts +178 -0
- package/src/summarization/types.ts +184 -0
- package/src/summarize/budget-bugs.test.ts +620 -0
- package/src/summarize/formatters.ts +419 -0
- package/src/summarize/index.ts +20 -0
- package/src/summarize/summarizer.test.ts +275 -0
- package/src/summarize/summarizer.ts +597 -0
- package/src/summarize/verify-bugs.test.ts +238 -0
- package/src/types/huggingface-transformers.d.ts +66 -0
- package/src/utils/index.ts +1 -0
- package/src/utils/tokens.test.ts +142 -0
- package/src/utils/tokens.ts +186 -0
- package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
- package/tests/fixtures/cli/.mdcontext/config.json +8 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
- package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
- package/tests/fixtures/cli/.mdcontext/indexes/sections.json +247 -0
- package/tests/fixtures/cli/README.md +9 -0
- package/tests/fixtures/cli/api-reference.md +11 -0
- package/tests/fixtures/cli/getting-started.md +11 -0
- package/tests/integration/embed-index.test.ts +712 -0
- package/tests/integration/search-context.test.ts +469 -0
- package/tests/integration/search-semantic.test.ts +522 -0
- package/tsconfig.json +26 -0
- package/vitest.config.ts +16 -0
- package/vitest.setup.ts +12 -0
|
@@ -0,0 +1,3063 @@
|
|
|
1
|
+
import {
|
|
2
|
+
ApiKeyInvalidError,
|
|
3
|
+
ApiKeyMissingError,
|
|
4
|
+
ConfigError,
|
|
5
|
+
DimensionMismatchError,
|
|
6
|
+
EmbeddingError,
|
|
7
|
+
EmbeddingsNotFoundError,
|
|
8
|
+
INDEX_DIR,
|
|
9
|
+
IndexNotFoundError,
|
|
10
|
+
VectorStoreError,
|
|
11
|
+
countTokensApprox,
|
|
12
|
+
createStorage,
|
|
13
|
+
loadDocumentIndex,
|
|
14
|
+
loadSectionIndex,
|
|
15
|
+
parseFile
|
|
16
|
+
} from "./chunk-SG6GLU4U.js";
|
|
17
|
+
import {
|
|
18
|
+
MdContextConfig,
|
|
19
|
+
defaultConfig
|
|
20
|
+
} from "./chunk-7TOWB2XB.js";
|
|
21
|
+
|
|
22
|
+
// src/config/precedence.ts
|
|
23
|
+
import { ConfigProvider as ConfigProvider2, Effect as Effect2 } from "effect";
|
|
24
|
+
|
|
25
|
+
// src/config/file-provider.ts
|
|
26
|
+
import * as fs from "fs";
|
|
27
|
+
import * as path from "path";
|
|
28
|
+
import { ConfigProvider, Effect } from "effect";
|
|
29
|
+
// File names probed in every directory, in lookup order: findConfigFile
// returns the first of these that exists, so earlier entries win.
var CONFIG_FILE_NAMES = [
  ...["ts", "js", "mjs", "json"].map((extension) => `mdcontext.config.${extension}`),
  ".mdcontextrc",
  ".mdcontextrc.json"
];
|
|
37
|
+
/**
 * Search for a config file, starting in `startDir` and walking up through
 * every ancestor directory, the filesystem root included.
 *
 * In each directory the names in CONFIG_FILE_NAMES are tried in order and the
 * first one that exists wins.
 *
 * @param {string} startDir - Directory to start from; resolved to an absolute path.
 * @returns {{ path: string, format: string } | null} Location and format of the
 *   first config file found, or `null` when no directory on the way up has one.
 */
var findConfigFile = (startDir) => {
  let currentDir = path.resolve(startDir);
  for (; ; ) {
    for (const fileName of CONFIG_FILE_NAMES) {
      const configPath = path.join(currentDir, fileName);
      if (fs.existsSync(configPath)) {
        return { path: configPath, format: getConfigFormat(fileName) };
      }
    }
    const parentDir = path.dirname(currentDir);
    // path.dirname() returns its argument unchanged for a filesystem root, so
    // this check fires only after the root itself has been probed. The old
    // `while (currentDir !== root)` guard stopped before looking in the root.
    if (parentDir === currentDir) {
      return null;
    }
    currentDir = parentDir;
  }
};
|
|
54
|
+
/**
 * Classify a config file by the suffix of its name.
 *
 * @param {string} fileName - File name (or path) to inspect.
 * @returns {"ts" | "js" | "json"} "ts" for `.ts`, "js" for `.js`/`.mjs`,
 *   and "json" for everything else (including extension-less rc files).
 */
var getConfigFormat = (fileName) => {
  const suffixFormats = [
    [".ts", "ts"],
    [".js", "js"],
    [".mjs", "js"]
  ];
  const hit = suffixFormats.find(([suffix]) => fileName.endsWith(suffix));
  return hit ? hit[1] : "json";
};
|
|
59
|
+
/**
 * Build an effect that reads `filePath` synchronously as UTF-8 and parses it
 * as JSON.
 *
 * Anything thrown while reading or parsing is converted into a ConfigError
 * for the "configFile" field, with the original error kept as `cause`.
 *
 * @param {string} filePath - Path of the JSON config file.
 * @returns An Effect yielding the parsed value or failing with ConfigError.
 */
var loadJsonConfig = (filePath) => {
  const readAndParse = () => JSON.parse(fs.readFileSync(filePath, "utf-8"));
  const toConfigError = (error) => {
    const reason = error instanceof Error ? error.message : String(error);
    return new ConfigError({
      field: "configFile",
      message: `Failed to load config from ${filePath}: ${reason}`,
      cause: error
    });
  };
  return Effect.try({ try: readAndParse, catch: toConfigError });
};
|
|
70
|
+
/**
 * Build an effect that loads a JavaScript config module via dynamic import.
 *
 * The module must expose the config object as its default export or as a
 * named `config` export; the default export takes precedence.
 *
 * @param {string} filePath - Filesystem path of the module to import.
 * @returns An Effect yielding the exported config object, or failing with a
 *   ConfigError (field "configFile") when the import fails or the export is
 *   missing / not an object.
 */
var loadJsConfig = (filePath) => Effect.tryPromise({
  try: async () => {
    // Build the URL with pathToFileURL rather than `file://${filePath}`:
    // string concatenation breaks on Windows drive paths and on characters
    // that are special in URLs ('#', '?', '%', spaces).
    const { pathToFileURL } = await import("url");
    const fileUrl = pathToFileURL(filePath).href;
    const module = await import(fileUrl);
    const config = module.default ?? module.config;
    if (!config || typeof config !== "object") {
      throw new Error(
        'Config file must export a default object or named "config" export'
      );
    }
    return config;
  },
  catch: (error) => new ConfigError({
    field: "configFile",
    message: `Failed to load config from ${filePath}: ${error instanceof Error ? error.message : String(error)}`,
    cause: error
  })
});
|
|
88
|
+
/**
 * Pick the loader that matches `format` and apply it to `filePath`.
 *
 * @param {string} filePath - Path of the config file.
 * @param {string} format - "json", "ts" or "js".
 * @returns The effect produced by loadJsonConfig ("json") or loadJsConfig
 *   ("ts" / "js"); `undefined` for any other format value.
 */
var loadConfigFromFile = (filePath, format) => {
  if (format === "json") {
    return loadJsonConfig(filePath);
  }
  if (format === "ts" || format === "js") {
    return loadJsConfig(filePath);
  }
};
|
|
97
|
+
/**
 * Locate a config file via findConfigFile(startDir) and load it.
 *
 * @param {string} startDir - Directory the search starts from.
 * @returns An Effect yielding either
 *   `{ found: false, searched }` — `searched` lists the candidate paths
 *   inside `startDir` itself — or `{ found: true, path, config }` with the
 *   loaded config. Loader failures propagate as the effect's error.
 */
var loadConfigFile = (startDir) => Effect.gen(function* () {
  const located = findConfigFile(startDir);
  if (located) {
    const { path: configPath, format } = located;
    const config = yield* loadConfigFromFile(configPath, format);
    return { found: true, path: configPath, config };
  }
  const searched = [];
  for (const candidate of CONFIG_FILE_NAMES) {
    searched.push(path.join(startDir, candidate));
  }
  return { found: false, searched };
});
|
|
112
|
+
/**
 * Load the config file at an explicitly given path.
 *
 * @param {string} configPath - Path to the config file; resolved to absolute.
 * @returns An Effect yielding the loaded config. It fails with a ConfigError
 *   (field "configFile") when the resolved path does not exist, and passes on
 *   any failure from the format-specific loader.
 */
var loadConfigFromPath = (configPath) => Effect.gen(function* () {
  const absolutePath = path.resolve(configPath);
  if (fs.existsSync(absolutePath)) {
    // The format is derived from the file name only, not from file contents.
    const fileName = path.basename(configPath);
    return yield* loadConfigFromFile(absolutePath, getConfigFormat(fileName));
  }
  const notFound = new ConfigError({
    field: "configFile",
    message: `Config file not found: ${absolutePath}`
  });
  return yield* Effect.fail(notFound);
});
|
|
125
|
+
|
|
126
|
+
// src/config/precedence.ts
|
|
127
|
+
/**
 * Flatten a nested config object into a Map of dotted key paths to strings.
 *
 * - `null` / `undefined` values are skipped.
 * - Arrays become a single comma-joined string.
 * - Plain objects are descended into, extending the key path with ".".
 * - Every other value is stored via String().
 *
 * @param {*} config - Value to flatten (normally a nested object).
 * @param {string} [prefix=""] - Key path prepended to every entry.
 * @returns {Map<string, string>} Flattened entries in depth-first order.
 */
var flattenConfig = (config, prefix = "") => {
  const flat = new Map();
  const visit = (node, keyPath) => {
    if (node === null || node === undefined) {
      return;
    }
    if (Array.isArray(node)) {
      flat.set(keyPath, node.join(","));
    } else if (typeof node !== "object") {
      flat.set(keyPath, String(node));
    } else {
      Object.entries(node).forEach(([childKey, childValue]) => {
        visit(childValue, keyPath ? `${keyPath}.${childKey}` : childKey);
      });
    }
  };
  visit(config, prefix);
  return flat;
};
|
|
149
|
+
// Separator handed to ConfigProvider.fromMap as `seqDelim` by both provider
// factories below; flattenConfig joins array values with the same "," literal.
var ENV_SEQ_DELIM = ",";
|
|
150
|
+
// Config keys grouped by section. generateEnvKeyMapping derives the accepted
// environment variable suffixes (`<section>_<key>`, lower-cased) from this table.
var CONFIG_SCHEMA_KEYS = {
  index: ["maxDepth", "excludePatterns", "fileExtensions", "followSymlinks", "indexDir"],
  search: ["defaultLimit", "maxLimit", "minSimilarity", "includeSnippets", "snippetLength", "autoIndexThreshold"],
  embeddings: ["provider", "baseURL", "model", "dimensions", "batchSize", "maxRetries", "retryDelayMs", "timeoutMs", "apiKey"],
  summarization: ["briefTokenBudget", "summaryTokenBudget", "compressionRatio", "minSectionTokens", "maxTopics", "minPartialBudget"],
  output: ["format", "color", "prettyJson", "verbose", "debug"],
  paths: ["root", "configFile", "cacheDir"]
};
|
|
188
|
+
/**
 * Derive the env-suffix → config-key lookup table from CONFIG_SCHEMA_KEYS.
 *
 * Each `section` / `key` pair contributes one entry whose property name is
 * `<section>_<key>` lower-cased and whose value is the dotted `<section>.<key>`.
 *
 * @returns {Record<string, string>} The mapping, in table order.
 */
var generateEnvKeyMapping = () => Object.fromEntries(
  Object.entries(CONFIG_SCHEMA_KEYS).flatMap(
    ([section, keys]) => keys.map((key) => [
      `${section}_${key}`.toLowerCase(),
      `${section}.${key}`
    ])
  )
);
|
|
199
|
+
// Lower-cased env-var suffix -> dotted config key, computed once when this
// module is evaluated; readEnvConfig looks suffixes up in it.
var ENV_KEY_MAPPING = generateEnvKeyMapping();
|
|
200
|
+
/**
 * Collect config overrides from environment variables.
 *
 * A variable named `<prefix>_<SUFFIX>` is accepted when `<SUFFIX>`,
 * lower-cased, is one of ENV_KEY_MAPPING's own keys; its value is stored
 * under the mapped dotted config key. All other variables are ignored.
 *
 * @param {string} [prefix="MDCONTEXT"] - Variable name prefix (without the trailing "_").
 * @returns {Map<string, string>} Dotted config key -> raw environment value.
 */
var readEnvConfig = (prefix = "MDCONTEXT") => {
  const result = /* @__PURE__ */ new Map();
  const prefixWithUnderscore = `${prefix}_`;
  for (const [key, value] of Object.entries(process.env)) {
    if (!key.startsWith(prefixWithUnderscore) || value === void 0) {
      continue;
    }
    const envKey = key.slice(prefixWithUnderscore.length).toLowerCase();
    // Own-property check: a bare `ENV_KEY_MAPPING[envKey]` lookup also resolves
    // inherited members, so e.g. MDCONTEXT_CONSTRUCTOR or MDCONTEXT___PROTO__
    // used to insert a non-string key (Object / Object.prototype) into the map.
    if (!Object.prototype.hasOwnProperty.call(ENV_KEY_MAPPING, envKey)) {
      continue;
    }
    const configKey = ENV_KEY_MAPPING[envKey];
    if (configKey) {
      result.set(configKey, value);
    }
  }
  return result;
};
|
|
214
|
+
/**
 * Build a ConfigProvider by layering three sources, lowest priority first:
 * config file, environment variables, CLI overrides. Later layers overwrite
 * earlier ones key by key.
 *
 * @param {object} [options]
 * @param {object} [options.cliOverrides] - Nested overrides; applied last when non-empty.
 * @param {string} [options.configPath] - Explicit config file; skips discovery.
 * @param {string} [options.workingDir=process.cwd()] - Start directory for discovery.
 * @param {string} [options.envPrefix="MDCONTEXT"] - Environment variable prefix.
 * @param {boolean} [options.skipConfigFile=false] - Do not read any config file.
 * @param {boolean} [options.skipEnv=false] - Do not read environment variables.
 * @returns An Effect yielding the merged ConfigProvider; it fails when loading
 *   the config file fails.
 */
var createConfigProvider = (options = {}) => Effect2.gen(function* () {
  const {
    cliOverrides,
    configPath,
    workingDir = process.cwd(),
    envPrefix = "MDCONTEXT",
    skipConfigFile = false,
    skipEnv = false
  } = options;
  const merged = new Map();
  // Copy one layer on top of whatever has been merged so far.
  const overlay = (entries) => {
    for (const [key, value] of entries) {
      merged.set(key, value);
    }
  };

  if (!skipConfigFile) {
    let fileConfig;
    if (!configPath) {
      const discovery = yield* loadConfigFile(workingDir);
      if (discovery.found) {
        fileConfig = discovery.config;
      }
    } else {
      fileConfig = yield* loadConfigFromPath(configPath);
    }
    if (fileConfig) {
      overlay(flattenConfig(fileConfig));
    }
  }

  if (!skipEnv) {
    overlay(readEnvConfig(envPrefix));
  }

  const hasCliOverrides = cliOverrides && Object.keys(cliOverrides).length > 0;
  if (hasCliOverrides) {
    overlay(flattenConfig(cliOverrides));
  }

  return ConfigProvider2.fromMap(merged, {
    pathDelim: ".",
    seqDelim: ENV_SEQ_DELIM
  });
});
|
|
258
|
+
/**
 * Synchronous variant of createConfigProvider: the caller supplies an
 * already-loaded `fileConfig` instead of having a file located and read.
 *
 * Layers are merged lowest priority first — file config, environment
 * variables, CLI overrides — with later layers overwriting earlier keys.
 *
 * @param {object} [options]
 * @param {object} [options.cliOverrides] - Nested overrides; applied last when non-empty.
 * @param {object} [options.fileConfig] - Pre-loaded config file contents.
 * @param {string} [options.envPrefix="MDCONTEXT"] - Environment variable prefix.
 * @param {boolean} [options.skipConfigFile=false] - Ignore `fileConfig`.
 * @param {boolean} [options.skipEnv=false] - Do not read environment variables.
 * @returns The merged ConfigProvider.
 */
var createConfigProviderSync = (options = {}) => {
  const {
    cliOverrides,
    fileConfig,
    envPrefix = "MDCONTEXT",
    skipConfigFile = false,
    skipEnv = false
  } = options;

  // Collect the layers in priority order, then fold them into a single Map;
  // for duplicate keys the Map constructor keeps the last value seen.
  const layers = [];
  if (!skipConfigFile && fileConfig) {
    layers.push(flattenConfig(fileConfig));
  }
  if (!skipEnv) {
    layers.push(readEnvConfig(envPrefix));
  }
  if (cliOverrides && Object.keys(cliOverrides).length > 0) {
    layers.push(flattenConfig(cliOverrides));
  }
  const mergedMap = new Map(layers.flatMap((layer) => [...layer]));

  return ConfigProvider2.fromMap(mergedMap, {
    pathDelim: ".",
    seqDelim: ENV_SEQ_DELIM
  });
};
|
|
290
|
+
|
|
291
|
+
// src/config/service.ts
|
|
292
|
+
import { Context, Effect as Effect3, Layer } from "effect";
|
|
293
|
+
// Service tag under which the resolved configuration is provided and looked up.
var ConfigService = class extends Context.Tag("ConfigService")() {};

// Layer that obtains the service value by evaluating MdContextConfig.
var ConfigServiceLive = Layer.effect(ConfigService, MdContextConfig);

/**
 * Wrap an already-built config value in a layer providing ConfigService.
 *
 * @param config - Config value to serve.
 * @returns A layer that supplies `config` as ConfigService.
 */
var makeConfigLayer = (config) => {
  return Layer.succeed(ConfigService, config);
};

// Layer serving the built-in defaultConfig.
var ConfigServiceDefault = makeConfigLayer(defaultConfig);
|
|
298
|
+
|
|
299
|
+
// src/config/testing.ts
|
|
300
|
+
import { Effect as Effect4, Layer as Layer2 } from "effect";
|
|
301
|
+
// Layer for tests: serves defaultConfig as ConfigService.
var TestConfigLayer = Layer2.succeed(ConfigService, defaultConfig);
|
|
305
|
+
|
|
306
|
+
// src/summarize/formatters.ts
|
|
307
|
+
var formatSummary = (summary, options = {}) => {
|
|
308
|
+
const maxTokens = options.maxTokens;
|
|
309
|
+
const flatSections = [];
|
|
310
|
+
const collectSections = (section, depth = 0, parentNumber = "", index = 0) => {
|
|
311
|
+
const number = parentNumber ? `${parentNumber}.${index + 1}` : `${index + 1}`;
|
|
312
|
+
flatSections.push({ section, depth, number });
|
|
313
|
+
section.children.forEach((child, i) => {
|
|
314
|
+
collectSections(child, depth + 1, number, i);
|
|
315
|
+
});
|
|
316
|
+
};
|
|
317
|
+
summary.sections.forEach((section, i) => {
|
|
318
|
+
collectSections(section, 0, "", i);
|
|
319
|
+
});
|
|
320
|
+
const buildOutput = (includedSectionIndices, truncationInfo, includeTopics2) => {
|
|
321
|
+
const lines = [];
|
|
322
|
+
if (truncationInfo.showWarning && truncationInfo.truncatedCount > 0 && truncationInfo.tokensTotal > 0) {
|
|
323
|
+
const pct = Math.round(
|
|
324
|
+
truncationInfo.tokensShown / truncationInfo.tokensTotal * 100
|
|
325
|
+
);
|
|
326
|
+
lines.push(
|
|
327
|
+
`\u26A0\uFE0F Truncated: Showing ~${truncationInfo.tokensShown}/${truncationInfo.tokensTotal} tokens (${pct}%)`
|
|
328
|
+
);
|
|
329
|
+
if (truncationInfo.includedNumbers.length > 0) {
|
|
330
|
+
const includedDisplay = truncationInfo.includedNumbers.length <= 6 ? truncationInfo.includedNumbers.join(", ") : truncationInfo.includedNumbers.slice(0, 5).join(", ") + `, ... (+${truncationInfo.includedNumbers.length - 5} more)`;
|
|
331
|
+
lines.push(`Sections included: ${includedDisplay}`);
|
|
332
|
+
}
|
|
333
|
+
if (truncationInfo.excludedNumbers.length > 0) {
|
|
334
|
+
const excludedDisplay = truncationInfo.excludedNumbers.length <= 6 ? truncationInfo.excludedNumbers.join(", ") : truncationInfo.excludedNumbers.slice(0, 5).join(", ") + `, ... (+${truncationInfo.excludedNumbers.length - 5} more)`;
|
|
335
|
+
lines.push(`Sections excluded: ${excludedDisplay}`);
|
|
336
|
+
}
|
|
337
|
+
lines.push(
|
|
338
|
+
"Use --full for complete content or --section to target specific sections."
|
|
339
|
+
);
|
|
340
|
+
lines.push("");
|
|
341
|
+
}
|
|
342
|
+
lines.push(`# ${summary.title}`);
|
|
343
|
+
lines.push(`Path: ${summary.path}`);
|
|
344
|
+
const tokenLineIndex = lines.length;
|
|
345
|
+
lines.push("PLACEHOLDER");
|
|
346
|
+
lines.push("");
|
|
347
|
+
const fullTopicsLine2 = summary.keyTopics.length > 0 ? `**Topics:** ${summary.keyTopics.join(", ")}` : "";
|
|
348
|
+
if (includeTopics2 && fullTopicsLine2) {
|
|
349
|
+
lines.push(fullTopicsLine2);
|
|
350
|
+
lines.push("");
|
|
351
|
+
}
|
|
352
|
+
const sectionLines = [];
|
|
353
|
+
for (let i = 0; i < flatSections.length; i++) {
|
|
354
|
+
if (!includedSectionIndices.has(i)) continue;
|
|
355
|
+
const { section, depth } = flatSections[i];
|
|
356
|
+
const indent = " ".repeat(depth);
|
|
357
|
+
const prefix = "#".repeat(section.level);
|
|
358
|
+
sectionLines.push(`${indent}${prefix} ${section.heading}`);
|
|
359
|
+
if (section.summary) {
|
|
360
|
+
sectionLines.push(`${indent}${section.summary}`);
|
|
361
|
+
}
|
|
362
|
+
}
|
|
363
|
+
lines.push(sectionLines.join("\n"));
|
|
364
|
+
const tempOutput = lines.join("\n");
|
|
365
|
+
const tokensWithoutLine = countTokensApprox(
|
|
366
|
+
tempOutput.replace("PLACEHOLDER", "")
|
|
367
|
+
);
|
|
368
|
+
let estimatedTotal = tokensWithoutLine + 8;
|
|
369
|
+
for (let iter = 0; iter < 3; iter++) {
|
|
370
|
+
const testTokenLine = `Tokens: ${estimatedTotal} (${(summary.compressionRatio * 100).toFixed(0)}% reduction from ${summary.originalTokens})`;
|
|
371
|
+
const testOutput = tempOutput.replace("PLACEHOLDER", testTokenLine);
|
|
372
|
+
const actualTotal = countTokensApprox(testOutput);
|
|
373
|
+
if (actualTotal === estimatedTotal) break;
|
|
374
|
+
estimatedTotal = actualTotal;
|
|
375
|
+
}
|
|
376
|
+
const finalTokenLine = `Tokens: ${estimatedTotal} (${(summary.compressionRatio * 100).toFixed(0)}% reduction from ${summary.originalTokens})`;
|
|
377
|
+
lines[tokenLineIndex] = finalTokenLine;
|
|
378
|
+
return lines.join("\n");
|
|
379
|
+
};
|
|
380
|
+
if (maxTokens === void 0) {
|
|
381
|
+
const allIndices = new Set(flatSections.map((_, i) => i));
|
|
382
|
+
const hasPriorTruncation = summary.truncated && summary.truncatedCount;
|
|
383
|
+
return buildOutput(
|
|
384
|
+
allIndices,
|
|
385
|
+
{
|
|
386
|
+
showWarning: !!hasPriorTruncation,
|
|
387
|
+
truncatedCount: summary.truncatedCount ?? 0,
|
|
388
|
+
includedNumbers: flatSections.map((s) => s.number),
|
|
389
|
+
excludedNumbers: [],
|
|
390
|
+
tokensShown: summary.summaryTokens,
|
|
391
|
+
tokensTotal: summary.originalTokens
|
|
392
|
+
},
|
|
393
|
+
true
|
|
394
|
+
);
|
|
395
|
+
}
|
|
396
|
+
const includedIndices = /* @__PURE__ */ new Set();
|
|
397
|
+
let truncatedCount = 0;
|
|
398
|
+
let includeTopics = true;
|
|
399
|
+
const SAFETY_MARGIN = 1.15;
|
|
400
|
+
const minHeaderTemplate = [
|
|
401
|
+
`# ${summary.title}`,
|
|
402
|
+
`Path: ${summary.path}`,
|
|
403
|
+
`Tokens: 9999 (${(summary.compressionRatio * 100).toFixed(0)}% reduction from ${summary.originalTokens})`,
|
|
404
|
+
"",
|
|
405
|
+
""
|
|
406
|
+
].join("\n");
|
|
407
|
+
const minHeaderTokens = Math.ceil(
|
|
408
|
+
countTokensApprox(minHeaderTemplate) * SAFETY_MARGIN
|
|
409
|
+
);
|
|
410
|
+
const fullTopicsLine = summary.keyTopics.length > 0 ? `**Topics:** ${summary.keyTopics.join(", ")}
|
|
411
|
+
` : "";
|
|
412
|
+
const topicsTokens = fullTopicsLine ? Math.ceil(countTokensApprox(fullTopicsLine) * SAFETY_MARGIN) : 0;
|
|
413
|
+
const truncationWarningTokens = Math.ceil(
|
|
414
|
+
countTokensApprox(
|
|
415
|
+
`\u26A0\uFE0F Truncated: Showing ~9999/9999 tokens (99%)
|
|
416
|
+
Sections included: 1, 2, 3, 4, 5, ... (+99 more)
|
|
417
|
+
Sections excluded: 6, 7, 8, 9, 10, ... (+99 more)
|
|
418
|
+
Use --full for complete content or --section to target specific sections.
|
|
419
|
+
`
|
|
420
|
+
) * SAFETY_MARGIN
|
|
421
|
+
);
|
|
422
|
+
let headerTokens = minHeaderTokens + topicsTokens;
|
|
423
|
+
if (headerTokens >= maxTokens) {
|
|
424
|
+
includeTopics = false;
|
|
425
|
+
headerTokens = minHeaderTokens;
|
|
426
|
+
}
|
|
427
|
+
let contentBudget = maxTokens - headerTokens - truncationWarningTokens;
|
|
428
|
+
let tokensUsed = 0;
|
|
429
|
+
for (let i = 0; i < flatSections.length; i++) {
|
|
430
|
+
const { section, depth } = flatSections[i];
|
|
431
|
+
const indent = " ".repeat(depth);
|
|
432
|
+
const prefix = "#".repeat(section.level);
|
|
433
|
+
const sectionContent = section.summary ? `${indent}${prefix} ${section.heading}
|
|
434
|
+
${indent}${section.summary}` : `${indent}${prefix} ${section.heading}`;
|
|
435
|
+
const sectionTokens = Math.ceil(
|
|
436
|
+
countTokensApprox(sectionContent) * SAFETY_MARGIN
|
|
437
|
+
);
|
|
438
|
+
if (tokensUsed + sectionTokens <= contentBudget) {
|
|
439
|
+
includedIndices.add(i);
|
|
440
|
+
tokensUsed += sectionTokens;
|
|
441
|
+
} else {
|
|
442
|
+
truncatedCount++;
|
|
443
|
+
}
|
|
444
|
+
}
|
|
445
|
+
if (truncatedCount === 0) {
|
|
446
|
+
contentBudget += truncationWarningTokens;
|
|
447
|
+
}
|
|
448
|
+
const includedNumbers = [];
|
|
449
|
+
const excludedNumbers = [];
|
|
450
|
+
for (let i = 0; i < flatSections.length; i++) {
|
|
451
|
+
if (includedIndices.has(i)) {
|
|
452
|
+
includedNumbers.push(flatSections[i].number);
|
|
453
|
+
} else {
|
|
454
|
+
excludedNumbers.push(flatSections[i].number);
|
|
455
|
+
}
|
|
456
|
+
}
|
|
457
|
+
let tokensShown = 0;
|
|
458
|
+
for (const idx of includedIndices) {
|
|
459
|
+
tokensShown += flatSections[idx].section.summaryTokens;
|
|
460
|
+
}
|
|
461
|
+
let output = buildOutput(
|
|
462
|
+
includedIndices,
|
|
463
|
+
{
|
|
464
|
+
showWarning: truncatedCount > 0,
|
|
465
|
+
truncatedCount,
|
|
466
|
+
includedNumbers,
|
|
467
|
+
excludedNumbers,
|
|
468
|
+
tokensShown,
|
|
469
|
+
tokensTotal: summary.originalTokens
|
|
470
|
+
},
|
|
471
|
+
includeTopics
|
|
472
|
+
);
|
|
473
|
+
let actualTokens = countTokensApprox(output);
|
|
474
|
+
const sortedIndices = Array.from(includedIndices).sort((a, b) => b - a);
|
|
475
|
+
let removalIndex = 0;
|
|
476
|
+
while (actualTokens > maxTokens && removalIndex < sortedIndices.length) {
|
|
477
|
+
const indexToRemove = sortedIndices[removalIndex];
|
|
478
|
+
includedIndices.delete(indexToRemove);
|
|
479
|
+
truncatedCount++;
|
|
480
|
+
removalIndex++;
|
|
481
|
+
const removedNumber = flatSections[indexToRemove].number;
|
|
482
|
+
const includedIdx = includedNumbers.indexOf(removedNumber);
|
|
483
|
+
if (includedIdx !== -1) {
|
|
484
|
+
includedNumbers.splice(includedIdx, 1);
|
|
485
|
+
excludedNumbers.push(removedNumber);
|
|
486
|
+
}
|
|
487
|
+
tokensShown -= flatSections[indexToRemove].section.summaryTokens;
|
|
488
|
+
output = buildOutput(
|
|
489
|
+
includedIndices,
|
|
490
|
+
{
|
|
491
|
+
showWarning: true,
|
|
492
|
+
truncatedCount,
|
|
493
|
+
includedNumbers,
|
|
494
|
+
excludedNumbers,
|
|
495
|
+
tokensShown,
|
|
496
|
+
tokensTotal: summary.originalTokens
|
|
497
|
+
},
|
|
498
|
+
includeTopics
|
|
499
|
+
);
|
|
500
|
+
actualTokens = countTokensApprox(output);
|
|
501
|
+
}
|
|
502
|
+
if (actualTokens > maxTokens && includeTopics) {
|
|
503
|
+
includeTopics = false;
|
|
504
|
+
output = buildOutput(
|
|
505
|
+
includedIndices,
|
|
506
|
+
{
|
|
507
|
+
showWarning: truncatedCount > 0,
|
|
508
|
+
truncatedCount,
|
|
509
|
+
includedNumbers,
|
|
510
|
+
excludedNumbers,
|
|
511
|
+
tokensShown,
|
|
512
|
+
tokensTotal: summary.originalTokens
|
|
513
|
+
},
|
|
514
|
+
includeTopics
|
|
515
|
+
);
|
|
516
|
+
actualTokens = countTokensApprox(output);
|
|
517
|
+
}
|
|
518
|
+
if (actualTokens > maxTokens && truncatedCount > 0) {
|
|
519
|
+
output = buildOutput(
|
|
520
|
+
includedIndices,
|
|
521
|
+
{
|
|
522
|
+
showWarning: false,
|
|
523
|
+
truncatedCount,
|
|
524
|
+
includedNumbers,
|
|
525
|
+
excludedNumbers,
|
|
526
|
+
tokensShown,
|
|
527
|
+
tokensTotal: summary.originalTokens
|
|
528
|
+
},
|
|
529
|
+
includeTopics
|
|
530
|
+
);
|
|
531
|
+
actualTokens = countTokensApprox(output);
|
|
532
|
+
}
|
|
533
|
+
return output;
|
|
534
|
+
};
|
|
535
|
+
/**
 * Render an assembled context as a plain-text report.
 *
 * @param {object} context - Assembly result. Reads `totalTokens`, `budget`,
 *   `sources` (each entry contributes its `content`) and `overflow` (paths).
 * @returns {string} Newline-joined report: a header, one `---`-delimited block
 *   per source, and an overflow list when any path was left out.
 */
var formatAssembledContext = (context) => {
  const { totalTokens, budget, sources, overflow } = context;
  const header = [
    "# Context Assembly",
    `Total tokens: ${totalTokens}/${budget}`,
    `Sources: ${sources.length}`,
    ""
  ];
  // Each source is preceded by a separator and a blank line.
  const body = sources.flatMap((source) => ["---", "", source.content]);
  const tail = overflow.length > 0
    ? [
        "---",
        "",
        "## Overflow (not included due to budget)",
        ...overflow.map((overflowPath) => `- ${overflowPath}`)
      ]
    : [];
  return [...header, ...body, ...tail].join("\n");
};
|
|
556
|
+
|
|
557
|
+
// src/summarize/summarizer.ts
|
|
558
|
+
import * as fs2 from "fs/promises";
|
|
559
|
+
import * as path2 from "path";
|
|
560
|
+
import { Effect as Effect5 } from "effect";
|
|
561
|
+
|
|
562
|
+
// src/parser/section-filter.ts
|
|
563
|
+
/**
 * Case-insensitive glob match against the whole of `text`.
 * `*` matches any run of characters, `?` matches exactly one character;
 * every other regex metacharacter in `pattern` is treated literally.
 *
 * @param {string} text - Text to test.
 * @param {string} pattern - Glob pattern.
 * @returns {boolean} True when the entire text matches the pattern.
 */
var globMatch = (text, pattern) => {
  // Escape regex metacharacters first; `*` and `?` are deliberately left alone.
  const escaped = pattern.replace(/[.+^${}()|[\]\\]/g, "\\$&");
  const source = escaped.replace(/\*/g, ".*").replace(/\?/g, ".");
  return new RegExp(`^${source}$`, "i").test(text);
};
|
|
568
|
+
/**
 * Flatten a document's section tree into a pre-order list of numbered entries.
 * Numbers are 1-based and dotted by depth ("1", "1.1", "1.2", "2", ...).
 *
 * @param {object} document - Reads `sections`; each section needs `heading`,
 *   `level`, `metadata.tokenCount` and `children`.
 * @returns {Array<{number: string, heading: string, level: number, tokenCount: number}>}
 */
var buildSectionList = (document) => {
  const entries = [];
  const walk = (siblings, parentNumber) => {
    siblings.forEach((node, position) => {
      const ordinal = position + 1;
      const number = parentNumber ? `${parentNumber}.${ordinal}` : `${ordinal}`;
      entries.push({
        number,
        heading: node.heading,
        level: node.level,
        tokenCount: node.metadata.tokenCount
      });
      // Descend before moving to the next sibling to keep pre-order.
      walk(node.children, number);
    });
  };
  walk(document.sections, "");
  return entries;
};
|
|
587
|
+
/**
 * Render a flat section list as one line per section, indented by nesting
 * depth (the number of dots in the section number).
 *
 * @param {Array<{number: string, heading: string, tokenCount: number}>} sections
 * @returns {string} Lines of the form "<number>. <heading> (<n> tokens)".
 */
var formatSectionList = (sections) => {
  const rendered = sections.map((entry) => {
    // "1.2.3" has two dots, hence depth 2.
    const depth = entry.number.split(".").length - 1;
    const indent = " ".repeat(depth);
    return `${indent}${entry.number}. ${entry.heading} (${entry.tokenCount} tokens)`;
  });
  return rendered.join("\n");
};
|
|
598
|
+
/**
 * Decide whether a section-list entry is selected by `selector`.
 *
 * Resolution order:
 *  1. A selector made only of digits and dots is compared to `section.number`.
 *  2. Otherwise a case-insensitive exact heading match wins.
 *  3. A selector containing `*` or `?` is matched as a glob via `globMatch`.
 *  4. Anything else is a case-insensitive substring test on the heading.
 *
 * @param {{number: string, heading: string}} section
 * @param {string} selector
 * @returns {boolean}
 */
var matchesSelector = (section, selector) => {
  const looksLikeNumber = /^[\d.]+$/.test(selector);
  if (looksLikeNumber) {
    return section.number === selector;
  }
  const heading = section.heading.toLowerCase();
  const needle = selector.toLowerCase();
  if (heading === needle) {
    return true;
  }
  const isGlob = selector.includes("*") || selector.includes("?");
  return isGlob ? globMatch(section.heading, selector) : heading.includes(needle);
};
|
|
610
|
+
/**
 * Report whether a section is hit by at least one exclusion pattern.
 * Each pattern is interpreted by `matchesSelector`.
 *
 * @param {{number: string, heading: string}} section
 * @param {string[]} excludePatterns
 * @returns {boolean} False for an empty pattern list.
 */
var matchesExclusionPatterns = (section, excludePatterns) => {
  for (const pattern of excludePatterns) {
    if (matchesSelector(section, pattern)) {
      return true;
    }
  }
  return false;
};
|
|
613
|
+
/**
 * Collect the entries of a flat section list selected by `selector`
 * (as judged by `matchesSelector`), preserving list order.
 *
 * @param {Array<{number: string, heading: string}>} sectionList
 * @param {string} selector
 * @returns {Array} New array of matching entries.
 */
var findMatchingSections = (sectionList, selector) => {
  const isSelected = (entry) => matchesSelector(entry, selector);
  return sectionList.filter(isSelected);
};
|
|
616
|
+
/**
 * Drop entries that match any exclusion pattern.
 *
 * @param {Array<{number: string, heading: string}>} sectionList
 * @param {string[]} excludePatterns
 * @returns {Array} The very same `sectionList` when there are no patterns,
 *   otherwise a new array holding only the entries that survive.
 */
var filterExcludedSections = (sectionList, excludePatterns) => {
  const nothingToExclude = excludePatterns.length === 0;
  return nothingToExclude
    ? sectionList
    : sectionList.filter((entry) => !matchesExclusionPatterns(entry, excludePatterns));
};
|
|
624
|
+
/**
 * Gather the numbers of every section nested (at any depth) under
 * `parentNumber`, i.e. those whose number starts with "<parentNumber>.".
 *
 * @param {Array<{number: string}>} sectionList - Flat section list.
 * @param {string} parentNumber - Dotted number of the ancestor section.
 * @returns {Set<string>} Descendant numbers in list order; never includes
 *   `parentNumber` itself.
 */
var getDescendantNumbers = (sectionList, parentNumber) => {
  // The trailing dot stops "1" from matching "10" or "11.2".
  const childPrefix = `${parentNumber}.`;
  const descendants = sectionList
    .map(({ number }) => number)
    .filter((number) => number.startsWith(childPrefix));
  return new Set(descendants);
};
|
|
634
|
+
/**
 * Pull the sections selected by `selector` out of a parsed document.
 *
 * @param {object} document - Parsed document; reads `sections` (a tree whose
 *   nodes carry `children`).
 * @param {string} selector - Section number, heading text or glob, as
 *   understood by `findMatchingSections`.
 * @param {object} [options]
 * @param {string[]} [options.exclude] - Patterns; matches hit by any of them
 *   are dropped and reported in `excludedNumbers`.
 * @param {boolean} [options.shallow] - When truthy, each returned section is a
 *   copy with `children` emptied; otherwise the original node (with its whole
 *   subtree) is returned.
 * @returns {{sections: object[], matchedNumbers: string[], excludedNumbers: string[]}}
 */
var extractSectionContent = (document, selector, options = {}) => {
  const sectionList = buildSectionList(document);
  const candidates = findMatchingSections(sectionList, selector);
  let matchedSections = candidates;
  const excludedNumbers = [];
  if (options.exclude && options.exclude.length > 0) {
    matchedSections = filterExcludedSections(candidates, options.exclude);
    // Set lookup instead of Array.includes keeps this linear in the match count.
    const kept = new Set(matchedSections);
    for (const candidate of candidates) {
      if (!kept.has(candidate)) {
        excludedNumbers.push(candidate.number);
      }
    }
  }
  if (matchedSections.length === 0) {
    return { sections: [], matchedNumbers: [], excludedNumbers };
  }
  const matchedNumbers = matchedSections.map((matched) => matched.number);
  // Descendants need no separate bookkeeping: a non-shallow result returns
  // the original node, which already carries its subtree in `children`.
  const numberToSection = /* @__PURE__ */ new Map();
  const mapSections = (sections, prefix) => {
    sections.forEach((section, i) => {
      const number = prefix ? `${prefix}.${i + 1}` : `${i + 1}`;
      numberToSection.set(number, section);
      mapSections(section.children, number);
    });
  };
  mapSections(document.sections, "");
  const extractedSections = [];
  for (const number of matchedNumbers) {
    const section = numberToSection.get(number);
    if (!section) continue;
    extractedSections.push(options.shallow ? { ...section, children: [] } : section);
  }
  return { sections: extractedSections, matchedNumbers, excludedNumbers };
};
|
|
687
|
+
/**
 * Render extracted sections (and, recursively, their children) as markdown.
 *
 * For each section a heading line is rebuilt from `level` and `heading`,
 * followed by a blank line and the trimmed `content`. If the first line of
 * `content` starts with "#" it is dropped so the heading is not repeated.
 *
 * @param {Array<{level: number, heading: string, content: string, children: Array}>} sections
 * @returns {string} Rendered sections separated by a blank line.
 */
var formatExtractedSections = (sections) => {
  const render = (section) => {
    const rawLines = section.content.split("\n");
    const bodyLines = rawLines[0].startsWith("#") ? rawLines.slice(1) : rawLines;
    const body = bodyLines.join("\n").trim();
    const parts = [`${"#".repeat(section.level)} ${section.heading}`, ""];
    if (body) {
      parts.push(body);
    }
    for (const child of section.children) {
      parts.push("", render(child));
    }
    return parts.join("\n");
  };
  return sections.map((section) => render(section)).join("\n\n");
};
|
|
708
|
+
/**
 * Remove every section matched by an exclusion pattern, together with all of
 * its descendants, from a document's section tree.
 *
 * @param {object} document - Parsed document; reads `sections`.
 * @param {string[]} excludePatterns - Patterns interpreted by
 *   `matchesExclusionPatterns`.
 * @returns {{document: object, excludedCount: number}} The original document
 *   object (untouched) when nothing is excluded; otherwise a shallow copy with
 *   a pruned `sections` tree. `excludedCount` counts distinct removed sections.
 */
var filterDocumentSections = (document, excludePatterns) => {
  if (excludePatterns.length === 0) {
    return { document, excludedCount: 0 };
  }
  const sectionList = buildSectionList(document);
  const numbersToExclude = /* @__PURE__ */ new Set();
  for (const entry of sectionList) {
    if (!matchesExclusionPatterns(entry, excludePatterns)) continue;
    // Trailing dot: "1." selects "1.x" descendants without catching "10".
    const descendantPrefix = `${entry.number}.`;
    for (const candidate of sectionList) {
      const isSelfOrDescendant = candidate.number === entry.number || candidate.number.startsWith(descendantPrefix);
      if (isSelfOrDescendant) {
        numbersToExclude.add(candidate.number);
      }
    }
  }
  // The set de-duplicates, so its size is the number of distinct exclusions.
  const excludedCount = numbersToExclude.size;
  if (excludedCount === 0) {
    return { document, excludedCount: 0 };
  }
  const prune = (sections, prefix) => sections.flatMap((section, i) => {
    const number = prefix ? `${prefix}.${i + 1}` : `${i + 1}`;
    if (numbersToExclude.has(number)) {
      return [];
    }
    return [{ ...section, children: prune(section.children, number) }];
  });
  return {
    document: { ...document, sections: prune(document.sections, "") },
    excludedCount
  };
};
|
|
753
|
+
|
|
754
|
+
// src/summarize/summarizer.ts
|
|
755
|
+
// Token budget per summary level. Used as the default `maxTokens` of
// `summarizeDocument` and as the upper bound of a section's target size.
var TOKEN_BUDGETS = {
  brief: 100,
  summary: 500,
  full: Number.POSITIVE_INFINITY
};

// --- Sentence selection (extractKeyPoints) ---
// Sentences whose trimmed length does not exceed this are discarded.
var MIN_SENTENCE_LENGTH = 10;
// Score added when a sentence contains ":".
var SENTENCE_SCORE_DEFINITION = 2;
// Score added when a sentence starts with an uppercase ASCII letter.
var SENTENCE_SCORE_PROPER_START = 1;
// Score added when the length lies strictly between the two bounds below.
var SENTENCE_SCORE_MEDIUM_LENGTH = 1;
// Score added when a sentence contains "**" or a backtick.
var SENTENCE_SCORE_EMPHASIS = 1;
var SENTENCE_LENGTH_MIN = 50;
var SENTENCE_LENGTH_MAX = 200;

// --- Section summarization (summarizeSection) ---
// Fraction of a section's original tokens aimed for in its summary.
var SUMMARY_COMPRESSION_RATIO = 0.3;
// Lower bound of a section's target size, in tokens.
var MIN_SECTION_TOKENS = 20;
// Lower bound on the number of sentences requested from extractKeyPoints.
var MIN_SUMMARY_SENTENCES = 2;
// Divisor converting a token target into a sentence count.
var TOKENS_PER_SENTENCE_ESTIMATE = 30;

// --- Topic extraction (extractTopics) ---
// Cleaned headings qualify only when their length is strictly between these.
var MIN_TOPIC_LENGTH = 2;
var MAX_TOPIC_LENGTH = 50;
// Maximum number of topics returned.
var MAX_TOPICS = 10;

// --- Context assembly (assembleContext) ---
// A brief fallback summary is attempted only when the remaining budget
// exceeds this many tokens.
var MIN_PARTIAL_BUDGET = 50;
|
|
775
|
+
/**
 * Pick up to `maxSentences` representative sentences from `content`.
 *
 * Newlines are flattened to spaces, the text is split after ".", "!" or "?"
 * followed by whitespace, and sentences whose trimmed length does not exceed
 * MIN_SENTENCE_LENGTH are dropped. If more sentences remain than requested,
 * each is scored (contains ":", starts with an uppercase letter, medium
 * length, contains "**" or a backtick) and the highest-scoring ones are kept.
 *
 * The kept sentences are returned in their original document order, so that
 * joining them yields text that reads in the same sequence as the source.
 *
 * @param {string} content - Plain text to pick sentences from.
 * @param {number} maxSentences - Maximum number of sentences to return.
 * @returns {string[]} Selected sentences, in document order.
 */
var extractKeyPoints = (content, maxSentences) => {
  const sentences = content
    .replace(/\n+/g, " ")
    .split(/(?<=[.!?])\s+/)
    .filter((s) => s.trim().length > MIN_SENTENCE_LENGTH);
  if (sentences.length <= maxSentences) {
    return sentences;
  }
  const scored = sentences.map((sentence, position) => {
    let score = 0;
    if (sentence.includes(":")) score += SENTENCE_SCORE_DEFINITION;
    if (/^[A-Z]/.test(sentence)) score += SENTENCE_SCORE_PROPER_START;
    if (sentence.length > SENTENCE_LENGTH_MIN && sentence.length < SENTENCE_LENGTH_MAX) {
      score += SENTENCE_SCORE_MEDIUM_LENGTH;
    }
    if (/\*\*|`/.test(sentence)) score += SENTENCE_SCORE_EMPHASIS;
    return { sentence, score, position };
  });
  return scored
    // Best score first; earlier sentences win ties (explicit, not engine-dependent).
    .sort((a, b) => b.score - a.score || a.position - b.position)
    .slice(0, maxSentences)
    // Restore reading order: score decides *which* sentences, not *where*.
    .sort((a, b) => a.position - b.position)
    .map((entry) => entry.sentence);
};
|
|
792
|
+
/**
 * Summarize one section and, recursively, all of its children.
 *
 * The target size is SUMMARY_COMPRESSION_RATIO of the section's original
 * tokens, floored at MIN_SECTION_TOKENS and capped by TOKEN_BUDGETS[level].
 * The summary text is chosen as follows:
 *  - level "full", or a section already within target: the plain text as-is;
 *  - level "brief": a marker such as "[code, list]" naming the content kinds
 *    present, or "" when there are none;
 *  - otherwise: key sentences from `extractKeyPoints`, falling back to the
 *    leading words of the text (with "..." when words were cut).
 *
 * @param {object} section - Reads `heading`, `level`, `plainText`, `children`
 *   and `metadata` (`tokenCount`, `hasCode`, `hasList`, `hasTable`).
 * @param {"brief"|"summary"|"full"} level - Key into TOKEN_BUDGETS.
 * @returns {object} Section summary: heading, level, originalTokens,
 *   summaryTokens, summary, children, hasCode, hasList, hasTable.
 */
var summarizeSection = (section, level) => {
  const { metadata, plainText } = section;
  const originalTokens = metadata.tokenCount;
  const children = section.children.map((child) => summarizeSection(child, level));
  const targetTokens = Math.min(
    TOKEN_BUDGETS[level],
    Math.max(originalTokens * SUMMARY_COMPRESSION_RATIO, MIN_SECTION_TOKENS)
  );

  // "brief" level: only advertise which kinds of content the section holds.
  const describeContentKinds = () => {
    const kinds = [
      ["code", metadata.hasCode],
      ["list", metadata.hasList],
      ["table", metadata.hasTable]
    ].filter(([, present]) => present).map(([label]) => label);
    return kinds.length > 0 ? `[${kinds.join(", ")}]` : "";
  };

  // "summary" level: key sentences, or the leading words when none qualify.
  const condense = () => {
    const maxSentences = Math.max(
      MIN_SUMMARY_SENTENCES,
      Math.floor(targetTokens / TOKENS_PER_SENTENCE_ESTIMATE)
    );
    const keyPoints = extractKeyPoints(plainText, maxSentences);
    if (keyPoints.length > 0) {
      return keyPoints.join(" ");
    }
    const allWords = plainText.split(/\s+/);
    const words = allWords.slice(0, targetTokens);
    return words.join(" ") + (words.length < allWords.length ? "..." : "");
  };

  let summary;
  if (level === "full" || originalTokens <= targetTokens) {
    summary = plainText;
  } else if (level === "brief") {
    summary = describeContentKinds();
  } else {
    summary = condense();
  }

  return {
    heading: section.heading,
    level: section.level,
    originalTokens,
    summaryTokens: countTokensApprox(summary),
    summary,
    children,
    hasCode: metadata.hasCode,
    hasList: metadata.hasList,
    hasTable: metadata.hasTable
  };
};
|
|
836
|
+
/**
 * Derive a list of topic strings from a document.
 *
 * Headings are visited in pre-order; in each one the characters ":", "#",
 * "-" and "_" are replaced by spaces, then it is trimmed and lower-cased.
 * It counts as a topic when its length is strictly between MIN_TOPIC_LENGTH
 * and MAX_TOPIC_LENGTH. String entries of `frontmatter.tags` are appended
 * afterwards (lower-cased). Duplicates are dropped and at most MAX_TOPICS
 * topics are returned.
 *
 * @param {object} document - Reads `sections` and `frontmatter.tags`.
 * @returns {string[]} Unique topics in first-seen order.
 */
var extractTopics = (document) => {
  const topics = /* @__PURE__ */ new Set();
  const visit = (sections) => {
    for (const { heading, children } of sections) {
      const topic = heading.replace(/[:#\-_]/g, " ").trim().toLowerCase();
      const hasUsableLength = topic.length > MIN_TOPIC_LENGTH && topic.length < MAX_TOPIC_LENGTH;
      if (hasUsableLength) {
        topics.add(topic);
      }
      visit(children);
    }
  };
  visit(document.sections);

  const { tags } = document.frontmatter;
  if (Array.isArray(tags)) {
    tags
      .filter((tag) => typeof tag === "string")
      .forEach((tag) => topics.add(tag.toLowerCase()));
  }
  return [...topics].slice(0, MAX_TOPICS);
};
|
|
860
|
+
/**
 * Summarize a whole parsed document within a token budget.
 *
 * Every section is summarized with `summarizeSection`. The budget left for
 * section content is `maxTokens` minus an estimate of the formatting overhead
 * (header, topics line and truncation warning, padded by 20% plus 20 tokens).
 * When the summaries exceed a positive content budget, sections are walked in
 * pre-order and kept only while they fit; a section that does not fit is
 * dropped and counted, but its children are still considered and, if they
 * fit, take its place in the parent's list.
 *
 * @param {object} document - Reads `sections`, `title`, `path`,
 *   `metadata.tokenCount` (and whatever `extractTopics` reads).
 * @param {object} [options]
 * @param {"brief"|"summary"|"full"} [options.level="summary"]
 * @param {number} [options.maxTokens] - Defaults to TOKEN_BUDGETS[level].
 * @returns {object} `{ path, title, originalTokens, summaryTokens,
 *   compressionRatio, sections, keyTopics }`, plus `truncated: true` and
 *   `truncatedCount` when at least one section was dropped.
 */
var summarizeDocument = (document, options = {}) => {
  const level = options.level ?? "summary";
  const maxTokens = options.maxTokens ?? TOKEN_BUDGETS[level];
  const allSections = document.sections.map((s) => summarizeSection(s, level));
  const originalTokens = document.metadata.tokenCount;

  // Pre-order walk with an explicit stack to total every summary's tokens.
  let totalSummaryTokens = 0;
  const pending = [...allSections].reverse();
  while (pending.length > 0) {
    const current = pending.pop();
    totalSummaryTokens += current.summaryTokens;
    for (let i = current.children.length - 1; i >= 0; i--) {
      pending.push(current.children[i]);
    }
  }

  const topics = extractTopics(document);

  // Worst-case placeholders ("9999", "999") stand in for the real numbers.
  const headerTemplate = `# ${document.title}\nPath: ${document.path}\nTokens: 9999 (99% reduction from ${document.metadata.tokenCount})\n`;
  const topicsLine = topics.length > 0 ? `\n**Topics:** ${topics.join(", ")}\n` : "";
  const truncationWarning = "\n\u26A0\uFE0F TRUNCATED: 999 sections omitted to fit token budget";
  const baseOverhead = countTokensApprox(headerTemplate + topicsLine + truncationWarning);
  const formattingOverhead = Math.ceil(baseOverhead * 1.2) + 20;
  const contentBudget = maxTokens - formattingOverhead;

  let sections = allSections;
  let summaryTokens = totalSummaryTokens;
  let truncatedCount = 0;

  const overBudget = totalSummaryTokens > contentBudget && contentBudget > 0;
  if (overBudget) {
    let tokensUsed = 0;
    const keepWhatFits = (sectionList) => sectionList.flatMap((section) => {
      if (tokensUsed + section.summaryTokens <= contentBudget) {
        tokensUsed += section.summaryTokens;
        return [{ ...section, children: keepWhatFits(section.children) }];
      }
      // The section itself is dropped; children that fit are hoisted.
      truncatedCount++;
      return keepWhatFits(section.children);
    });
    sections = keepWhatFits(allSections);
    summaryTokens = tokensUsed;
  }

  const compressionRatio = originalTokens > 0 ? 1 - summaryTokens / originalTokens : 0;
  const result = {
    path: document.path,
    title: document.title,
    originalTokens,
    summaryTokens,
    compressionRatio,
    sections,
    keyTopics: topics
  };
  return truncatedCount > 0 ? { ...result, truncated: true, truncatedCount } : result;
};
|
|
943
|
+
/**
 * Parse a file and summarize it, optionally removing excluded sections first.
 *
 * @param {string} filePath - Path handed to `parseFile`.
 * @param {object} [options] - Passed through to `summarizeDocument`;
 *   `options.exclude` (when non-empty) is applied via `filterDocumentSections`
 *   before summarizing.
 * @returns An Effect that yields the result of `summarizeDocument`.
 */
var summarizeFile = (filePath, options = {}) => Effect5.gen(function* () {
  const parsed = yield* parseFile(filePath);
  const patterns = options.exclude;
  const hasExclusions = Boolean(patterns && patterns.length > 0);
  const document = hasExclusions ? filterDocumentSections(parsed, patterns).document : parsed;
  return summarizeDocument(document, options);
});
|
|
954
|
+
/**
 * Summarize several source files into one context that respects a total
 * token budget.
 *
 * Each source is first summarized at `options.level` with an equal share of
 * the budget. If the formatted summary does not fit into what is left of the
 * total budget and more than MIN_PARTIAL_BUDGET tokens remain, a "brief"
 * summary limited to the remainder is tried instead. A source ends up in
 * `overflow` when it cannot be summarized, when too little budget remains,
 * or when even its brief summary would exceed the remaining budget, so
 * `totalTokens` never exceeds `budget`.
 *
 * Summarization failures are logged and then swallowed on purpose: one bad
 * file must not abort the whole assembly.
 *
 * @param {string} rootPath - Base directory; relative sources are resolved
 *   against it and all reported paths are relative to it.
 * @param {string[]} sourcePaths - Files to include, in priority order.
 * @param {object} options
 * @param {number} options.budget - Total token budget.
 * @param {"brief"|"summary"|"full"} [options.level="summary"]
 * @param {string[]} [options.exclude] - Section exclusion patterns.
 * @returns An Effect yielding `{ sources, totalTokens, budget, overflow }`,
 *   where each source is `{ path, title, tokens, content }`.
 */
var assembleContext = (rootPath, sourcePaths, options) => Effect5.gen(function* () {
  const budget = options.budget;
  const level = options.level ?? "summary";
  const excludePatterns = options.exclude ?? [];
  const sources = [];
  const overflow = [];
  let totalTokens = 0;
  const perSourceBudget = Math.floor(budget / sourcePaths.length);

  // Summarize one file; on failure log the error and resolve to null.
  const summarizeOrNull = (resolvedPath, summaryLevel, maxTokens, failureMessage) => summarizeFile(resolvedPath, {
    level: summaryLevel,
    maxTokens,
    exclude: excludePatterns
  }).pipe(
    // Log error for observability before gracefully degrading
    Effect5.tapError((error) => Effect5.logError(failureMessage, error)),
    // Note: catchAll intentional for batch processing - individual file
    // failures add to overflow instead of stopping assembly
    Effect5.catchAll(() => Effect5.succeed(null))
  );

  for (const sourcePath of sourcePaths) {
    const resolvedPath = path2.isAbsolute(sourcePath) ? sourcePath : path2.join(rootPath, sourcePath);
    // Every reported path (sources and overflow alike) is root-relative.
    const relativePath = path2.relative(rootPath, resolvedPath);

    const summary = yield* summarizeOrNull(
      resolvedPath,
      level,
      perSourceBudget,
      `Failed to summarize ${sourcePath}`
    );
    if (!summary) {
      overflow.push(relativePath);
      continue;
    }

    const content = formatSummary(summary);
    const tokens = countTokensApprox(content);
    if (totalTokens + tokens <= budget) {
      sources.push({ path: relativePath, title: summary.title, tokens, content });
      totalTokens += tokens;
      continue;
    }

    const remaining = budget - totalTokens;
    if (!(remaining > MIN_PARTIAL_BUDGET)) {
      overflow.push(relativePath);
      continue;
    }

    const briefSummary = yield* summarizeOrNull(
      resolvedPath,
      "brief",
      remaining,
      `Failed to create brief summary for ${sourcePath}`
    );
    if (!briefSummary) {
      overflow.push(relativePath);
      continue;
    }

    const briefContent = formatSummary(briefSummary);
    const briefTokens = countTokensApprox(briefContent);
    // The formatted brief summary is not guaranteed to fit the remainder,
    // so check before accepting it; otherwise the budget could be exceeded.
    if (briefTokens > remaining) {
      overflow.push(relativePath);
      continue;
    }
    sources.push({
      path: relativePath,
      title: briefSummary.title,
      tokens: briefTokens,
      content: briefContent
    });
    totalTokens += briefTokens;
  }

  return {
    sources,
    totalTokens,
    budget,
    overflow
  };
});
|
|
1038
|
+
|
|
1039
|
+
// src/embeddings/semantic-search.ts
|
|
1040
|
+
import * as fs4 from "fs/promises";
|
|
1041
|
+
import * as path4 from "path";
|
|
1042
|
+
import { Effect as Effect11 } from "effect";
|
|
1043
|
+
|
|
1044
|
+
// src/embeddings/hyde.ts
|
|
1045
|
+
import { Effect as Effect6, Redacted } from "effect";
|
|
1046
|
+
import OpenAI from "openai";
|
|
1047
|
+
// Defaults applied by generateHypotheticalDocument when the caller's options
// leave the corresponding field unset.
var DEFAULT_MODEL = "gpt-4o-mini";
var DEFAULT_MAX_TOKENS = 256;
var DEFAULT_TEMPERATURE = 0.3;

// System prompt asking the model to answer in the voice of documentation.
var DEFAULT_SYSTEM_PROMPT = [
  "You are a technical documentation assistant. Given a user's question, write a short, factual passage that would appear in documentation answering this question.",
  "",
  "Guidelines:",
  "- Write 2-4 concise paragraphs",
  "- Use technical but accessible language",
  "- Include specific details, code examples, or configuration options where relevant",
  "- Focus on directly answering the question",
  "- Do not include greetings, preambles, or meta-commentary",
  "- Write as if this is an excerpt from existing documentation"
].join("\n");

// Price per one million input/output tokens, keyed by model name
// (currency is not stated here; presumably USD, TODO confirm).
var LLM_PRICING = {
  "gpt-4o-mini": { input: 0.15, output: 0.6 },
  "gpt-4o": { input: 2.5, output: 10 },
  "gpt-4-turbo": { input: 10, output: 30 },
  "gpt-3.5-turbo": { input: 0.5, output: 1.5 }
};
|
|
1065
|
+
/**
 * Ask an OpenAI chat model to write a hypothetical documentation passage
 * answering `query`.
 *
 * @param {string} query - The user's question; sent as the user message.
 * @param {object} [options]
 * @param {*} [options.apiKey] - Plain or Redacted key; falls back to the
 *   OPENAI_API_KEY environment variable.
 * @param {string} [options.baseURL] - Passed to the OpenAI client as-is.
 * @param {string} [options.model] - Defaults to DEFAULT_MODEL.
 * @param {number} [options.maxTokens] - Defaults to DEFAULT_MAX_TOKENS.
 * @param {number} [options.temperature] - Defaults to DEFAULT_TEMPERATURE.
 * @param {string} [options.systemPrompt] - Defaults to DEFAULT_SYSTEM_PROMPT.
 * @returns An Effect yielding `{ hypotheticalDocument, originalQuery, model,
 *   tokensUsed, cost }`. It fails with ApiKeyMissingError when no key is
 *   available and with EmbeddingError when the API call rejects.
 */
var generateHypotheticalDocument = (query, options = {}) => Effect6.gen(function* () {
  const rawApiKey = options.apiKey ?? process.env.OPENAI_API_KEY;
  if (!rawApiKey) {
    const missingKey = new ApiKeyMissingError({
      provider: "OpenAI",
      envVar: "OPENAI_API_KEY"
    });
    return yield* Effect6.fail(missingKey);
  }

  // Keep the key wrapped until the moment the client needs the raw value.
  const secret = Redacted.isRedacted(rawApiKey) ? rawApiKey : Redacted.make(rawApiKey);
  const client = new OpenAI({
    apiKey: Redacted.value(secret),
    baseURL: options.baseURL
  });

  const model = options.model ?? DEFAULT_MODEL;
  const request = {
    model,
    messages: [
      { role: "system", content: options.systemPrompt ?? DEFAULT_SYSTEM_PROMPT },
      { role: "user", content: query }
    ],
    max_tokens: options.maxTokens ?? DEFAULT_MAX_TOKENS,
    temperature: options.temperature ?? DEFAULT_TEMPERATURE
  };

  const response = yield* Effect6.tryPromise({
    try: async () => client.chat.completions.create(request),
    catch: (error) => new EmbeddingError({
      reason: classifyLLMError(error),
      message: error instanceof Error ? error.message : String(error),
      provider: "openai",
      cause: error
    })
  });

  const inputTokens = response.usage?.prompt_tokens ?? 0;
  const outputTokens = response.usage?.completion_tokens ?? 0;
  // Models missing from the table are priced like "gpt-4o-mini".
  const pricing = LLM_PRICING[model] ?? LLM_PRICING["gpt-4o-mini"];

  return {
    hypotheticalDocument: response.choices[0]?.message?.content ?? "",
    originalQuery: query,
    model,
    tokensUsed: inputTokens + outputTokens,
    cost: inputTokens / 1e6 * pricing.input + outputTokens / 1e6 * pricing.output
  };
});
|
|
1116
|
+
/**
 * Map an error thrown by the chat-completion call onto a coarse reason code.
 *
 * OpenAI SDK error classes are checked first; anything else is classified by
 * substrings of the lower-cased error message. The substring list matches the
 * one used by `OpenAIProvider.classifyError`, so the same failure gets the
 * same reason on both code paths.
 *
 * @param error - Whatever was thrown or rejected.
 * @returns "RateLimit" | "QuotaExceeded" | "Network" | "ModelError" | "Unknown"
 */
var classifyLLMError = (error) => {
  if (error instanceof OpenAI.RateLimitError) {
    return "RateLimit";
  }
  if (error instanceof OpenAI.BadRequestError) {
    const badRequestMsg = (error.message || "").toLowerCase();
    if (badRequestMsg.includes("model")) return "ModelError";
    // A bad request that does not mention the model falls through to the
    // generic message matching below.
  }
  if (error instanceof OpenAI.APIConnectionError) {
    return "Network";
  }
  if (!(error instanceof Error)) return "Unknown";
  const msg = (error.message || "").toLowerCase();
  const mentions = (...needles) => needles.some((needle) => msg.includes(needle));
  if (mentions("429", "rate limit", "too many requests")) return "RateLimit";
  if (mentions("quota", "insufficient", "billing")) return "QuotaExceeded";
  if (mentions("econnrefused", "timeout", "network", "enotfound", "connection")) return "Network";
  if (msg.includes("model") && mentions("not found", "not exist", "invalid")) return "ModelError";
  return "Unknown";
};
|
|
1135
|
+
|
|
1136
|
+
// src/embeddings/openai-provider.ts
|
|
1137
|
+
import { Effect as Effect7, Redacted as Redacted2 } from "effect";
|
|
1138
|
+
import OpenAI2 from "openai";
|
|
1139
|
+
|
|
1140
|
+
// src/embeddings/provider-constants.ts
|
|
1141
|
+
/**
 * Native embedding vector length for each known model, keyed by model name.
 * Models missing from this table are treated as "unknown" by the helpers
 * that read it.
 */
var MODEL_DIMENSIONS = {
  // --- OpenAI ---
  // The two text-embedding-3 models accept a reduced `dimensions` value (MRL);
  // ada-002 does not.
  "text-embedding-3-small": 1536,
  "text-embedding-3-large": 3072,
  "text-embedding-ada-002": 1536,

  // --- Ollama (fixed native dimensions) ---
  "nomic-embed-text": 768,
  "mxbai-embed-large": 1024,
  "bge-m3": 1024,
  "all-minilm": 384,
  "snowflake-arctic-embed": 1024,

  // --- Voyage AI (fixed native dimensions) ---
  "voyage-3.5-lite": 1024, // best value: $0.02/1M tokens
  "voyage-3": 1024, // higher quality: $0.06/1M tokens
  "voyage-code-3": 1024, // code-optimized: $0.18/1M tokens
  "voyage-2": 1024,
  "voyage-large-2": 1536,
  "voyage-code-2": 1536
};
|
|
1166
|
+
/** Models that accept a reduced `dimensions` request parameter. */
var MATRYOSHKA_MODELS = /* @__PURE__ */ new Set(["text-embedding-3-small", "text-embedding-3-large"]);

/**
 * @param model - Embedding model name.
 * @returns true when `model` is listed in MATRYOSHKA_MODELS.
 */
var supportsMatryoshka = (model) => {
  return MATRYOSHKA_MODELS.has(model);
};
|
|
1171
|
+
/**
 * Suggest a vector length for `model`.
 *
 * @param model - Embedding model name.
 * @returns 512 for models that support dimension reduction; otherwise the
 *   model's entry in MODEL_DIMENSIONS (undefined for unknown models).
 */
var getRecommendedDimensions = (model) => supportsMatryoshka(model) ? 512 : MODEL_DIMENSIONS[model];
|
|
1177
|
+
/**
 * Check whether `dimensions` is a usable vector length for `model`.
 *
 * @param model - Embedding model name.
 * @param dimensions - Requested vector length.
 * @returns `{ isValid: true }`, or `{ isValid: false, warning }` where
 *   `warning` is a human-readable explanation. Models missing from
 *   MODEL_DIMENSIONS are accepted because their limits are not known here.
 */
var validateModelDimensions = (model, dimensions) => {
  // A numeric dimension count must be a positive whole number regardless of
  // the model; previously 0, negatives, fractions and NaN slipped through.
  if (typeof dimensions === "number" && !(Number.isInteger(dimensions) && dimensions > 0)) {
    return {
      isValid: false,
      warning: `Model '${model}' requires a positive integer dimension count, got ${dimensions}`
    };
  }
  const nativeDims = MODEL_DIMENSIONS[model];
  if (nativeDims === void 0) {
    return { isValid: true };
  }
  if (dimensions > nativeDims) {
    return {
      isValid: false,
      warning: `Model '${model}' has ${nativeDims} native dimensions, cannot use ${dimensions}`
    };
  }
  if (!supportsMatryoshka(model) && dimensions !== nativeDims) {
    return {
      isValid: false,
      warning: `Model '${model}' does not support dimension reduction, must use ${nativeDims}`
    };
  }
  return { isValid: true };
};
|
|
1196
|
+
/**
 * Default API base URL per provider name. `undefined` means "no override":
 * the OpenAI SDK then uses its own default endpoint.
 */
var PROVIDER_BASE_URLS = {
  openai: void 0, // use OpenAI SDK default
  ollama: "http://localhost:11434/v1",
  "lm-studio": "http://localhost:1234/v1",
  openrouter: "https://openrouter.ai/api/v1",
  voyage: "https://api.voyageai.com/v1" // native API, handled by VoyageProvider
};
|
|
1205
|
+
/**
 * Extract the explicit TCP port from a URL-like string.
 *
 * Only the authority part (before the first `/`, `?` or `#`) is examined, and
 * the port may be followed by a path, a query, a fragment, or nothing. The
 * previous pattern required a trailing `/`, so "http://localhost:11434" gave
 * no port, and it could also pick up ":123/" from inside the path.
 *
 * @param url - e.g. "http://localhost:11434/v1"; the scheme is optional.
 * @returns The port as a number, or undefined when none is written in the URL
 *   (a port of 0 is also reported as undefined).
 */
var extractPortFromUrl = (url) => {
  const match = url.match(/^(?:[a-z][a-z0-9+.-]*:\/\/)?[^/?#]*:(\d+)(?:[/?#]|$)/i);
  if (!match?.[1]) return void 0;
  const port = Number.parseInt(match[1], 10);
  return port;
};
|
|
1210
|
+
/**
 * Provider name -> explicit port, derived once from PROVIDER_BASE_URLS.
 * Providers with no base URL, or whose URL carries no explicit port, are
 * omitted.
 */
var PROVIDER_PORTS = Object.fromEntries(
  Object.entries(PROVIDER_BASE_URLS).flatMap(([providerName, baseUrl]) => {
    const port = baseUrl ? extractPortFromUrl(baseUrl) : void 0;
    return port ? [[providerName, port]] : [];
  })
);
|
|
1220
|
+
/**
 * Guess the provider name from an API base URL.
 *
 * @param baseURL - Base URL in use, possibly undefined or empty.
 * @returns The first PROVIDER_BASE_URLS key whose URL (with "/v1" removed) is
 *   contained in `baseURL`; otherwise "openrouter" if the URL mentions it;
 *   otherwise "openai".
 */
var inferProviderFromUrl = (baseURL) => {
  if (baseURL) {
    const known = Object.entries(PROVIDER_BASE_URLS).find(
      ([, providerUrl]) => providerUrl && baseURL.includes(providerUrl.replace("/v1", ""))
    );
    if (known) {
      return known[0];
    }
    if (baseURL.includes("openrouter")) {
      return "openrouter";
    }
  }
  return "openai";
};
|
|
1230
|
+
|
|
1231
|
+
// src/embeddings/openai-provider.ts
|
|
1232
|
+
/**
 * OpenAI embedding prices, plus when and where they were last checked.
 */
var PRICING_DATA = {
  // Month of the last manual update, formatted YYYY-MM.
  lastUpdated: "2024-09",
  // Page the numbers were taken from, for re-verification.
  source: "https://platform.openai.com/docs/pricing",
  // Price per 1M tokens, keyed by model name.
  prices: {
    "text-embedding-3-small": 0.02,
    "text-embedding-3-large": 0.13,
    "text-embedding-ada-002": 0.1
  }
};
|
|
1244
|
+
/**
 * Report whether the bundled pricing table looks stale.
 *
 * @returns A warning string when PRICING_DATA.lastUpdated is more than 90
 *   days in the past; null when it is recent or cannot be parsed as YYYY-MM.
 */
var checkPricingFreshness = () => {
  const MS_PER_DAY = 1e3 * 60 * 60 * 24;
  const parts = PRICING_DATA.lastUpdated.split("-").map(Number);
  const year = parts[0];
  const month = parts[1];
  if (!year || !month) {
    return null;
  }
  // Date months are 0-based, hence `month - 1`; age is counted from the 1st.
  const updatedAtMs = new Date(year, month - 1, 1).getTime();
  const daysSince = Math.floor((Date.now() - updatedAtMs) / MS_PER_DAY);
  return daysSince > 90 ? `Pricing data is ${daysSince} days old. May not reflect current rates.` : null;
};
|
|
1257
|
+
/** @returns The YYYY-MM string recorded in PRICING_DATA.lastUpdated. */
var getPricingDate = () => {
  const { lastUpdated } = PRICING_DATA;
  return lastUpdated;
};
|
|
1258
|
+
/**
 * Embedding provider backed by the OpenAI SDK client. By pointing `baseURL`
 * elsewhere it also serves OpenAI-compatible endpoints.
 */
var OpenAIProvider = class _OpenAIProvider {
  /** `${providerName}:${model}` */
  name;
  /** Vector length requested or expected from the model. */
  dimensions;
  /** Provider name for error context */
  providerName;
  /** Model name */
  model;
  /** Base URL for API requests */
  baseURL;
  client;
  /** Number of texts sent per embeddings request; always a positive integer. */
  batchSize;
  /**
   * @param apiKey - Redacted API key; unwrapped only to build the SDK client.
   * @param options - `baseURL`, `model`, `batchSize`, `providerName`,
   *   `dimensions` (all optional).
   */
  constructor(apiKey, options = {}) {
    this.baseURL = options.baseURL;
    this.client = new OpenAI2({
      apiKey: Redacted2.value(apiKey),
      baseURL: options.baseURL,
      timeout: 3e4,
      maxRetries: 2
    });
    this.model = options.model ?? "text-embedding-3-small";
    // embed() advances by batchSize, so 0, a negative number or NaN would
    // keep the loop from ever finishing. Fall back to the default instead.
    const requestedBatchSize = options.batchSize ?? 100;
    this.batchSize = Number.isInteger(requestedBatchSize) && requestedBatchSize > 0 ? requestedBatchSize : 100;
    this.providerName = options.providerName ?? this.inferProviderName(options.baseURL);
    this.name = `${this.providerName}:${this.model}`;
    const recommendedDims = getRecommendedDimensions(this.model);
    this.dimensions = options.dimensions ?? recommendedDims ?? 512;
  }
  /**
   * Infer the provider name from the baseURL.
   * Delegates to centralized inferProviderFromUrl for single source of truth.
   */
  inferProviderName(baseURL) {
    return inferProviderFromUrl(baseURL);
  }
  /**
   * Create an OpenAI provider instance.
   * Returns an Effect that fails with ApiKeyMissingError if no API key is available.
   *
   * Key lookup order: `options.apiKey`, then OPENROUTER_API_KEY (only when the
   * target is OpenRouter), then OPENAI_API_KEY.
   *
   * API keys are handled securely using Effect's Redacted type to prevent
   * accidental logging of sensitive values.
   */
  static create(options = {}) {
    const isOpenRouter = options.baseURL?.includes("openrouter") || options.providerName === "openrouter";
    const resolveApiKey = () => {
      if (options.apiKey !== void 0) {
        return options.apiKey;
      }
      return (isOpenRouter ? process.env.OPENROUTER_API_KEY : void 0) ?? process.env.OPENAI_API_KEY;
    };
    const rawApiKey = resolveApiKey();
    if (!rawApiKey) {
      return Effect7.fail(
        new ApiKeyMissingError({
          provider: isOpenRouter ? "OpenRouter" : "OpenAI",
          envVar: isOpenRouter ? "OPENROUTER_API_KEY" : "OPENAI_API_KEY"
        })
      );
    }
    const redactedApiKey = Redacted2.isRedacted(rawApiKey) ? rawApiKey : Redacted2.make(rawApiKey);
    const apiKeyValue = Redacted2.value(redactedApiKey);
    // "sk-" without "sk-or-" looks like an OpenAI key being sent to OpenRouter.
    const shouldWarnOpenRouter = isOpenRouter && apiKeyValue.startsWith("sk-") && !apiKeyValue.startsWith("sk-or-");
    const model = options.model ?? "text-embedding-3-small";
    const dimensionValidation = options.dimensions ? validateModelDimensions(model, options.dimensions) : { isValid: true };
    return Effect7.succeed(new _OpenAIProvider(redactedApiKey, options)).pipe(
      shouldWarnOpenRouter ? Effect7.tap(
        () => Effect7.logWarning(
          '\u26A0\uFE0F Using OpenAI key format with OpenRouter. Consider setting OPENROUTER_API_KEY with a key starting with "sk-or-"'
        )
      ) : (self) => self,
      // Warn about invalid dimension configuration
      dimensionValidation.warning ? Effect7.tap(
        () => Effect7.logWarning(`\u26A0\uFE0F ${dimensionValidation.warning}`)
      ) : (self) => self
    );
  }
  /**
   * Embed `texts` in batches of `batchSize`.
   *
   * @param texts - Strings to embed.
   * @returns `{ embeddings, tokensUsed, cost }`; an empty input returns
   *   zeros without any request.
   * @throws ApiKeyInvalidError when the SDK reports an authentication error.
   * @throws EmbeddingError for every other failure, with a classified reason.
   */
  async embed(texts) {
    if (texts.length === 0) {
      return { embeddings: [], tokensUsed: 0, cost: 0 };
    }
    const allEmbeddings = [];
    let totalTokens = 0;
    try {
      for (let i = 0; i < texts.length; i += this.batchSize) {
        const batch = texts.slice(i, i + this.batchSize);
        const embedParams = {
          model: this.model,
          input: batch
        };
        // Only reducible models get an explicit `dimensions` parameter.
        if (supportsMatryoshka(this.model)) {
          embedParams.dimensions = this.dimensions;
        }
        const response = await this.client.embeddings.create(embedParams);
        for (const item of response.data) {
          allEmbeddings.push(item.embedding);
        }
        totalTokens += response.usage?.total_tokens ?? 0;
      }
    } catch (error) {
      if (error instanceof OpenAI2.AuthenticationError) {
        throw new ApiKeyInvalidError({
          provider: this.providerName,
          details: error.message
        });
      }
      throw new EmbeddingError({
        reason: this.classifyError(error),
        message: error instanceof Error ? error.message : String(error),
        provider: this.providerName,
        cause: error
      });
    }
    // Providers other than openai/openrouter are costed at 0; models without
    // a price entry use 0.02 per 1M tokens.
    const pricePerMillion = this.providerName === "openai" || this.providerName === "openrouter" ? PRICING_DATA.prices[this.model] ?? 0.02 : 0;
    const cost = totalTokens / 1e6 * pricePerMillion;
    return {
      embeddings: allEmbeddings,
      tokensUsed: totalTokens,
      cost
    };
  }
  /**
   * Classify an error into a known category for better error handling.
   * Uses OpenAI SDK error types where available, falls back to string matching
   * for non-OpenAI providers (Ollama, LM Studio, OpenRouter).
   */
  classifyError(error) {
    if (error instanceof OpenAI2.RateLimitError) {
      return "RateLimit";
    }
    if (error instanceof OpenAI2.BadRequestError) {
      const msg2 = error.message.toLowerCase();
      if (msg2.includes("model")) return "ModelError";
    }
    if (error instanceof OpenAI2.APIConnectionError) {
      return "Network";
    }
    if (!(error instanceof Error)) return "Unknown";
    const msg = error.message.toLowerCase();
    if (msg.includes("429") || msg.includes("rate limit") || msg.includes("too many requests")) {
      return "RateLimit";
    }
    if (msg.includes("quota") || msg.includes("insufficient") || msg.includes("billing")) {
      return "QuotaExceeded";
    }
    if (msg.includes("econnrefused") || msg.includes("timeout") || msg.includes("network") || msg.includes("enotfound") || msg.includes("connection")) {
      return "Network";
    }
    if (msg.includes("model") && (msg.includes("not found") || msg.includes("not exist") || msg.includes("invalid"))) {
      return "ModelError";
    }
    return "Unknown";
  }
};
|
|
1409
|
+
/** Thin functional alias for `OpenAIProvider.create(options)`. */
var createOpenAIProvider = (options) => {
  return OpenAIProvider.create(options);
};
|
|
1410
|
+
/**
 * Lift an embedding promise into an Effect with typed failures.
 *
 * @param embedPromise - Promise returned by a provider's `embed()` call.
 * @param providerName - Used only when an unrecognized rejection has to be
 *   wrapped; defaults to "openai".
 * @returns Effect that fails with ApiKeyInvalidError or EmbeddingError.
 */
var wrapEmbedding = (embedPromise, providerName = "openai") => Effect7.tryPromise({
  try: () => embedPromise,
  catch: (e) => {
    // Providers already throw typed, classified errors. Pass them through so
    // the reason (RateLimit, Network, ...) is not overwritten with "Unknown".
    if (e instanceof ApiKeyInvalidError || e instanceof EmbeddingError) {
      return e;
    }
    return new EmbeddingError({
      reason: "Unknown",
      message: e instanceof Error ? e.message : String(e),
      provider: providerName,
      cause: e
    });
  }
});
|
|
1424
|
+
|
|
1425
|
+
// src/embeddings/provider-factory.ts
|
|
1426
|
+
import { Effect as Effect9, Option, Redacted as Redacted4 } from "effect";
|
|
1427
|
+
|
|
1428
|
+
// src/embeddings/voyage-provider.ts
|
|
1429
|
+
import { Effect as Effect8, Redacted as Redacted3 } from "effect";
|
|
1430
|
+
/** Root of the Voyage AI REST API. */
var VOYAGE_API_BASE = "https://api.voyageai.com/v1";

/** Per-model vector length and price per 1M tokens. */
var VOYAGE_MODELS = {
  // Current models
  "voyage-3.5-lite": { dimensions: 1024, pricePerMillion: 0.02 },
  "voyage-3": { dimensions: 1024, pricePerMillion: 0.06 },
  "voyage-code-3": { dimensions: 1024, pricePerMillion: 0.18 },
  // Legacy models
  "voyage-2": { dimensions: 1024, pricePerMillion: 0.1 },
  "voyage-large-2": { dimensions: 1536, pricePerMillion: 0.12 },
  "voyage-code-2": { dimensions: 1536, pricePerMillion: 0.12 }
};

/** Model used when the caller does not name one. */
var DEFAULT_VOYAGE_MODEL = "voyage-3.5-lite";
|
|
1441
|
+
/**
 * Embedding provider that calls the Voyage AI REST API directly via `fetch`.
 */
var VoyageProvider = class _VoyageProvider {
  /** `voyage:${model}` */
  name;
  /** Vector length for the selected model (1024 when the model is unknown). */
  dimensions;
  model;
  baseURL = VOYAGE_API_BASE;
  providerName = "voyage";
  /** Redacted API key; unwrapped only when building the request header. */
  apiKey;
  /** Number of texts sent per request; always a positive integer. */
  batchSize;
  /**
   * @param apiKey - Redacted API key.
   * @param options - `model`, `batchSize` (both optional).
   */
  constructor(apiKey, options = {}) {
    this.apiKey = apiKey;
    this.model = options.model ?? DEFAULT_VOYAGE_MODEL;
    // embed() advances by batchSize, so 0, a negative number or NaN would
    // keep the loop from ever finishing. Fall back to the default instead.
    const requestedBatchSize = options.batchSize ?? 128;
    this.batchSize = Number.isInteger(requestedBatchSize) && requestedBatchSize > 0 ? requestedBatchSize : 128;
    const modelSpec = VOYAGE_MODELS[this.model];
    this.dimensions = modelSpec?.dimensions ?? 1024;
    this.name = `voyage:${this.model}`;
  }
  /**
   * Create a Voyage provider instance.
   * Returns an Effect that fails with ApiKeyMissingError if no API key is available.
   *
   * API keys are handled securely using Effect's Redacted type to prevent
   * accidental logging of sensitive values.
   */
  static create(options = {}) {
    const rawApiKey = options.apiKey ?? process.env.VOYAGE_API_KEY;
    if (!rawApiKey) {
      return Effect8.fail(
        new ApiKeyMissingError({
          provider: "Voyage AI",
          envVar: "VOYAGE_API_KEY"
        })
      );
    }
    const redactedApiKey = Redacted3.isRedacted(rawApiKey) ? rawApiKey : Redacted3.make(rawApiKey);
    return Effect8.succeed(new _VoyageProvider(redactedApiKey, options));
  }
  /**
   * Embed `texts` in batches of `batchSize`.
   *
   * @param texts - Strings to embed.
   * @returns `{ embeddings, tokensUsed, cost }`; an empty input returns
   *   zeros without any request.
   * @throws ApiKeyInvalidError on HTTP 401.
   * @throws EmbeddingError for any other HTTP or transport failure.
   */
  async embed(texts) {
    if (texts.length === 0) {
      return { embeddings: [], tokensUsed: 0, cost: 0 };
    }
    const allEmbeddings = [];
    let totalTokens = 0;
    try {
      for (let i = 0; i < texts.length; i += this.batchSize) {
        const batch = texts.slice(i, i + this.batchSize);
        const response = await fetch(`${VOYAGE_API_BASE}/embeddings`, {
          method: "POST",
          headers: {
            Authorization: `Bearer ${Redacted3.value(this.apiKey)}`,
            "Content-Type": "application/json"
          },
          body: JSON.stringify({
            model: this.model,
            input: batch,
            input_type: "document"
            // 'document' for indexing, 'query' for searching
          })
        });
        if (!response.ok) {
          const errorText = await response.text();
          if (response.status === 401) {
            throw new ApiKeyInvalidError({
              provider: "Voyage AI",
              details: errorText
            });
          }
          throw new EmbeddingError({
            reason: this.classifyHttpError(response.status, errorText),
            message: `Voyage API error: ${response.status} - ${errorText}`,
            provider: "voyage"
          });
        }
        const data = await response.json();
        for (const item of data.data) {
          allEmbeddings.push(item.embedding);
        }
        totalTokens += data.usage?.total_tokens ?? 0;
      }
    } catch (error) {
      // Errors raised above are already typed; only wrap foreign ones.
      if (error instanceof ApiKeyInvalidError || error instanceof EmbeddingError) {
        throw error;
      }
      throw new EmbeddingError({
        reason: this.classifyError(error),
        message: error instanceof Error ? error.message : String(error),
        provider: "voyage",
        cause: error
      });
    }
    // Models missing from VOYAGE_MODELS are costed at 0.02 per 1M tokens.
    const pricePerMillion = VOYAGE_MODELS[this.model]?.pricePerMillion ?? 0.02;
    const cost = totalTokens / 1e6 * pricePerMillion;
    return {
      embeddings: allEmbeddings,
      tokensUsed: totalTokens,
      cost
    };
  }
  /**
   * Map a non-OK HTTP status onto a reason code.
   * The response body (`_message`) is currently not inspected.
   */
  classifyHttpError(status, _message) {
    if (status === 429) return "RateLimit";
    if (status === 402) return "QuotaExceeded";
    if (status === 400) return "ModelError";
    return "Unknown";
  }
  /**
   * Map a thrown (non-HTTP) error onto a reason code.
   *
   * Node's `fetch` reports transport failures as `TypeError: fetch failed`
   * and puts the underlying reason (e.g. ECONNREFUSED) on `error.cause`, so
   * both the message and the cause are checked for network markers.
   */
  classifyError(error) {
    if (!(error instanceof Error)) return "Unknown";
    const msg = error.message.toLowerCase();
    if (msg.includes("rate limit") || msg.includes("429")) return "RateLimit";
    if (msg.includes("quota") || msg.includes("billing")) return "QuotaExceeded";
    const networkMarkers = ["econnrefused", "econnreset", "enotfound", "timeout", "network", "fetch failed"];
    const looksLikeNetwork = (text) => networkMarkers.some((marker) => text.includes(marker));
    const cause = error.cause;
    const causeText = cause instanceof Error ? `${cause.code ?? ""} ${cause.message}`.toLowerCase() : "";
    if (looksLikeNetwork(msg) || looksLikeNetwork(causeText))
      return "Network";
    if (msg.includes("model") && msg.includes("not found")) return "ModelError";
    return "Unknown";
  }
};
|
|
1555
|
+
/** Thin functional alias for `VoyageProvider.create(options)`. */
var createVoyageProvider = (options) => {
  return VoyageProvider.create(options);
};
|
|
1556
|
+
|
|
1557
|
+
// src/embeddings/provider-factory.ts
|
|
1558
|
+
/**
 * Pick the base URL for `provider`.
 *
 * @param provider - Key into PROVIDER_BASE_URLS.
 * @param configBaseURL - Option holding an explicit override.
 * @returns The override when present, else the provider's default
 *   (which may be undefined).
 */
var getProviderBaseURL = (provider, configBaseURL) => Option.isSome(configBaseURL) ? configBaseURL.value : PROVIDER_BASE_URLS[provider];
|
|
1564
|
+
/**
 * Coerce a base URL given as `undefined`, a plain string, or an Option into
 * an Option.
 *
 * @param baseURL - undefined | string | Option
 * @returns `Option.none()` for undefined, `Option.some(str)` for a string;
 *   any other value is returned unchanged.
 */
var normalizeBaseURL = (baseURL) => {
  switch (typeof baseURL) {
    case "undefined":
      return Option.none();
    case "string":
      return Option.some(baseURL);
    default:
      return baseURL;
  }
};
|
|
1573
|
+
/**
 * Reduce an API key given as `undefined`, a string, a Redacted value, or an
 * Option to "key or undefined".
 *
 * @param apiKey - undefined | string | Redacted | Option
 * @returns Strings and Redacted values unchanged; the inner value of a
 *   `Some`; undefined otherwise.
 */
var normalizeApiKey = (apiKey) => {
  if (apiKey === void 0) {
    return void 0;
  }
  const usableAsIs = Redacted4.isRedacted(apiKey) || typeof apiKey === "string";
  if (usableAsIs) {
    return apiKey;
  }
  return Option.isSome(apiKey) ? apiKey.value : void 0;
};
|
|
1585
|
+
/**
 * Build an embedding provider straight from a config object.
 *
 * `provider === "voyage"` yields a VoyageProvider; every other provider name
 * goes through the OpenAI-compatible provider with a resolved base URL.
 *
 * @param config - `{ provider, model, dimensions, batchSize, baseURL, apiKey }`
 * @returns Effect yielding the provider instance, or failing with whatever
 *   the chosen factory fails with.
 */
var createEmbeddingProviderDirect = (config) => Effect9.gen(function* () {
  const { provider } = config;
  const resolvedBaseURL = getProviderBaseURL(provider, normalizeBaseURL(config.baseURL));
  if (provider === "voyage") {
    const voyageOptions = {
      model: config.model,
      batchSize: config.batchSize,
      apiKey: normalizeApiKey(config.apiKey)
    };
    return yield* createVoyageProvider(voyageOptions);
  }
  const openAIOptions = {
    model: config.model,
    dimensions: config.dimensions,
    batchSize: config.batchSize,
    baseURL: resolvedBaseURL,
    apiKey: normalizeApiKey(config.apiKey)
  };
  return yield* createOpenAIProvider(openAIOptions);
});
|
|
1606
|
+
|
|
1607
|
+
// src/embeddings/types.ts
|
|
1608
|
+
/**
 * Type guard: does `provider` expose a string `model` property?
 *
 * @param provider - Provider object to inspect.
 * @returns true when a `model` key exists and its value is a string.
 */
var hasProviderMetadata = (provider) => "model" in provider && typeof provider.model === "string";
|
|
1611
|
+
/** Search quality preset name -> `efSearch` value. */
var QUALITY_EF_SEARCH = { fast: 64, balanced: 100, thorough: 256 };
|
|
1616
|
+
/** Score added per query term found in a section heading. */
var HEADING_BOOST_FACTOR = 0.05;

/** Score added when a document path matches IMPORTANT_FILE_PATTERNS. */
var FILE_IMPORTANCE_BOOST = 0.03;

/**
 * Case-insensitive path patterns for documents considered important.
 * Each kind appears twice: anchored at the start of the path (`^name`) and
 * after a directory separator (`/name`).
 */
var IMPORTANT_FILE_PATTERNS = [
  // README (root, then nested)
  /^readme\.md$/i,
  /\/readme\.md$/i,
  // Index files
  /^index\.md$/i,
  /\/index\.md$/i,
  // Getting started guides
  /^getting-?started/i,
  /\/getting-?started/i,
  // Introductions
  /^introduction/i,
  /\/introduction/i,
  // Overviews
  /^overview/i,
  /\/overview/i,
  // Quickstart guides
  /^quickstart/i,
  /\/quickstart/i,
  // Changelogs (useful for "what changed" queries)
  /^changelog\.md$/i,
  /\/changelog\.md$/i
];
|
|
1642
|
+
/**
 * Score bonus for documents whose path matches an important-file pattern.
 *
 * @param documentPath - Path of the document being ranked.
 * @returns FILE_IMPORTANCE_BOOST when any pattern matches, else 0.
 */
var calculateFileImportanceBoost = (documentPath) => {
  for (const pattern of IMPORTANT_FILE_PATTERNS) {
    if (pattern.test(documentPath)) {
      return FILE_IMPORTANCE_BOOST;
    }
  }
  return 0;
};
|
|
1648
|
+
/**
 * Score bonus based on how many query terms occur in a heading.
 *
 * Terms are the whitespace-separated, lower-cased words of `query`; a term
 * counts when it is a substring of the lower-cased heading.
 *
 * @param heading - Section heading text.
 * @param query - Raw search query.
 * @returns (number of matching terms) * HEADING_BOOST_FACTOR; 0 for a
 *   query with no terms.
 */
var calculateHeadingBoost = (heading, query) => {
  const terms = query.toLowerCase().split(/\s+/).filter(Boolean);
  if (terms.length === 0) {
    return 0;
  }
  const haystack = heading.toLowerCase();
  let hits = 0;
  for (const term of terms) {
    if (haystack.includes(term)) {
      hits += 1;
    }
  }
  return hits * HEADING_BOOST_FACTOR;
};
|
|
1657
|
+
/**
 * Normalize a search query: lower-case it, turn punctuation into spaces,
 * collapse runs of whitespace and trim.
 *
 * Letters, combining marks and digits of any script are kept, as is `_`.
 * The previous `\w`-based pattern was ASCII-only and replaced accented and
 * non-Latin characters with spaces.
 *
 * @param query - Raw user query.
 * @returns The normalized query string.
 */
var preprocessQuery = (query) => {
  return query.toLowerCase().replace(/[^\p{L}\p{M}\p{N}_\s]/gu, " ").replace(/\s+/g, " ").trim();
};
|
|
1660
|
+
|
|
1661
|
+
// src/embeddings/vector-store.ts
|
|
1662
|
+
import * as fs3 from "fs/promises";
|
|
1663
|
+
import * as path3 from "path";
|
|
1664
|
+
import * as msgpack from "@msgpack/msgpack";
|
|
1665
|
+
import { Effect as Effect10 } from "effect";
|
|
1666
|
+
import HierarchicalNSW from "hnswlib-node";
|
|
1667
|
+
/** File name of the serialized vector index inside the index directory. */
var VECTOR_INDEX_FILE = "vectors.bin";

/** File name of the metadata stored next to the vector index. */
var VECTOR_META_FILE = "vectors.meta.bin";

/** Version number written into saved metadata. */
var INDEX_VERSION = 1;
|
|
1670
|
+
var HnswVectorStore = class {
|
|
1671
|
+
rootPath;
|
|
1672
|
+
dimensions;
|
|
1673
|
+
index = null;
|
|
1674
|
+
entries = /* @__PURE__ */ new Map();
|
|
1675
|
+
idToIndex = /* @__PURE__ */ new Map();
|
|
1676
|
+
nextIndex = 0;
|
|
1677
|
+
provider = "unknown";
|
|
1678
|
+
providerModel = void 0;
|
|
1679
|
+
providerBaseURL = void 0;
|
|
1680
|
+
totalCost = 0;
|
|
1681
|
+
totalTokens = 0;
|
|
1682
|
+
// HNSW build parameters
|
|
1683
|
+
hnswM;
|
|
1684
|
+
hnswEfConstruction;
|
|
1685
|
+
constructor(rootPath, dimensions, hnswOptions) {
|
|
1686
|
+
this.rootPath = path3.resolve(rootPath);
|
|
1687
|
+
this.dimensions = dimensions;
|
|
1688
|
+
this.hnswM = hnswOptions?.m ?? 16;
|
|
1689
|
+
this.hnswEfConstruction = hnswOptions?.efConstruction ?? 200;
|
|
1690
|
+
}
|
|
1691
|
+
getIndexDir() {
|
|
1692
|
+
return path3.join(this.rootPath, INDEX_DIR);
|
|
1693
|
+
}
|
|
1694
|
+
getVectorPath() {
|
|
1695
|
+
return path3.join(this.getIndexDir(), VECTOR_INDEX_FILE);
|
|
1696
|
+
}
|
|
1697
|
+
getMetaPath() {
|
|
1698
|
+
return path3.join(this.getIndexDir(), VECTOR_META_FILE);
|
|
1699
|
+
}
|
|
1700
|
+
ensureIndex() {
|
|
1701
|
+
if (!this.index) {
|
|
1702
|
+
this.index = new HierarchicalNSW.HierarchicalNSW(
|
|
1703
|
+
"cosine",
|
|
1704
|
+
this.dimensions
|
|
1705
|
+
);
|
|
1706
|
+
this.index.initIndex(1e4, this.hnswM, this.hnswEfConstruction, 100);
|
|
1707
|
+
}
|
|
1708
|
+
return this.index;
|
|
1709
|
+
}
|
|
1710
|
+
add(entries) {
|
|
1711
|
+
return Effect10.try({
|
|
1712
|
+
try: () => {
|
|
1713
|
+
const index = this.ensureIndex();
|
|
1714
|
+
for (const entry of entries) {
|
|
1715
|
+
if (this.idToIndex.has(entry.id)) {
|
|
1716
|
+
continue;
|
|
1717
|
+
}
|
|
1718
|
+
const idx = this.nextIndex++;
|
|
1719
|
+
if (idx >= index.getMaxElements()) {
|
|
1720
|
+
index.resizeIndex(index.getMaxElements() * 2);
|
|
1721
|
+
}
|
|
1722
|
+
index.addPoint(entry.embedding, idx);
|
|
1723
|
+
this.entries.set(idx, entry);
|
|
1724
|
+
this.idToIndex.set(entry.id, idx);
|
|
1725
|
+
}
|
|
1726
|
+
},
|
|
1727
|
+
catch: (e) => new VectorStoreError({
|
|
1728
|
+
operation: "add",
|
|
1729
|
+
message: e instanceof Error ? e.message : String(e),
|
|
1730
|
+
cause: e
|
|
1731
|
+
})
|
|
1732
|
+
});
|
|
1733
|
+
}
|
|
1734
|
+
search(vector, limit, threshold = 0, options) {
|
|
1735
|
+
return Effect10.try({
|
|
1736
|
+
try: () => {
|
|
1737
|
+
if (!this.index || this.entries.size === 0) {
|
|
1738
|
+
return [];
|
|
1739
|
+
}
|
|
1740
|
+
if (options?.efSearch !== void 0) {
|
|
1741
|
+
this.index.setEf(options.efSearch);
|
|
1742
|
+
}
|
|
1743
|
+
const result = this.index.searchKnn(
|
|
1744
|
+
vector,
|
|
1745
|
+
Math.min(limit, this.entries.size)
|
|
1746
|
+
);
|
|
1747
|
+
const results = [];
|
|
1748
|
+
for (let i = 0; i < result.neighbors.length; i++) {
|
|
1749
|
+
const idx = result.neighbors[i];
|
|
1750
|
+
const distance = result.distances[i];
|
|
1751
|
+
if (idx === void 0 || distance === void 0) {
|
|
1752
|
+
continue;
|
|
1753
|
+
}
|
|
1754
|
+
const similarity = 1 - distance;
|
|
1755
|
+
if (similarity < threshold) {
|
|
1756
|
+
continue;
|
|
1757
|
+
}
|
|
1758
|
+
const entry = this.entries.get(idx);
|
|
1759
|
+
if (entry) {
|
|
1760
|
+
results.push({
|
|
1761
|
+
id: entry.id,
|
|
1762
|
+
sectionId: entry.sectionId,
|
|
1763
|
+
documentPath: entry.documentPath,
|
|
1764
|
+
heading: entry.heading,
|
|
1765
|
+
similarity
|
|
1766
|
+
});
|
|
1767
|
+
}
|
|
1768
|
+
}
|
|
1769
|
+
return results;
|
|
1770
|
+
},
|
|
1771
|
+
catch: (e) => new VectorStoreError({
|
|
1772
|
+
operation: "search",
|
|
1773
|
+
message: e instanceof Error ? e.message : String(e),
|
|
1774
|
+
cause: e
|
|
1775
|
+
})
|
|
1776
|
+
});
|
|
1777
|
+
}
|
|
1778
|
+
searchWithStats(vector, limit, threshold = 0, options) {
|
|
1779
|
+
return Effect10.try({
|
|
1780
|
+
try: () => {
|
|
1781
|
+
if (!this.index || this.entries.size === 0) {
|
|
1782
|
+
return {
|
|
1783
|
+
results: [],
|
|
1784
|
+
belowThresholdCount: 0,
|
|
1785
|
+
belowThresholdHighest: null
|
|
1786
|
+
};
|
|
1787
|
+
}
|
|
1788
|
+
if (options?.efSearch !== void 0) {
|
|
1789
|
+
this.index.setEf(options.efSearch);
|
|
1790
|
+
}
|
|
1791
|
+
const result = this.index.searchKnn(
|
|
1792
|
+
vector,
|
|
1793
|
+
Math.min(limit, this.entries.size)
|
|
1794
|
+
);
|
|
1795
|
+
const results = [];
|
|
1796
|
+
let belowThresholdCount = 0;
|
|
1797
|
+
let belowThresholdHighest = null;
|
|
1798
|
+
for (let i = 0; i < result.neighbors.length; i++) {
|
|
1799
|
+
const idx = result.neighbors[i];
|
|
1800
|
+
const distance = result.distances[i];
|
|
1801
|
+
if (idx === void 0 || distance === void 0) {
|
|
1802
|
+
continue;
|
|
1803
|
+
}
|
|
1804
|
+
const similarity = 1 - distance;
|
|
1805
|
+
const entry = this.entries.get(idx);
|
|
1806
|
+
if (!entry) continue;
|
|
1807
|
+
if (similarity < threshold) {
|
|
1808
|
+
belowThresholdCount++;
|
|
1809
|
+
if (belowThresholdHighest === null || similarity > belowThresholdHighest) {
|
|
1810
|
+
belowThresholdHighest = similarity;
|
|
1811
|
+
}
|
|
1812
|
+
continue;
|
|
1813
|
+
}
|
|
1814
|
+
results.push({
|
|
1815
|
+
id: entry.id,
|
|
1816
|
+
sectionId: entry.sectionId,
|
|
1817
|
+
documentPath: entry.documentPath,
|
|
1818
|
+
heading: entry.heading,
|
|
1819
|
+
similarity
|
|
1820
|
+
});
|
|
1821
|
+
}
|
|
1822
|
+
return {
|
|
1823
|
+
results,
|
|
1824
|
+
belowThresholdCount,
|
|
1825
|
+
belowThresholdHighest
|
|
1826
|
+
};
|
|
1827
|
+
},
|
|
1828
|
+
catch: (e) => new VectorStoreError({
|
|
1829
|
+
operation: "search",
|
|
1830
|
+
message: e instanceof Error ? e.message : String(e),
|
|
1831
|
+
cause: e
|
|
1832
|
+
})
|
|
1833
|
+
});
|
|
1834
|
+
}
|
|
1835
|
+
save() {
|
|
1836
|
+
return Effect10.gen(
|
|
1837
|
+
function* () {
|
|
1838
|
+
if (!this.index) {
|
|
1839
|
+
return;
|
|
1840
|
+
}
|
|
1841
|
+
const indexDir = this.getIndexDir();
|
|
1842
|
+
yield* Effect10.tryPromise({
|
|
1843
|
+
try: () => fs3.mkdir(indexDir, { recursive: true }),
|
|
1844
|
+
catch: (e) => new VectorStoreError({
|
|
1845
|
+
operation: "save",
|
|
1846
|
+
message: `Failed to create directory: ${e instanceof Error ? e.message : String(e)}`,
|
|
1847
|
+
cause: e
|
|
1848
|
+
})
|
|
1849
|
+
});
|
|
1850
|
+
yield* Effect10.tryPromise({
|
|
1851
|
+
try: () => this.index.writeIndex(this.getVectorPath()),
|
|
1852
|
+
catch: (e) => new VectorStoreError({
|
|
1853
|
+
operation: "save",
|
|
1854
|
+
message: `Failed to write index: ${e instanceof Error ? e.message : String(e)}`,
|
|
1855
|
+
cause: e
|
|
1856
|
+
})
|
|
1857
|
+
});
|
|
1858
|
+
const meta = {
|
|
1859
|
+
version: INDEX_VERSION,
|
|
1860
|
+
provider: this.provider,
|
|
1861
|
+
providerModel: this.providerModel,
|
|
1862
|
+
providerBaseURL: this.providerBaseURL,
|
|
1863
|
+
dimensions: this.dimensions,
|
|
1864
|
+
entries: Object.fromEntries(
|
|
1865
|
+
Array.from(this.entries.entries()).map(([idx, entry]) => [
|
|
1866
|
+
idx.toString(),
|
|
1867
|
+
entry
|
|
1868
|
+
])
|
|
1869
|
+
),
|
|
1870
|
+
totalCost: this.totalCost,
|
|
1871
|
+
totalTokens: this.totalTokens,
|
|
1872
|
+
createdAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
1873
|
+
updatedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
1874
|
+
// Store HNSW build parameters for validation on load
|
|
1875
|
+
hnswParams: {
|
|
1876
|
+
m: this.hnswM,
|
|
1877
|
+
efConstruction: this.hnswEfConstruction
|
|
1878
|
+
}
|
|
1879
|
+
};
|
|
1880
|
+
yield* Effect10.tryPromise({
|
|
1881
|
+
try: async () => {
|
|
1882
|
+
const estimatedSize = this.entries.size * 15e3;
|
|
1883
|
+
if (estimatedSize > 1e8) {
|
|
1884
|
+
console.warn(
|
|
1885
|
+
`Large metadata detected: ~${(estimatedSize / 1e6).toFixed(0)}MB. Consider indexing subdirectories separately.`
|
|
1886
|
+
);
|
|
1887
|
+
}
|
|
1888
|
+
const encoded = msgpack.encode(meta);
|
|
1889
|
+
await fs3.writeFile(this.getMetaPath(), encoded);
|
|
1890
|
+
},
|
|
1891
|
+
catch: (e) => new VectorStoreError({
|
|
1892
|
+
operation: "save",
|
|
1893
|
+
message: `Failed to write metadata: ${e instanceof Error ? e.message : String(e)}`,
|
|
1894
|
+
cause: e
|
|
1895
|
+
})
|
|
1896
|
+
});
|
|
1897
|
+
}.bind(this)
|
|
1898
|
+
);
|
|
1899
|
+
}
|
|
1900
|
+
load() {
|
|
1901
|
+
return Effect10.gen(
|
|
1902
|
+
function* () {
|
|
1903
|
+
const vectorPath = this.getVectorPath();
|
|
1904
|
+
const metaPath = this.getMetaPath();
|
|
1905
|
+
const filesExist = yield* Effect10.tryPromise({
|
|
1906
|
+
try: async () => {
|
|
1907
|
+
await fs3.access(vectorPath);
|
|
1908
|
+
await fs3.access(metaPath);
|
|
1909
|
+
return true;
|
|
1910
|
+
},
|
|
1911
|
+
catch: () => new VectorStoreError({
|
|
1912
|
+
operation: "load",
|
|
1913
|
+
message: "Files not found"
|
|
1914
|
+
})
|
|
1915
|
+
}).pipe(
|
|
1916
|
+
Effect10.catchTag("VectorStoreError", () => Effect10.succeed(false))
|
|
1917
|
+
);
|
|
1918
|
+
if (!filesExist) {
|
|
1919
|
+
return { loaded: false };
|
|
1920
|
+
}
|
|
1921
|
+
const loadedMeta = yield* Effect10.tryPromise({
|
|
1922
|
+
try: async () => {
|
|
1923
|
+
try {
|
|
1924
|
+
await fs3.access(metaPath);
|
|
1925
|
+
const buffer = await fs3.readFile(metaPath);
|
|
1926
|
+
return msgpack.decode(buffer);
|
|
1927
|
+
} catch {
|
|
1928
|
+
const jsonPath = metaPath.replace(".bin", ".json");
|
|
1929
|
+
try {
|
|
1930
|
+
await fs3.access(jsonPath);
|
|
1931
|
+
const json = await fs3.readFile(jsonPath, "utf-8");
|
|
1932
|
+
const meta2 = JSON.parse(json);
|
|
1933
|
+
const encoded = msgpack.encode(meta2);
|
|
1934
|
+
await fs3.writeFile(metaPath, encoded);
|
|
1935
|
+
await fs3.unlink(jsonPath);
|
|
1936
|
+
return meta2;
|
|
1937
|
+
} catch {
|
|
1938
|
+
throw new Error("Metadata file not found");
|
|
1939
|
+
}
|
|
1940
|
+
}
|
|
1941
|
+
},
|
|
1942
|
+
catch: (e) => new VectorStoreError({
|
|
1943
|
+
operation: "load",
|
|
1944
|
+
message: `Failed to read metadata: ${e instanceof Error ? e.message : String(e)}`,
|
|
1945
|
+
cause: e
|
|
1946
|
+
})
|
|
1947
|
+
});
|
|
1948
|
+
const meta = {
|
|
1949
|
+
...loadedMeta,
|
|
1950
|
+
provider: loadedMeta.provider || "openai"
|
|
1951
|
+
};
|
|
1952
|
+
if (meta.dimensions !== this.dimensions) {
|
|
1953
|
+
return yield* Effect10.fail(
|
|
1954
|
+
new DimensionMismatchError({
|
|
1955
|
+
corpusDimensions: meta.dimensions,
|
|
1956
|
+
providerDimensions: this.dimensions,
|
|
1957
|
+
corpusProvider: meta.providerModel ? `${meta.provider}:${meta.providerModel}` : meta.provider,
|
|
1958
|
+
path: this.rootPath
|
|
1959
|
+
})
|
|
1960
|
+
);
|
|
1961
|
+
}
|
|
1962
|
+
this.index = new HierarchicalNSW.HierarchicalNSW(
|
|
1963
|
+
"cosine",
|
|
1964
|
+
this.dimensions
|
|
1965
|
+
);
|
|
1966
|
+
yield* Effect10.tryPromise({
|
|
1967
|
+
try: () => this.index.readIndex(vectorPath),
|
|
1968
|
+
catch: (e) => new VectorStoreError({
|
|
1969
|
+
operation: "load",
|
|
1970
|
+
message: `Failed to read index: ${e instanceof Error ? e.message : String(e)}`,
|
|
1971
|
+
cause: e
|
|
1972
|
+
})
|
|
1973
|
+
});
|
|
1974
|
+
this.entries.clear();
|
|
1975
|
+
this.idToIndex.clear();
|
|
1976
|
+
this.nextIndex = 0;
|
|
1977
|
+
for (const [idxStr, entry] of Object.entries(meta.entries)) {
|
|
1978
|
+
const idx = parseInt(idxStr, 10);
|
|
1979
|
+
this.entries.set(idx, entry);
|
|
1980
|
+
this.idToIndex.set(entry.id, idx);
|
|
1981
|
+
this.nextIndex = Math.max(this.nextIndex, idx + 1);
|
|
1982
|
+
}
|
|
1983
|
+
this.provider = meta.provider;
|
|
1984
|
+
this.providerModel = meta.providerModel;
|
|
1985
|
+
this.providerBaseURL = meta.providerBaseURL;
|
|
1986
|
+
this.totalCost = meta.totalCost;
|
|
1987
|
+
this.totalTokens = meta.totalTokens;
|
|
1988
|
+
let hnswMismatch;
|
|
1989
|
+
if (meta.hnswParams) {
|
|
1990
|
+
const indexM = meta.hnswParams.m;
|
|
1991
|
+
const indexEf = meta.hnswParams.efConstruction;
|
|
1992
|
+
if (indexM !== this.hnswM || indexEf !== this.hnswEfConstruction) {
|
|
1993
|
+
hnswMismatch = {
|
|
1994
|
+
configParams: {
|
|
1995
|
+
m: this.hnswM,
|
|
1996
|
+
efConstruction: this.hnswEfConstruction
|
|
1997
|
+
},
|
|
1998
|
+
indexParams: { m: indexM, efConstruction: indexEf }
|
|
1999
|
+
};
|
|
2000
|
+
}
|
|
2001
|
+
}
|
|
2002
|
+
return { loaded: true, hnswMismatch };
|
|
2003
|
+
}.bind(this)
|
|
2004
|
+
);
|
|
2005
|
+
}
|
|
2006
|
+
getStats() {
|
|
2007
|
+
return {
|
|
2008
|
+
count: this.entries.size,
|
|
2009
|
+
dimensions: this.dimensions,
|
|
2010
|
+
provider: this.provider,
|
|
2011
|
+
providerModel: this.providerModel,
|
|
2012
|
+
totalCost: this.totalCost,
|
|
2013
|
+
totalTokens: this.totalTokens
|
|
2014
|
+
};
|
|
2015
|
+
}
|
|
2016
|
+
setProvider(name, model, baseURL) {
|
|
2017
|
+
this.provider = name;
|
|
2018
|
+
this.providerModel = model;
|
|
2019
|
+
this.providerBaseURL = baseURL;
|
|
2020
|
+
}
|
|
2021
|
+
addCost(cost, tokens) {
|
|
2022
|
+
this.totalCost += cost;
|
|
2023
|
+
this.totalTokens += tokens;
|
|
2024
|
+
}
|
|
2025
|
+
};
|
|
2026
|
+
/**
 * Factory for an HNSW-backed vector store; forwards its arguments unchanged
 * to the HnswVectorStore constructor.
 */
var createVectorStore = (rootPath, dimensions, hnswOptions) => {
  return new HnswVectorStore(rootPath, dimensions, hnswOptions);
};
|
|
2027
|
+
|
|
2028
|
+
// src/embeddings/semantic-search.ts
|
|
2029
|
+
/**
 * Produce a warning-log effect describing an HNSW parameter mismatch, or a
 * no-op effect when `mismatch` is falsy.
 *
 * @param mismatch - `{ configParams, indexParams }`, each with `m` and
 *   `efConstruction`, or undefined when the parameters agree.
 */
var checkHnswMismatch = (mismatch) => {
  if (mismatch) {
    const { configParams: wanted, indexParams: built } = mismatch;
    return Effect11.logWarning(
      `HNSW parameter mismatch: Index was built with M=${built.m}, efConstruction=${built.efConstruction}, but config specifies M=${wanted.m}, efConstruction=${wanted.efConstruction}. HNSW parameters only affect index construction. Run 'mdcontext index --embed --force' to rebuild with new parameters.`
    );
  }
  return Effect11.void;
};
|
|
2038
|
+
/**
 * Produce a warning-log effect when the model recorded in the index stats
 * differs from the model used for the current query; otherwise a no-op.
 *
 * When `stats.providerModel` is absent, `stats.provider` is split on ":"
 * and, if that yields exactly two parts, the second part is compared as the
 * model instead.
 */
var checkProviderMismatch = (stats, currentProvider, currentProviderModel) => {
  const warn = (indexProvider, indexModel) => Effect11.logWarning(
    `Provider mismatch: Index was created with ${indexProvider}/${indexModel}, but querying with ${currentProvider}/${currentProviderModel}. Results may be inconsistent. Consider re-indexing.`
  );
  if (stats.providerModel) {
    return stats.providerModel !== currentProviderModel ? warn(stats.provider, stats.providerModel) : Effect11.void;
  }
  const parts = stats.provider.split(":");
  const hasEmbeddedModel = parts.length === 2;
  if (hasEmbeddedModel && parts[1] !== currentProviderModel) {
    return warn(parts[0], parts[1]);
  }
  return Effect11.void;
};
|
|
2054
|
+
/**
 * Build the text that is sent to the embedding provider for one section:
 * a "# heading" line, an optional "Parent section" line, a "Document" line,
 * a blank line, then the section content, joined with newlines.
 */
var generateEmbeddingText = (section, content, documentTitle, parentHeading) => {
  const header = [`# ${section.heading}`];
  if (parentHeading) {
    header.push(`Parent section: ${parentHeading}`);
  }
  return [...header, `Document: ${documentTitle}`, "", content].join("\n");
};
|
|
2065
|
+
// Price per one million tokens used for every estimate in this module:
// the "text-embedding-3-small" entry of PRICING_DATA, or 0.02 when absent.
var EMBEDDING_PRICE_PER_MILLION = PRICING_DATA.prices["text-embedding-3-small"] ?? 0.02;
/**
 * Estimate the cost of embedding an already-indexed directory.
 *
 * Sections with fewer than 10 tokens and sections whose document path
 * matches one of `options.excludePatterns` are ignored. Exclude patterns are
 * globs where `*` matches any run of characters and `?` a single character;
 * every other character is matched literally.
 *
 * @param rootPath - Directory whose document/section indexes are loaded.
 * @param options - Optional `{ excludePatterns }`.
 * @returns Effect yielding totals plus a per-directory breakdown sorted by
 *   directory name. Fails with `IndexNotFoundError` when either index is
 *   missing.
 */
var estimateEmbeddingCost = (rootPath, options = {}) => Effect11.gen(function* () {
  const resolvedRoot = path4.resolve(rootPath);
  const storage = createStorage(resolvedRoot);
  const docIndex = yield* loadDocumentIndex(storage);
  const sectionIndex = yield* loadSectionIndex(storage);
  if (!docIndex || !sectionIndex) {
    return yield* Effect11.fail(new IndexNotFoundError({ path: resolvedRoot }));
  }
  // Compile the exclude globs once. Regex metacharacters are escaped first
  // so that e.g. "." only matches a dot and "(" cannot break compilation.
  const excludeRegexes = (options.excludePatterns ?? []).map((pattern) => {
    const escaped = pattern.replace(/[.+^${}()|[\]\\]/g, "\\$&");
    return new RegExp(`^${escaped.replace(/\*/g, ".*").replace(/\?/g, ".")}$`);
  });
  const byDir = /* @__PURE__ */ new Map();
  for (const section of Object.values(sectionIndex.sections)) {
    if (section.tokenCount < 10) continue;
    if (excludeRegexes.some((regex) => regex.test(section.documentPath))) continue;
    const dir = path4.dirname(section.documentPath) || ".";
    if (!byDir.has(dir)) {
      byDir.set(dir, { files: /* @__PURE__ */ new Set(), sections: 0, tokens: 0 });
    }
    const entry = byDir.get(dir);
    entry.files.add(section.documentPath);
    entry.sections++;
    entry.tokens += section.tokenCount;
  }
  const directoryEstimates = [];
  let totalFiles = 0;
  let totalSections = 0;
  let totalTokens = 0;
  for (const [dir, data] of byDir) {
    directoryEstimates.push({
      directory: dir,
      fileCount: data.files.size,
      sectionCount: data.sections,
      estimatedTokens: data.tokens,
      estimatedCost: data.tokens / 1e6 * EMBEDDING_PRICE_PER_MILLION
    });
    totalFiles += data.files.size;
    totalSections += data.sections;
    totalTokens += data.tokens;
  }
  directoryEstimates.sort((a, b) => a.directory.localeCompare(b.directory));
  // 1.5 seconds per started group of 100 sections.
  const estimatedTimeSeconds = Math.ceil(totalSections / 100) * 1.5;
  return {
    totalFiles,
    totalSections,
    totalTokens,
    totalCost: totalTokens / 1e6 * EMBEDDING_PRICE_PER_MILLION,
    estimatedTimeSeconds,
    byDirectory: directoryEstimates
  };
});
|
|
2122
|
+
/**
 * Embed every eligible section of an indexed directory and persist the
 * vectors.
 *
 * Flow: load the document and section indexes, create (or reuse
 * `options.provider` as) the embedding provider, and, unless
 * `options.force` is set, return early with `cacheHit: true` when a stored
 * vector index with at least one entry can be loaded. Otherwise sections
 * with at least 10 tokens whose document is known and not excluded are read
 * from disk, embedded in a single provider call, added to the store and
 * saved.
 *
 * Exclude patterns are globs where `*` matches any run of characters and `?`
 * a single character; every other character is matched literally.
 *
 * @param rootPath - Directory whose indexes are loaded.
 * @param options - Optional `{ providerConfig, provider, hnswOptions, force,
 *   excludePatterns, onFileProgress }`.
 * @returns Effect yielding `{ sectionsEmbedded, tokensUsed, cost, duration,
 *   filesProcessed }` (plus cache fields on a cache hit). Fails with
 *   `IndexNotFoundError` when either index is missing.
 */
var buildEmbeddings = (rootPath, options = {}) => Effect11.gen(function* () {
  const startTime = Date.now();
  const resolvedRoot = path4.resolve(rootPath);
  const storage = createStorage(resolvedRoot);
  const docIndex = yield* loadDocumentIndex(storage);
  const sectionIndex = yield* loadSectionIndex(storage);
  if (!docIndex || !sectionIndex) {
    return yield* Effect11.fail(new IndexNotFoundError({ path: resolvedRoot }));
  }
  const providerConfig = options.providerConfig ?? { provider: "openai" };
  const provider = options.provider ?? (yield* createEmbeddingProviderDirect(providerConfig));
  const dimensions = provider.dimensions;
  const vectorStore = createVectorStore(
    resolvedRoot,
    dimensions,
    options.hnswOptions
  );
  if (hasProviderMetadata(provider)) {
    vectorStore.setProvider(provider.name, provider.model, provider.baseURL);
  } else {
    vectorStore.setProvider(provider.name, void 0, void 0);
  }
  if (!options.force) {
    const loadResult = yield* vectorStore.load();
    if (loadResult.loaded) {
      const stats = vectorStore.getStats();
      if (stats.count > 0) {
        const duration2 = Date.now() - startTime;
        const estimatedSavings = stats.totalTokens / 1e6 * EMBEDDING_PRICE_PER_MILLION;
        return {
          sectionsEmbedded: 0,
          tokensUsed: 0,
          cost: 0,
          duration: duration2,
          filesProcessed: 0,
          cacheHit: true,
          existingVectors: stats.count,
          estimatedSavings
        };
      }
    }
  }
  // Compile the exclude globs once. Regex metacharacters are escaped first
  // so that e.g. "." only matches a dot and "(" cannot break compilation.
  const excludeRegexes = (options.excludePatterns ?? []).map((pattern) => {
    const escaped = pattern.replace(/[.+^${}()|[\]\\]/g, "\\$&");
    return new RegExp(`^${escaped.replace(/\*/g, ".*").replace(/\?/g, ".")}$`);
  });
  const isExcluded = (docPath) => excludeRegexes.some((regex) => regex.test(docPath));
  const sectionsByDoc = /* @__PURE__ */ new Map();
  for (const section of Object.values(sectionIndex.sections)) {
    const document = docIndex.documents[section.documentPath];
    if (!document) continue;
    if (section.tokenCount < 10) continue;
    if (isExcluded(section.documentPath)) continue;
    let parentHeading;
    if (section.level > 1) {
      // The last listed section one level up that starts before this
      // section is taken as its parent.
      const docSections = sectionIndex.byDocument[document.id] ?? [];
      for (const sibId of docSections) {
        const sib = sectionIndex.sections[sibId];
        if (sib && sib.level === section.level - 1 && sib.startLine < section.startLine) {
          parentHeading = sib.heading;
        }
      }
    }
    const docPath = section.documentPath;
    if (!sectionsByDoc.has(docPath)) {
      sectionsByDoc.set(docPath, []);
    }
    sectionsByDoc.get(docPath).push({ section, parentHeading });
  }
  if (sectionsByDoc.size === 0) {
    const duration2 = Date.now() - startTime;
    return {
      sectionsEmbedded: 0,
      tokensUsed: 0,
      cost: 0,
      duration: duration2,
      filesProcessed: 0
    };
  }
  const sectionsToEmbed = [];
  const docPaths = Array.from(sectionsByDoc.keys());
  let filesProcessed = 0;
  for (let fileIndex = 0; fileIndex < docPaths.length; fileIndex++) {
    const docPath = docPaths[fileIndex];
    const sections = sectionsByDoc.get(docPath);
    const document = docIndex.documents[docPath];
    if (!document) continue;
    if (options.onFileProgress) {
      options.onFileProgress({
        fileIndex: fileIndex + 1,
        totalFiles: docPaths.length,
        filePath: docPath,
        sectionCount: sections.length
      });
    }
    const filePath = path4.join(resolvedRoot, docPath);
    // tryPromise (not promise): a rejected read must surface as a regular
    // failure so catchAll can turn it into the "skip this file" result.
    // With Effect.promise a rejection becomes a defect and bypasses catchAll.
    const fileContentResult = yield* Effect11.tryPromise(
      () => fs4.readFile(filePath, "utf-8")
    ).pipe(
      Effect11.map((content) => ({ ok: true, content })),
      Effect11.catchAll(
        () => Effect11.succeed({ ok: false, content: "" })
      )
    );
    if (!fileContentResult.ok) {
      yield* Effect11.logWarning(`Skipping file (cannot read): ${docPath}`);
      continue;
    }
    filesProcessed++;
    const lines = fileContentResult.content.split("\n");
    for (const { section, parentHeading } of sections) {
      // startLine is 1-based and endLine inclusive, as used by this slice.
      const content = lines.slice(section.startLine - 1, section.endLine).join("\n");
      const text = generateEmbeddingText(
        section,
        content,
        document.title,
        parentHeading
      );
      sectionsToEmbed.push({ section, text });
    }
  }
  if (sectionsToEmbed.length === 0) {
    const duration2 = Date.now() - startTime;
    return {
      sectionsEmbedded: 0,
      tokensUsed: 0,
      cost: 0,
      duration: duration2,
      filesProcessed
    };
  }
  const texts = sectionsToEmbed.map((s) => s.text);
  const result = yield* wrapEmbedding(
    provider.embed(texts),
    providerConfig.provider ?? "openai"
  );
  const entries = [];
  for (let i = 0; i < sectionsToEmbed.length; i++) {
    const { section } = sectionsToEmbed[i] ?? { section: null };
    const embedding = result.embeddings[i];
    // Skip positions for which the provider returned no vector.
    if (!section || !embedding) continue;
    entries.push({
      id: section.id,
      sectionId: section.id,
      documentPath: section.documentPath,
      heading: section.heading,
      embedding
    });
  }
  yield* vectorStore.add(entries);
  vectorStore.addCost(result.cost, result.tokensUsed);
  yield* vectorStore.save();
  const duration = Date.now() - startTime;
  return {
    sectionsEmbedded: entries.length,
    tokensUsed: result.tokensUsed,
    cost: result.cost,
    duration,
    filesProcessed
  };
});
|
|
2287
|
+
/**
 * Run a semantic search over the stored embeddings of `rootPath`.
 *
 * The query (or, with `options.hyde`, a generated hypothetical document) is
 * embedded, `limit * 2` candidates are requested from the vector store, then
 * optionally filtered by `options.pathPattern`, boosted by heading and file
 * importance (unless `options.headingBoost === false`), sorted by similarity
 * and cut to `limit`.
 *
 * `pathPattern` is a case-insensitive glob where `*` matches any run of
 * characters and `?` a single character; every other character is matched
 * literally.
 *
 * @param rootPath - Directory whose vector index is loaded.
 * @param query - User query text.
 * @param options - Optional `{ providerConfig, hyde, hydeOptions,
 *   skipPreprocessing, limit, threshold, quality, pathPattern,
 *   headingBoost }`.
 * @returns Effect yielding `{ sectionId, documentPath, heading,
 *   similarity }[]`. Fails with `EmbeddingsNotFoundError` when no index can
 *   be loaded and with `EmbeddingError` when no query vector is produced.
 */
var semanticSearch = (rootPath, query, options = {}) => Effect11.gen(function* () {
  const resolvedRoot = path4.resolve(rootPath);
  const provider = yield* createEmbeddingProviderDirect(
    options.providerConfig ?? { provider: "openai" }
  );
  const dimensions = provider.dimensions;
  const vectorStore = createVectorStore(resolvedRoot, dimensions);
  const loadResult = yield* vectorStore.load();
  if (!loadResult.loaded) {
    return yield* Effect11.fail(
      new EmbeddingsNotFoundError({ path: resolvedRoot })
    );
  }
  const stats = vectorStore.getStats();
  const currentProviderModel = options.providerConfig?.model ?? "text-embedding-3-small";
  const currentProvider = options.providerConfig?.provider ?? "openai";
  yield* checkProviderMismatch(stats, currentProvider, currentProviderModel);
  yield* checkHnswMismatch(loadResult.hnswMismatch);
  let textToEmbed;
  let hydeResult;
  if (options.hyde) {
    hydeResult = yield* generateHypotheticalDocument(query, {
      model: options.hydeOptions?.model,
      maxTokens: options.hydeOptions?.maxTokens,
      temperature: options.hydeOptions?.temperature
    });
    textToEmbed = hydeResult.hypotheticalDocument;
    yield* Effect11.logDebug(
      `HyDE generated ${hydeResult.tokensUsed} tokens ($${hydeResult.cost.toFixed(6)})`
    );
  } else {
    textToEmbed = options.skipPreprocessing ? query : preprocessQuery(query);
  }
  const queryResult = yield* wrapEmbedding(
    provider.embed([textToEmbed]),
    currentProvider
  );
  const queryVector = queryResult.embeddings[0];
  if (!queryVector) {
    return yield* Effect11.fail(
      new EmbeddingError({
        reason: "Unknown",
        message: "Failed to generate query embedding",
        provider: currentProvider
      })
    );
  }
  const limit = options.limit ?? 10;
  const threshold = options.threshold ?? 0;
  const efSearch = options.quality ? QUALITY_EF_SEARCH[options.quality] : void 0;
  const searchResults = yield* vectorStore.search(
    queryVector,
    limit * 2,
    threshold,
    { efSearch }
  );
  let filteredResults = searchResults;
  if (options.pathPattern) {
    // Escape every regex metacharacter before expanding the glob wildcards,
    // so characters such as "(" or "+" in a path cannot break or distort
    // the generated expression.
    const pattern = options.pathPattern.replace(/[.+^${}()|[\]\\]/g, "\\$&").replace(/\*/g, ".*").replace(/\?/g, ".");
    const regex = new RegExp(`^${pattern}$`, "i");
    filteredResults = searchResults.filter((r) => regex.test(r.documentPath));
  }
  const applyBoost = options.headingBoost !== false;
  const boostedResults = applyBoost ? filteredResults.map((r) => ({
    ...r,
    // Boosted similarity is capped at 1.
    similarity: Math.min(
      1,
      r.similarity + calculateHeadingBoost(r.heading, query) + calculateFileImportanceBoost(r.documentPath)
    )
  })) : filteredResults;
  // Sort a copy so the array handed back by the vector store is not mutated.
  const results = [...boostedResults].sort((a, b) => b.similarity - a.similarity).slice(0, limit).map((r) => ({
    sectionId: r.sectionId,
    documentPath: r.documentPath,
    heading: r.heading,
    similarity: r.similarity
  }));
  return results;
});
|
|
2365
|
+
/**
 * Same pipeline as a plain semantic search, but uses the store's
 * `searchWithStats` and additionally reports how many candidates fell below
 * the threshold, the highest below-threshold similarity, and how many
 * results were available before the `limit` cut.
 *
 * `pathPattern` is a case-insensitive glob where `*` matches any run of
 * characters and `?` a single character; every other character is matched
 * literally.
 *
 * @param rootPath - Directory whose vector index is loaded.
 * @param query - User query text.
 * @param options - Optional `{ providerConfig, hyde, hydeOptions,
 *   skipPreprocessing, limit, threshold, quality, pathPattern,
 *   headingBoost }`.
 * @returns Effect yielding `{ results, belowThresholdCount,
 *   belowThresholdHighest, totalAvailable }`. Fails with
 *   `EmbeddingsNotFoundError` when no index can be loaded and with
 *   `EmbeddingError` when no query vector is produced.
 */
var semanticSearchWithStats = (rootPath, query, options = {}) => Effect11.gen(function* () {
  const resolvedRoot = path4.resolve(rootPath);
  const provider = yield* createEmbeddingProviderDirect(
    options.providerConfig ?? { provider: "openai" }
  );
  const dimensions = provider.dimensions;
  const vectorStore = createVectorStore(resolvedRoot, dimensions);
  const loadResult = yield* vectorStore.load();
  if (!loadResult.loaded) {
    return yield* Effect11.fail(
      new EmbeddingsNotFoundError({ path: resolvedRoot })
    );
  }
  const stats = vectorStore.getStats();
  const currentProviderModel = options.providerConfig?.model ?? "text-embedding-3-small";
  const currentProvider = options.providerConfig?.provider ?? "openai";
  yield* checkProviderMismatch(stats, currentProvider, currentProviderModel);
  yield* checkHnswMismatch(loadResult.hnswMismatch);
  let textToEmbed;
  let hydeResult;
  if (options.hyde) {
    hydeResult = yield* generateHypotheticalDocument(query, {
      model: options.hydeOptions?.model,
      maxTokens: options.hydeOptions?.maxTokens,
      temperature: options.hydeOptions?.temperature
    });
    textToEmbed = hydeResult.hypotheticalDocument;
    yield* Effect11.logDebug(
      `HyDE generated ${hydeResult.tokensUsed} tokens ($${hydeResult.cost.toFixed(6)})`
    );
  } else {
    textToEmbed = options.skipPreprocessing ? query : preprocessQuery(query);
  }
  const queryResult = yield* wrapEmbedding(
    provider.embed([textToEmbed]),
    currentProvider
  );
  const queryVector = queryResult.embeddings[0];
  if (!queryVector) {
    return yield* Effect11.fail(
      new EmbeddingError({
        reason: "Unknown",
        message: "Failed to generate query embedding",
        provider: currentProvider
      })
    );
  }
  const limit = options.limit ?? 10;
  const threshold = options.threshold ?? 0;
  const efSearch = options.quality ? QUALITY_EF_SEARCH[options.quality] : void 0;
  const searchResultWithStats = yield* vectorStore.searchWithStats(
    queryVector,
    limit * 2,
    threshold,
    { efSearch }
  );
  let filteredResults = searchResultWithStats.results;
  if (options.pathPattern) {
    // Escape every regex metacharacter before expanding the glob wildcards,
    // so characters such as "(" or "+" in a path cannot break or distort
    // the generated expression.
    const pattern = options.pathPattern.replace(/[.+^${}()|[\]\\]/g, "\\$&").replace(/\*/g, ".*").replace(/\?/g, ".");
    const regex = new RegExp(`^${pattern}$`, "i");
    filteredResults = searchResultWithStats.results.filter(
      (r) => regex.test(r.documentPath)
    );
  }
  const applyBoost = options.headingBoost !== false;
  const boostedResults = applyBoost ? filteredResults.map((r) => ({
    ...r,
    // Boosted similarity is capped at 1.
    similarity: Math.min(
      1,
      r.similarity + calculateHeadingBoost(r.heading, query) + calculateFileImportanceBoost(r.documentPath)
    )
  })) : filteredResults;
  // Sort a copy so the array inside the store's result object is not mutated.
  const sortedResults = [...boostedResults].sort(
    (a, b) => b.similarity - a.similarity
  );
  // Counted after filtering/boosting but before the limit is applied.
  const totalAvailable = sortedResults.length;
  const results = sortedResults.slice(0, limit).map((r) => ({
    sectionId: r.sectionId,
    documentPath: r.documentPath,
    heading: r.heading,
    similarity: r.similarity
  }));
  return {
    results,
    belowThresholdCount: searchResultWithStats.belowThresholdCount,
    belowThresholdHighest: searchResultWithStats.belowThresholdHighest ?? void 0,
    totalAvailable
  };
});
|
|
2454
|
+
/**
 * Report summary statistics about the stored embeddings of `rootPath`
 * without loading the vector index itself.
 *
 * The vector store in this file writes its metadata as MessagePack and
 * removes the legacy JSON file after migrating it, so the binary file is
 * tried first and the JSON file is kept as a fallback.
 *
 * @param rootPath - Directory whose index folder is inspected.
 * @returns Effect yielding `{ hasEmbeddings, count, provider, dimensions,
 *   totalCost, totalTokens }`; `hasEmbeddings` is false when no metadata
 *   file can be read. Fails with `VectorStoreError` when a metadata file is
 *   present but cannot be decoded.
 */
var getEmbeddingStats = (rootPath) => Effect11.gen(function* () {
  const resolvedRoot = path4.resolve(rootPath);
  const indexDir = path4.join(resolvedRoot, INDEX_DIR);
  const jsonMetaPath = path4.join(indexDir, "vectors.meta.json");
  // NOTE(review): binary file name inferred from the store's
  // `.bin` -> `.json` legacy fallback; confirm against getMetaPath().
  const binMetaPath = path4.join(indexDir, "vectors.meta.bin");
  const parseError = (e) => new VectorStoreError({
    operation: "load",
    message: `Failed to parse metadata: ${e instanceof Error ? e.message : String(e)}`,
    cause: e
  });
  // A missing/unreadable file is not an error here: it just means "no data".
  const readOrNull = (read) => Effect11.tryPromise({
    try: read,
    catch: (e) => new VectorStoreError({
      operation: "load",
      message: `Failed to read metadata: ${e instanceof Error ? e.message : String(e)}`,
      cause: e
    })
  }).pipe(Effect11.catchAll(() => Effect11.succeed(null)));
  let meta;
  const binContent = yield* readOrNull(() => fs4.readFile(binMetaPath));
  if (binContent?.length) {
    meta = yield* Effect11.try({
      try: () => msgpack.decode(binContent),
      catch: parseError
    });
  } else {
    const metaContent = yield* readOrNull(() => fs4.readFile(jsonMetaPath, "utf-8"));
    if (!metaContent) {
      return {
        hasEmbeddings: false,
        count: 0,
        provider: "none",
        dimensions: 0,
        totalCost: 0,
        totalTokens: 0
      };
    }
    meta = yield* Effect11.try({
      try: () => JSON.parse(metaContent),
      catch: parseError
    });
  }
  return {
    hasEmbeddings: true,
    // Tolerate metadata without an entries map instead of throwing.
    count: Object.keys(meta.entries ?? {}).length,
    provider: meta.providerModel ? `${meta.provider}:${meta.providerModel}` : meta.provider || "openai",
    dimensions: meta.dimensions,
    totalCost: meta.totalCost || 0,
    totalTokens: meta.totalTokens || 0
  };
});
|
|
2492
|
+
|
|
2493
|
+
// src/search/searcher.ts
|
|
2494
|
+
import * as fs5 from "fs/promises";
|
|
2495
|
+
import * as path5 from "path";
|
|
2496
|
+
import { Effect as Effect12 } from "effect";
|
|
2497
|
+
|
|
2498
|
+
// src/search/fuzzy-search.ts
|
|
2499
|
+
import { stemmer } from "stemmer";
|
|
2500
|
+
/** Lower-case `word` and reduce it to its stem via `stemmer`. */
var stem = (word) => stemmer(word.toLowerCase());
|
|
2503
|
+
/**
 * Levenshtein edit distance between `a` and `b`: the minimum number of
 * single-element insertions, deletions and substitutions turning one into
 * the other. Elements are compared with `===` by index.
 */
var levenshteinDistance = (a, b) => {
  const rowCount = a.length + 1;
  const colCount = b.length + 1;
  // grid[r][c] = distance between the first r elements of a and the first
  // c elements of b; the first column and first row are the base cases.
  const grid = Array.from({ length: rowCount }, (_, r) => {
    const row = new Array(colCount).fill(0);
    row[0] = r;
    return row;
  });
  for (let c = 0; c < colCount; c++) {
    grid[0][c] = c;
  }
  for (let r = 1; r < rowCount; r++) {
    const above = grid[r - 1];
    const current = grid[r];
    for (let c = 1; c < colCount; c++) {
      const deletion = above[c] + 1;
      const insertion = current[c - 1] + 1;
      const substitution = above[c - 1] + (a[r - 1] === b[c - 1] ? 0 : 1);
      current[c] = Math.min(deletion, insertion, substitution);
    }
  }
  return grid[a.length][b.length];
};
|
|
2526
|
+
/**
 * True when the case-insensitive edit distance between the two words is at
 * most `maxDistance` (default 2). Words whose lengths differ by more than
 * `maxDistance` are rejected without computing the distance.
 */
var isFuzzyMatch = (word1, word2, maxDistance = 2) => {
  const lengthGap = Math.abs(word1.length - word2.length);
  if (lengthGap > maxDistance) {
    return false;
  }
  const distance = levenshteinDistance(word1.toLowerCase(), word2.toLowerCase());
  return distance <= maxDistance;
};
|
|
2532
|
+
/**
 * True when every word of `query` has a counterpart among the words of
 * `text`. Both strings are lower-cased and split on runs of non-word
 * characters and underscores. Two words correspond when they are equal,
 * when `options.stem` is truthy and their stems are equal, or when
 * `options.fuzzyDistance` is a positive number and they fuzzy-match within
 * that distance. A query without any words matches everything.
 */
var matchesWithOptions = (query, text, options = {}) => {
  const { stem: useStemming, fuzzyDistance } = options;
  const toWords = (value) => value.toLowerCase().split(/[\W_]+/).filter((w) => w.length > 0);
  const queryWords = toWords(query);
  const textWords = toWords(text);
  const fuzzyEnabled = fuzzyDistance !== void 0 && fuzzyDistance > 0;
  const corresponds = (textWord, queryWord) => {
    if (textWord === queryWord) {
      return true;
    }
    if (useStemming && stem(textWord) === stem(queryWord)) {
      return true;
    }
    return fuzzyEnabled && isFuzzyMatch(textWord, queryWord, fuzzyDistance);
  };
  return queryWords.every(
    (queryWord) => textWords.some((textWord) => corresponds(textWord, queryWord))
  );
};
|
|
2565
|
+
/**
 * Collect the distinct lower-cased words of `line` that match any of
 * `queryWords`, in order of first discovery (query words outermost, line
 * words innermost). A line word matches a query word when they are equal
 * ignoring case, when `options.stem` is truthy and their stems are equal,
 * or when `options.fuzzyDistance` is a positive number and they fuzzy-match
 * within that distance.
 */
var findMatchesInLine = (queryWords, line, options = {}) => {
  const { stem: useStemming, fuzzyDistance } = options;
  const fuzzyEnabled = fuzzyDistance !== void 0 && fuzzyDistance > 0;
  const lineWords = line.toLowerCase().split(/[\W_]+/).filter(Boolean);
  const hits = /* @__PURE__ */ new Set();
  for (const queryWord of queryWords) {
    const needle = queryWord.toLowerCase();
    const needleStem = useStemming ? stem(queryWord) : null;
    const isHit = (candidate) => {
      if (candidate === needle) {
        return true;
      }
      if (needleStem && stem(candidate) === needleStem) {
        return true;
      }
      return fuzzyEnabled && isFuzzyMatch(candidate, needle, fuzzyDistance);
    };
    for (const candidate of lineWords) {
      // Words already collected are never re-tested.
      if (!hits.has(candidate) && isHit(candidate)) {
        hits.add(candidate);
      }
    }
  }
  return [...hits];
};
|
|
2591
|
+
/**
 * Build a global, case-insensitive regex that highlights the words of
 * `query`. Each word becomes a whole-word alternative; with `options.stem`
 * the word's stem followed by any word characters is matched instead.
 * A query without words yields a regex that can never match.
 */
var buildFuzzyHighlightPattern = (query, options = {}) => {
  const { stem: useStemming } = options;
  const escapeForRegex = (value) => value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const words = query.toLowerCase().split(/[\W_]+/).filter((w) => w.length > 0);
  if (words.length === 0) {
    return /.^/;
  }
  const alternatives = words.map(
    (word) => useStemming ? `\\b${escapeForRegex(stem(word))}\\w*\\b` : `\\b${escapeForRegex(word)}\\b`
  );
  return new RegExp(alternatives.join("|"), "gi");
};
|
|
2610
|
+
|
|
2611
|
+
// src/search/path-matcher.ts
|
|
2612
|
+
/**
 * Case-insensitive glob match of `filePath` against `pattern`.
 *
 * Supported wildcards: `**` matches any characters including "/", `*`
 * matches any characters except "/", and `?` matches one character except
 * "/". Every other character is matched literally.
 *
 * The wildcards are expanded in a single pass, so no intermediate
 * placeholder text is needed and a pattern can never collide with one.
 *
 * @param filePath - Path to test.
 * @param pattern - Glob pattern.
 * @returns true when the whole path matches the pattern.
 */
var matchPath = (filePath, pattern) => {
  const WILDCARDS = { "**": ".*", "*": "[^/]*", "?": "[^/]" };
  const regexPattern = pattern
    // Escape regex metacharacters first; "*" and "?" are left for the
    // wildcard pass below.
    .replace(/[.+^${}()|[\]\\]/g, "\\$&")
    // "**" is listed before "*" so the longer token wins.
    .replace(/\*\*|\*|\?/g, (token) => WILDCARDS[token]);
  const regex = new RegExp(`^${regexPattern}$`, "i");
  return regex.test(filePath);
};
|
|
2618
|
+
|
|
2619
|
+
// src/search/query-parser.ts
|
|
2620
|
+
/**
 * Split a search query into tokens.
 *
 * Token types: PHRASE (text between double quotes; an unterminated quote
 * runs to the end of the input), LPAREN, RPAREN, the operators AND / OR /
 * NOT (recognised case-insensitively, value upper-cased), and TERM for any
 * other run of characters that contains no whitespace, parenthesis or
 * double quote. Whitespace separates tokens and is dropped.
 */
var tokenize = (query) => {
  const OPERATORS = /* @__PURE__ */ new Set(["AND", "OR", "NOT"]);
  const WORD = /[^\s()"]+/y;
  const out = [];
  let pos = 0;
  while (pos < query.length) {
    const ch = query[pos];
    if (/\s/.test(ch)) {
      pos += 1;
      continue;
    }
    if (ch === '"') {
      const closing = query.indexOf('"', pos + 1);
      const end = closing === -1 ? query.length : closing;
      out.push({ type: "PHRASE", value: query.slice(pos + 1, end) });
      // Step past the closing quote (or past the end when there is none).
      pos = end + 1;
      continue;
    }
    if (ch === "(" || ch === ")") {
      out.push({ type: ch === "(" ? "LPAREN" : "RPAREN", value: ch });
      pos += 1;
      continue;
    }
    WORD.lastIndex = pos;
    const found = WORD.exec(query);
    if (!found) {
      pos += 1;
      continue;
    }
    const word = found[0];
    const upperWord = word.toUpperCase();
    out.push(
      OPERATORS.has(upperWord) ? { type: upperWord, value: upperWord } : { type: "TERM", value: word }
    );
    pos += word.length;
  }
  return out;
};
|
|
2669
|
+
/**
 * Recursive-descent parser for boolean search queries.
 *
 * Grammar (lowest to highest precedence):
 *   expr    := andExpr ( OR andExpr )*
 *   andExpr := notExpr ( [AND] notExpr )*     -- AND may be implicit
 *   notExpr := NOT notExpr | primary
 *   primary := "(" expr [")"] | PHRASE | TERM
 *
 * The parser is lenient: a missing closing parenthesis is tolerated, and a
 * missing operand is represented by an empty term node
 * (`{ type: "term", value: "" }`). Closing parentheses that have no matching
 * opener are discarded up front; otherwise such a token would stop parsing at
 * top level and every token after it would be silently ignored.
 *
 * While parsing, every TERM and PHRASE value encountered is collected into
 * `terms` and `phrases`, in source order.
 */
var Parser = class {
  tokens;
  pos = 0;
  terms = [];
  phrases = [];
  /**
   * @param {Array<{ type: string, value: string }>} tokens - Token stream to parse.
   */
  constructor(tokens) {
    let depth = 0;
    this.tokens = tokens.filter((token) => {
      if (token.type === "LPAREN") {
        depth++;
        return true;
      }
      if (token.type !== "RPAREN") {
        return true;
      }
      if (depth === 0) {
        // Unmatched ")": nothing in the grammar could ever consume it.
        return false;
      }
      depth--;
      return true;
    });
  }
  /** @returns the token at the cursor, or undefined past the end. */
  current() {
    return this.tokens[this.pos];
  }
  /** Return the token at the cursor and move the cursor forward by one. */
  advance() {
    return this.tokens[this.pos++];
  }
  /**
   * Consume the current token when it has the given type.
   * @param {string} type - Token type to look for.
   * @returns {boolean} True when a token was consumed.
   */
  match(type) {
    if (this.current()?.type === type) {
      this.advance();
      return true;
    }
    return false;
  }
  /**
   * Parse the whole token stream.
   * @returns the root AST node, or null when there are no tokens to parse.
   */
  parse() {
    if (this.tokens.length === 0) {
      return null;
    }
    return this.parseExpr();
  }
  /** expr := andExpr ( OR andExpr )*  -- left-associative. */
  parseExpr() {
    let left = this.parseAndExpr();
    while (this.match("OR")) {
      const right = this.parseAndExpr();
      left = { type: "or", left, right };
    }
    return left;
  }
  /** andExpr := notExpr ( [AND] notExpr )*  -- left-associative. */
  parseAndExpr() {
    let left = this.parseNotExpr();
    while (this.match("AND") || this.isImplicitAnd()) {
      const right = this.parseNotExpr();
      left = { type: "and", left, right };
    }
    return left;
  }
  /** True when the current token can start an operand, i.e. two operands are adjacent. */
  isImplicitAnd() {
    const tok = this.current();
    return tok?.type === "TERM" || tok?.type === "PHRASE" || tok?.type === "NOT" || tok?.type === "LPAREN";
  }
  /** notExpr := NOT notExpr | primary */
  parseNotExpr() {
    if (this.match("NOT")) {
      const operand = this.parseNotExpr();
      return { type: "not", operand };
    }
    return this.parsePrimary();
  }
  /** primary := "(" expr [")"] | PHRASE | TERM; anything else yields an empty term. */
  parsePrimary() {
    const tok = this.current();
    if (this.match("LPAREN")) {
      const expr = this.parseExpr();
      // The closing parenthesis is optional so "(a OR b" still parses.
      this.match("RPAREN");
      return expr;
    }
    if (tok?.type === "PHRASE") {
      this.advance();
      this.phrases.push(tok.value);
      return { type: "phrase", value: tok.value };
    }
    if (tok?.type === "TERM") {
      this.advance();
      this.terms.push(tok.value);
      return { type: "term", value: tok.value };
    }
    // Missing operand (e.g. a trailing operator): placeholder node, token not consumed.
    return { type: "term", value: "" };
  }
};
|
|
2743
|
+
/**
 * Tokenize and parse a boolean search query.
 *
 * @param {string} query - Raw query text.
 * @returns {{ ast: object, terms: string[], phrases: string[] } | null}
 *   The syntax tree plus the term and phrase values collected by the parser,
 *   or null when the query yields no tokens or no tree.
 */
var parseQuery = (query) => {
  const tokenList = tokenize(query);
  if (tokenList.length === 0) {
    return null;
  }
  const queryParser = new Parser(tokenList);
  const tree = queryParser.parse();
  // `terms` / `phrases` are filled in as a side effect of parse().
  return tree ? { ast: tree, terms: queryParser.terms, phrases: queryParser.phrases } : null;
};
|
|
2759
|
+
/**
 * Decide whether a query uses boolean-query syntax.
 *
 * @param {string} query - Raw query text.
 * @returns {boolean} True when tokenizing the query produces at least one
 *   AND, OR, NOT, PHRASE or LPAREN token.
 */
var isAdvancedQuery = (query) => {
  const advancedTypes = ["AND", "OR", "NOT", "PHRASE", "LPAREN"];
  return tokenize(query).some(({ type }) => advancedTypes.includes(type));
};
|
|
2765
|
+
/**
 * Evaluate a parsed boolean query against a piece of text.
 *
 * Terms and phrases are matched as case-insensitive substrings. A term with
 * an empty value (the parser's placeholder for a missing operand) always
 * matches.
 *
 * @param {object} ast - Root node: `term`, `phrase`, `and`, `or` or `not`.
 * @param {string} text - Text to test.
 * @returns {boolean} Whether the text satisfies the query (undefined for a
 *   node of an unrecognised type).
 */
var evaluateQuery = (ast, text) => {
  const haystack = text.toLowerCase();
  const contains = (needle) => haystack.includes(needle.toLowerCase());
  const holds = (node) => {
    const kind = node.type;
    if (kind === "term") {
      return node.value ? contains(node.value) : true;
    }
    if (kind === "phrase") {
      return contains(node.value);
    }
    if (kind === "and") {
      return holds(node.left) && holds(node.right);
    }
    if (kind === "or") {
      return holds(node.left) || holds(node.right);
    }
    if (kind === "not") {
      return !holds(node.operand);
    }
    return undefined;
  };
  return holds(ast);
};
|
|
2789
|
+
/**
 * Build the regular expression used to highlight the operands of a parsed
 * boolean query.
 *
 * Terms are matched as whole words and phrases as literal text; empty values
 * are ignored. A word-boundary anchor is only placed on a term edge that is a
 * word character: `\b` next to a non-word character (as in `c++` or `@scope`)
 * would demand a word character on the other side and the term could never
 * be highlighted.
 *
 * @param {{ terms: string[], phrases: string[] }} parsed - Operands collected
 *   while parsing the query.
 * @returns {RegExp} A global, case-insensitive pattern, or a never-matching
 *   pattern when there is nothing to highlight.
 */
var buildHighlightPattern = (parsed) => {
  const escapeChars = (s) => s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const wholeWord = (term) => {
    const lead = /^\w/.test(term) ? "\\b" : "";
    const trail = /\w$/.test(term) ? "\\b" : "";
    return `${lead}${escapeChars(term)}${trail}`;
  };
  const patterns = [
    ...parsed.terms.filter(Boolean).map(wholeWord),
    ...parsed.phrases.filter(Boolean).map(escapeChars)
  ];
  if (patterns.length === 0) {
    // `.^` can never match: nothing can precede the start of input.
    return /.^/;
  }
  return new RegExp(patterns.join("|"), "gi");
};
|
|
2807
|
+
|
|
2808
|
+
// src/search/searcher.ts
|
|
2809
|
+
/**
 * Structural search over the section index: returns the sections that pass
 * every supplied filter, each paired with its document entry.
 *
 * Filters read from `options`: `heading` (regex source, case-insensitive),
 * `pathPattern` (glob checked with `matchPath`), `hasCode` / `hasList` /
 * `hasTable` (exact match when provided), `minLevel` / `maxLevel` (inclusive
 * heading-level bounds) and `limit` (scanning stops once this many results
 * have been collected).
 *
 * @param {string} rootPath - Root used to create the index storage.
 * @param {object} [options] - Filters described above.
 * @returns An Effect producing an array of `{ section, document }`; the array
 *   is empty when either index could not be loaded.
 */
var search = (rootPath, options = {}) => Effect12.gen(function* () {
  const storage = createStorage(rootPath);
  const docIndex = yield* loadDocumentIndex(storage);
  const sectionIndex = yield* loadSectionIndex(storage);
  if (!docIndex || !sectionIndex) {
    return [];
  }
  const { heading, pathPattern, hasCode, hasList, hasTable, minLevel, maxLevel, limit } = options;
  const headingMatcher = heading ? new RegExp(heading, "i") : null;
  // True when the section survives every filter the caller supplied.
  const accepts = (section) => {
    if (headingMatcher !== null && !headingMatcher.test(section.heading)) return false;
    if (pathPattern && !matchPath(section.documentPath, pathPattern)) return false;
    if (hasCode !== undefined && section.hasCode !== hasCode) return false;
    if (hasList !== undefined && section.hasList !== hasList) return false;
    if (hasTable !== undefined && section.hasTable !== hasTable) return false;
    if (minLevel !== undefined && section.level < minLevel) return false;
    if (maxLevel !== undefined && section.level > maxLevel) return false;
    return true;
  };
  const found = [];
  for (const section of Object.values(sectionIndex.sections)) {
    if (!accepts(section)) {
      continue;
    }
    const document = docIndex.documents[section.documentPath];
    if (document) {
      found.push({ section, document });
    }
    if (limit !== undefined && found.length >= limit) {
      break;
    }
  }
  return found;
});
|
|
2850
|
+
/**
 * Content search: like `search`, but can additionally filter sections by
 * their text and report the matching lines with surrounding context.
 *
 * `options.content` is interpreted as:
 *   - a boolean query (AND / OR / NOT / "phrases" / parentheses) when
 *     `isAdvancedQuery` says so; sections are filtered with `evaluateQuery`
 *   - otherwise, with `fuzzy` or `stem` set, a word query checked with
 *     `matchesWithOptions`
 *   - otherwise a regular-expression source (flags "gi").
 *     NOTE(review): an invalid expression makes `new RegExp` throw here.
 *
 * Other options: the structural filters `heading`, `pathPattern`, `hasCode`,
 * `hasList`, `hasTable`, `minLevel`, `maxLevel`; `contextBefore` /
 * `contextAfter` (lines of context per match, default 1 each);
 * `fuzzyDistance` (default 2 when `fuzzy` is set); `limit` (maximum results).
 *
 * Fixes relative to the previous implementation:
 *   - A file that cannot be read is skipped through an Effect-level handler.
 *     A `try/catch` around `yield*` inside `Effect.gen` does not intercept
 *     effect failures, so one unreadable file used to abort the whole search.
 *   - `fuzzy` / `stem` without `content` no longer suppresses every result.
 *   - For boolean queries combined with fuzzy/stem, highlight words come from
 *     the parsed operands, so AND / OR / NOT themselves are not highlighted.
 *   - Values that do not depend on the section are computed once.
 *   - Sections are grouped in a Map rather than on a plain object keyed by path.
 *
 * @param {string} rootPath - Root used to create the index storage.
 * @param {object} [options] - See above.
 * @returns An Effect producing an array of results: `{ section, document }`
 *   without a content query, otherwise `{ section, document, sectionContent }`
 *   plus `matches` when at least one line matched.
 */
var searchContent = (rootPath, options = {}) => Effect12.gen(function* () {
  const storage = createStorage(rootPath);
  const docIndex = yield* loadDocumentIndex(storage);
  const sectionIndex = yield* loadSectionIndex(storage);
  if (!docIndex || !sectionIndex) {
    return [];
  }
  const matchOptions = {
    stem: options.stem,
    fuzzyDistance: options.fuzzy ? options.fuzzyDistance ?? 2 : void 0
  };
  const useFuzzyOrStem = options.fuzzy || options.stem;
  let parsedQuery = null;
  let contentRegex = null;
  let highlightRegex = null;
  // Text the fuzzy/stem helpers take their words from.
  let fuzzyText = options.content ?? "";
  if (options.content) {
    if (isAdvancedQuery(options.content)) {
      parsedQuery = parseQuery(options.content);
      if (parsedQuery) {
        // Operands only: the operators are syntax, not search words.
        fuzzyText = [...parsedQuery.terms, ...parsedQuery.phrases].join(" ");
        highlightRegex = useFuzzyOrStem ? buildFuzzyHighlightPattern(fuzzyText, matchOptions) : buildHighlightPattern(parsedQuery);
      }
    } else if (!useFuzzyOrStem) {
      contentRegex = new RegExp(options.content, "gi");
      highlightRegex = contentRegex;
    } else {
      highlightRegex = buildFuzzyHighlightPattern(options.content, matchOptions);
    }
  }
  // Single source of truth for "do we have to look inside the files?".
  const hasContentQuery = Boolean(
    parsedQuery || contentRegex || useFuzzyOrStem && options.content
  );
  const queryWords = useFuzzyOrStem ? fuzzyText.toLowerCase().split(/\W+/).filter((w) => w.length > 0) : [];
  const contextBefore = options.contextBefore ?? 1;
  const contextAfter = options.contextAfter ?? 1;
  const headingRegex = options.heading ? new RegExp(options.heading, "i") : null;
  const limitReached = (list) => options.limit !== void 0 && list.length >= options.limit;
  // Structural filters that can be answered from the index alone.
  const passesSectionFilters = (section) => {
    if (headingRegex && !headingRegex.test(section.heading)) return false;
    if (options.hasCode !== void 0 && section.hasCode !== options.hasCode) return false;
    if (options.hasList !== void 0 && section.hasList !== options.hasList) return false;
    if (options.hasTable !== void 0 && section.hasTable !== options.hasTable) return false;
    if (options.minLevel !== void 0 && section.level < options.minLevel) return false;
    if (options.maxLevel !== void 0 && section.level > options.maxLevel) return false;
    return true;
  };
  // Whether one line matches the highlight pattern or, failing that, the fuzzy/stem words.
  const lineMatches = (line) => {
    if (highlightRegex) {
      // The pattern carries the "g" flag, so test() is stateful; always start at 0.
      highlightRegex.lastIndex = 0;
      const hit = highlightRegex.test(line);
      highlightRegex.lastIndex = 0;
      if (hit) return true;
    }
    if (useFuzzyOrStem && queryWords.length > 0) {
      return findMatchesInLine(queryWords, line, matchOptions).length > 0;
    }
    return false;
  };
  // Collect every matching line of a section together with its context lines.
  const collectMatches = (section, sectionLines) => {
    const matches = [];
    for (let i = 0; i < sectionLines.length; i++) {
      const line = sectionLines[i];
      if (!line || !lineMatches(line)) continue;
      const snippetStart = Math.max(0, i - contextBefore);
      const snippetEnd = Math.min(sectionLines.length, i + contextAfter + 1);
      const contextLines = [];
      for (let j = snippetStart; j < snippetEnd; j++) {
        const ctxLine = sectionLines[j];
        if (ctxLine !== void 0) {
          contextLines.push({
            lineNumber: section.startLine + j,
            line: ctxLine,
            isMatch: j === i
          });
        }
      }
      matches.push({
        lineNumber: section.startLine + i,
        line,
        snippet: sectionLines.slice(snippetStart, snippetEnd).join("\n"),
        contextLines
      });
    }
    return matches;
  };
  // Group sections per document so each file is read at most once.
  const sectionsByDoc = new Map();
  for (const section of Object.values(sectionIndex.sections)) {
    const docSections = sectionsByDoc.get(section.documentPath);
    if (docSections) {
      docSections.push(section);
    } else {
      sectionsByDoc.set(section.documentPath, [section]);
    }
  }
  const results = [];
  for (const [docPath, sections] of sectionsByDoc) {
    if (options.pathPattern && !matchPath(docPath, options.pathPattern)) {
      continue;
    }
    const document = docIndex.documents[docPath];
    if (!document) continue;
    let fileContent = null;
    let fileLines = [];
    if (hasContentQuery) {
      const filePath = path5.join(storage.rootPath, docPath);
      // Best effort: an unreadable file is skipped, not fatal.
      fileContent = yield* Effect12.tryPromise(
        () => fs5.readFile(filePath, "utf-8")
      ).pipe(Effect12.catchAll(() => Effect12.succeed(null)));
      if (fileContent === null) continue;
      fileLines = fileContent.split("\n");
    }
    for (const section of sections) {
      if (!passesSectionFilters(section)) {
        continue;
      }
      if (!hasContentQuery) {
        results.push({ section, document });
        if (limitReached(results)) {
          return results;
        }
        continue;
      }
      if (!fileContent) {
        // Empty file: there is no text a content query could match.
        continue;
      }
      // startLine / endLine are used as 1-based, inclusive line numbers.
      const sectionLines = fileLines.slice(section.startLine - 1, section.endLine);
      const sectionContent = sectionLines.join("\n");
      if (parsedQuery && !evaluateQuery(parsedQuery.ast, sectionContent)) {
        continue;
      }
      if (useFuzzyOrStem && !parsedQuery && !matchesWithOptions(options.content, sectionContent, matchOptions)) {
        continue;
      }
      const matches = collectMatches(section, sectionLines);
      // A boolean query can accept a section without any single line matching
      // (for example a pure NOT query), so it is reported even without matches.
      if (parsedQuery || matches.length > 0) {
        results.push(
          matches.length > 0 ? { section, document, sectionContent, matches } : { section, document, sectionContent }
        );
        if (limitReached(results)) {
          return results;
        }
      }
    }
  }
  return results;
});
|
|
3033
|
+
|
|
3034
|
+
export {
|
|
3035
|
+
CONFIG_FILE_NAMES,
|
|
3036
|
+
findConfigFile,
|
|
3037
|
+
loadConfigFile,
|
|
3038
|
+
readEnvConfig,
|
|
3039
|
+
createConfigProvider,
|
|
3040
|
+
createConfigProviderSync,
|
|
3041
|
+
ConfigService,
|
|
3042
|
+
ConfigServiceDefault,
|
|
3043
|
+
buildSectionList,
|
|
3044
|
+
formatSectionList,
|
|
3045
|
+
filterExcludedSections,
|
|
3046
|
+
extractSectionContent,
|
|
3047
|
+
formatExtractedSections,
|
|
3048
|
+
formatSummary,
|
|
3049
|
+
formatAssembledContext,
|
|
3050
|
+
summarizeFile,
|
|
3051
|
+
assembleContext,
|
|
3052
|
+
checkPricingFreshness,
|
|
3053
|
+
getPricingDate,
|
|
3054
|
+
estimateEmbeddingCost,
|
|
3055
|
+
buildEmbeddings,
|
|
3056
|
+
semanticSearch,
|
|
3057
|
+
semanticSearchWithStats,
|
|
3058
|
+
getEmbeddingStats,
|
|
3059
|
+
matchPath,
|
|
3060
|
+
isAdvancedQuery,
|
|
3061
|
+
search,
|
|
3062
|
+
searchContent
|
|
3063
|
+
};
|