mdcontext 0.0.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.changeset/README.md +28 -0
- package/.changeset/config.json +11 -0
- package/.claude/settings.local.json +25 -0
- package/.github/workflows/ci.yml +83 -0
- package/.github/workflows/claude-code-review.yml +44 -0
- package/.github/workflows/claude.yml +85 -0
- package/.github/workflows/release.yml +113 -0
- package/.tldrignore +112 -0
- package/BACKLOG.md +338 -0
- package/CONTRIBUTING.md +186 -0
- package/NOTES/NOTES +44 -0
- package/README.md +434 -11
- package/biome.json +36 -0
- package/cspell.config.yaml +14 -0
- package/dist/chunk-23UPXDNL.js +3044 -0
- package/dist/chunk-2W7MO2DL.js +1366 -0
- package/dist/chunk-3NUAZGMA.js +1689 -0
- package/dist/chunk-7TOWB2XB.js +366 -0
- package/dist/chunk-7XOTOADQ.js +3065 -0
- package/dist/chunk-AH2PDM2K.js +3042 -0
- package/dist/chunk-BNXWSZ63.js +3742 -0
- package/dist/chunk-BTL5DJVU.js +3222 -0
- package/dist/chunk-HDHYG7E4.js +104 -0
- package/dist/chunk-HLR4KZBP.js +3234 -0
- package/dist/chunk-IP3FRFEB.js +1045 -0
- package/dist/chunk-KHU56VDO.js +3042 -0
- package/dist/chunk-KRYIFLQR.js +88 -0
- package/dist/chunk-LBSDNLEM.js +287 -0
- package/dist/chunk-MNTQ7HCP.js +2643 -0
- package/dist/chunk-MUJELQQ6.js +1387 -0
- package/dist/chunk-MXJGMSLV.js +2199 -0
- package/dist/chunk-N6QJGC3Z.js +2636 -0
- package/dist/chunk-OBELGBPM.js +1713 -0
- package/dist/chunk-OT7R5XTA.js +3192 -0
- package/dist/chunk-P7X4RA2T.js +106 -0
- package/dist/chunk-PIDUQNC2.js +3185 -0
- package/dist/chunk-POGCDIH4.js +3187 -0
- package/dist/chunk-PSIEOQGZ.js +3043 -0
- package/dist/chunk-PVRT3IHA.js +3238 -0
- package/dist/chunk-QNN4TT23.js +1430 -0
- package/dist/chunk-RE3R45RJ.js +3042 -0
- package/dist/chunk-S7E6TFX6.js +803 -0
- package/dist/chunk-SG6GLU4U.js +1378 -0
- package/dist/chunk-SJCDV2ST.js +274 -0
- package/dist/chunk-SYE5XLF3.js +104 -0
- package/dist/chunk-T5VLYBZD.js +103 -0
- package/dist/chunk-TOQB7VWU.js +3238 -0
- package/dist/chunk-VFNMZ4ZQ.js +3228 -0
- package/dist/chunk-VVTGZNBT.js +1629 -0
- package/dist/chunk-W7Q4RFEV.js +104 -0
- package/dist/chunk-XTYYVRLO.js +3190 -0
- package/dist/chunk-Y6MDYVJD.js +3063 -0
- package/dist/cli/main.d.ts +1 -0
- package/dist/cli/main.js +5458 -0
- package/dist/index.d.ts +653 -0
- package/dist/index.js +79 -0
- package/dist/mcp/server.d.ts +1 -0
- package/dist/mcp/server.js +472 -0
- package/dist/schema-BAWSG7KY.js +22 -0
- package/dist/schema-E3QUPL26.js +20 -0
- package/dist/schema-EHL7WUT6.js +20 -0
- package/docs/019-USAGE.md +625 -0
- package/docs/020-current-implementation.md +364 -0
- package/docs/021-DOGFOODING-FINDINGS.md +175 -0
- package/docs/BACKLOG.md +80 -0
- package/docs/CONFIG.md +1123 -0
- package/docs/DESIGN.md +439 -0
- package/docs/ERRORS.md +383 -0
- package/docs/PROJECT.md +88 -0
- package/docs/ROADMAP.md +407 -0
- package/docs/summarization.md +320 -0
- package/docs/test-links.md +9 -0
- package/justfile +40 -0
- package/package.json +74 -9
- package/pnpm-workspace.yaml +5 -0
- package/research/INDEX.md +315 -0
- package/research/code-review/README.md +90 -0
- package/research/code-review/cli-error-handling-review.md +979 -0
- package/research/code-review/code-review-validation-report.md +464 -0
- package/research/code-review/main-ts-review.md +1128 -0
- package/research/config-analysis/01-current-implementation.md +470 -0
- package/research/config-analysis/02-strategy-recommendation.md +428 -0
- package/research/config-analysis/03-task-candidates.md +715 -0
- package/research/config-analysis/033-research-configuration-management.md +828 -0
- package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
- package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
- package/research/config-docs/SUMMARY.md +357 -0
- package/research/config-docs/TEST-RESULTS.md +776 -0
- package/research/config-docs/TODO.md +542 -0
- package/research/config-docs/analysis.md +744 -0
- package/research/config-docs/fix-validation.md +502 -0
- package/research/config-docs/help-audit.md +264 -0
- package/research/config-docs/help-system-analysis.md +890 -0
- package/research/dogfood/consolidated-tool-evaluation.md +373 -0
- package/research/dogfood/strategy-a/a-synthesis.md +184 -0
- package/research/dogfood/strategy-a/a1-docs.md +226 -0
- package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
- package/research/dogfood/strategy-a/a3-llm.md +164 -0
- package/research/dogfood/strategy-b/b-synthesis.md +228 -0
- package/research/dogfood/strategy-b/b1-architecture.md +207 -0
- package/research/dogfood/strategy-b/b2-gaps.md +258 -0
- package/research/dogfood/strategy-b/b3-workflows.md +250 -0
- package/research/dogfood/strategy-c/c-synthesis.md +451 -0
- package/research/dogfood/strategy-c/c1-explorer.md +192 -0
- package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
- package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
- package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
- package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
- package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
- package/research/effect-cli-error-handling.md +845 -0
- package/research/effect-errors-as-values.md +943 -0
- package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
- package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
- package/research/errors-task-analysis/embeddings-analysis.md +709 -0
- package/research/errors-task-analysis/index-search-analysis.md +812 -0
- package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
- package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
- package/research/issue-review.md +603 -0
- package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
- package/research/llm-summarization/alternative-providers-2026.md +1428 -0
- package/research/llm-summarization/anthropic-2026.md +367 -0
- package/research/llm-summarization/claude-cli-integration.md +1706 -0
- package/research/llm-summarization/cli-integration-patterns.md +3155 -0
- package/research/llm-summarization/openai-2026.md +473 -0
- package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
- package/research/llm-summarization/opencode-cli-integration.md +1552 -0
- package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
- package/research/llm-summarization/prototype-results.md +56 -0
- package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
- package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
- package/research/mdcontext-error-analysis.md +521 -0
- package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
- package/research/mdcontext-pudding/01-index-embed.md +956 -0
- package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
- package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
- package/research/mdcontext-pudding/02-search.md +970 -0
- package/research/mdcontext-pudding/03-context.md +779 -0
- package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
- package/research/mdcontext-pudding/04-tree.md +704 -0
- package/research/mdcontext-pudding/05-config.md +1038 -0
- package/research/mdcontext-pudding/06-links-summary.txt +87 -0
- package/research/mdcontext-pudding/06-links.md +679 -0
- package/research/mdcontext-pudding/07-stats.md +693 -0
- package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
- package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
- package/research/mdcontext-pudding/README.md +168 -0
- package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
- package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
- package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
- package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
- package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
- package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
- package/research/research-quality-review.md +834 -0
- package/research/semantic-search/002-research-embedding-models.md +490 -0
- package/research/semantic-search/003-research-rag-alternatives.md +523 -0
- package/research/semantic-search/004-research-vector-search.md +841 -0
- package/research/semantic-search/032-research-semantic-search.md +427 -0
- package/research/semantic-search/embedding-text-analysis.md +156 -0
- package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
- package/research/semantic-search/query-processing-analysis.md +207 -0
- package/research/semantic-search/root-cause-and-solution.md +114 -0
- package/research/semantic-search/threshold-validation-report.md +69 -0
- package/research/semantic-search/vector-search-analysis.md +63 -0
- package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
- package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
- package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
- package/research/task-management-2026/03-lightweight-file-based.md +567 -0
- package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
- package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
- package/research/task-management-2026/linear/02-api-integrations.md +930 -0
- package/research/task-management-2026/linear/03-ai-features.md +368 -0
- package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
- package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
- package/research/test-path-issues.md +276 -0
- package/review/ALP-76/1-error-type-design.md +962 -0
- package/review/ALP-76/2-error-handling-patterns.md +906 -0
- package/review/ALP-76/3-error-presentation.md +624 -0
- package/review/ALP-76/4-test-coverage.md +625 -0
- package/review/ALP-76/5-migration-completeness.md +440 -0
- package/review/ALP-76/6-effect-best-practices.md +755 -0
- package/scripts/apply-branch-protection.sh +47 -0
- package/scripts/branch-protection-templates.json +79 -0
- package/scripts/prototype-summarization.ts +346 -0
- package/scripts/rebuild-hnswlib.js +58 -0
- package/scripts/setup-branch-protection.sh +64 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
- package/src/cli/argv-preprocessor.test.ts +210 -0
- package/src/cli/argv-preprocessor.ts +202 -0
- package/src/cli/cli.test.ts +627 -0
- package/src/cli/commands/backlinks.ts +54 -0
- package/src/cli/commands/config-cmd.ts +642 -0
- package/src/cli/commands/context.ts +285 -0
- package/src/cli/commands/duplicates.ts +122 -0
- package/src/cli/commands/embeddings.ts +529 -0
- package/src/cli/commands/index-cmd.ts +480 -0
- package/src/cli/commands/index.ts +16 -0
- package/src/cli/commands/links.ts +52 -0
- package/src/cli/commands/search.ts +1281 -0
- package/src/cli/commands/stats.ts +149 -0
- package/src/cli/commands/tree.ts +128 -0
- package/src/cli/config-layer.ts +176 -0
- package/src/cli/error-handler.test.ts +235 -0
- package/src/cli/error-handler.ts +655 -0
- package/src/cli/flag-schemas.ts +341 -0
- package/src/cli/help.ts +588 -0
- package/src/cli/index.ts +9 -0
- package/src/cli/main.ts +435 -0
- package/src/cli/options.ts +41 -0
- package/src/cli/shared-error-handling.ts +199 -0
- package/src/cli/typo-suggester.test.ts +105 -0
- package/src/cli/typo-suggester.ts +130 -0
- package/src/cli/utils.ts +259 -0
- package/src/config/file-provider.test.ts +320 -0
- package/src/config/file-provider.ts +273 -0
- package/src/config/index.ts +72 -0
- package/src/config/integration.test.ts +667 -0
- package/src/config/precedence.test.ts +277 -0
- package/src/config/precedence.ts +451 -0
- package/src/config/schema.test.ts +414 -0
- package/src/config/schema.ts +603 -0
- package/src/config/service.test.ts +320 -0
- package/src/config/service.ts +243 -0
- package/src/config/testing.test.ts +264 -0
- package/src/config/testing.ts +110 -0
- package/src/core/index.ts +1 -0
- package/src/core/types.ts +113 -0
- package/src/duplicates/detector.test.ts +183 -0
- package/src/duplicates/detector.ts +414 -0
- package/src/duplicates/index.ts +18 -0
- package/src/embeddings/embedding-namespace.test.ts +300 -0
- package/src/embeddings/embedding-namespace.ts +947 -0
- package/src/embeddings/heading-boost.test.ts +222 -0
- package/src/embeddings/hnsw-build-options.test.ts +198 -0
- package/src/embeddings/hyde.test.ts +272 -0
- package/src/embeddings/hyde.ts +264 -0
- package/src/embeddings/index.ts +10 -0
- package/src/embeddings/openai-provider.ts +414 -0
- package/src/embeddings/pricing.json +22 -0
- package/src/embeddings/provider-constants.ts +204 -0
- package/src/embeddings/provider-errors.test.ts +967 -0
- package/src/embeddings/provider-errors.ts +565 -0
- package/src/embeddings/provider-factory.test.ts +240 -0
- package/src/embeddings/provider-factory.ts +225 -0
- package/src/embeddings/provider-integration.test.ts +788 -0
- package/src/embeddings/query-preprocessing.test.ts +187 -0
- package/src/embeddings/semantic-search-threshold.test.ts +508 -0
- package/src/embeddings/semantic-search.ts +1270 -0
- package/src/embeddings/types.ts +359 -0
- package/src/embeddings/vector-store.ts +708 -0
- package/src/embeddings/voyage-provider.ts +313 -0
- package/src/errors/errors.test.ts +845 -0
- package/src/errors/index.ts +533 -0
- package/src/index/ignore-patterns.test.ts +354 -0
- package/src/index/ignore-patterns.ts +305 -0
- package/src/index/index.ts +4 -0
- package/src/index/indexer.ts +684 -0
- package/src/index/storage.ts +260 -0
- package/src/index/types.ts +147 -0
- package/src/index/watcher.ts +189 -0
- package/src/index.ts +30 -0
- package/src/integration/search-keyword.test.ts +678 -0
- package/src/mcp/server.ts +612 -0
- package/src/parser/index.ts +1 -0
- package/src/parser/parser.test.ts +291 -0
- package/src/parser/parser.ts +394 -0
- package/src/parser/section-filter.test.ts +277 -0
- package/src/parser/section-filter.ts +392 -0
- package/src/search/__tests__/hybrid-search.test.ts +650 -0
- package/src/search/bm25-store.ts +366 -0
- package/src/search/cross-encoder.test.ts +253 -0
- package/src/search/cross-encoder.ts +406 -0
- package/src/search/fuzzy-search.test.ts +419 -0
- package/src/search/fuzzy-search.ts +273 -0
- package/src/search/hybrid-search.ts +448 -0
- package/src/search/path-matcher.test.ts +276 -0
- package/src/search/path-matcher.ts +33 -0
- package/src/search/query-parser.test.ts +260 -0
- package/src/search/query-parser.ts +319 -0
- package/src/search/searcher.test.ts +280 -0
- package/src/search/searcher.ts +724 -0
- package/src/search/wink-bm25.d.ts +30 -0
- package/src/summarization/cli-providers/claude.ts +202 -0
- package/src/summarization/cli-providers/detection.test.ts +273 -0
- package/src/summarization/cli-providers/detection.ts +118 -0
- package/src/summarization/cli-providers/index.ts +8 -0
- package/src/summarization/cost.test.ts +139 -0
- package/src/summarization/cost.ts +102 -0
- package/src/summarization/error-handler.test.ts +127 -0
- package/src/summarization/error-handler.ts +111 -0
- package/src/summarization/index.ts +102 -0
- package/src/summarization/pipeline.test.ts +498 -0
- package/src/summarization/pipeline.ts +231 -0
- package/src/summarization/prompts.test.ts +269 -0
- package/src/summarization/prompts.ts +133 -0
- package/src/summarization/provider-factory.test.ts +396 -0
- package/src/summarization/provider-factory.ts +178 -0
- package/src/summarization/types.ts +184 -0
- package/src/summarize/budget-bugs.test.ts +620 -0
- package/src/summarize/formatters.ts +419 -0
- package/src/summarize/index.ts +20 -0
- package/src/summarize/summarizer.test.ts +275 -0
- package/src/summarize/summarizer.ts +597 -0
- package/src/summarize/verify-bugs.test.ts +238 -0
- package/src/types/huggingface-transformers.d.ts +66 -0
- package/src/utils/index.ts +1 -0
- package/src/utils/tokens.test.ts +142 -0
- package/src/utils/tokens.ts +186 -0
- package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
- package/tests/fixtures/cli/.mdcontext/config.json +8 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
- package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
- package/tests/fixtures/cli/.mdcontext/indexes/sections.json +247 -0
- package/tests/fixtures/cli/README.md +9 -0
- package/tests/fixtures/cli/api-reference.md +11 -0
- package/tests/fixtures/cli/getting-started.md +11 -0
- package/tests/integration/embed-index.test.ts +712 -0
- package/tests/integration/search-context.test.ts +469 -0
- package/tests/integration/search-semantic.test.ts +522 -0
- package/tsconfig.json +26 -0
- package/vitest.config.ts +16 -0
- package/vitest.setup.ts +12 -0
|
@@ -0,0 +1,1713 @@
|
|
|
1
|
+
import {
|
|
2
|
+
ApiKeyInvalidError,
|
|
3
|
+
ApiKeyMissingError,
|
|
4
|
+
countTokensApprox,
|
|
5
|
+
createStorage,
|
|
6
|
+
EmbeddingError,
|
|
7
|
+
EmbeddingsNotFoundError,
|
|
8
|
+
INDEX_DIR,
|
|
9
|
+
IndexNotFoundError,
|
|
10
|
+
loadDocumentIndex,
|
|
11
|
+
loadSectionIndex,
|
|
12
|
+
parseFile,
|
|
13
|
+
VectorStoreError,
|
|
14
|
+
} from "./chunk-QNN4TT23.js";
|
|
15
|
+
|
|
16
|
+
// src/summarize/formatters.ts
|
|
17
|
+
/**
 * Render a document summary as Markdown-style text, optionally capped at a
 * token budget.
 *
 * Output layout (top to bottom): optional truncation warning, `# title`,
 * `Path: ...`, a `Tokens: ...` line, optional `**Topics:**` line, then each
 * included section as a heading line plus (when present) its summary line,
 * indented by nesting depth.
 *
 * @param {object} summary - Reads `title`, `path`, `sections` (a tree whose
 *   nodes carry `heading`, `level`, `summary`, `summaryTokens`, `children`),
 *   `keyTopics`, `compressionRatio`, `originalTokens`, `summaryTokens`, and
 *   the optional `truncated` / `truncatedCount` flags.
 * @param {{ maxTokens?: number }} [options] - When `maxTokens` is undefined
 *   every section is emitted; otherwise sections are dropped (last first)
 *   until the rendered text fits, as measured by `countTokensApprox`.
 * @returns {string} The formatted summary.
 */
var formatSummary = (summary, options = {}) => {
  const maxTokens = options.maxTokens;

  // The "Tokens:" header line; only the leading count varies between calls.
  const tokenLine = (count) =>
    `Tokens: ${count} (${(summary.compressionRatio * 100).toFixed(0)}% reduction from ${summary.originalTokens})`;

  // Flatten the section tree depth-first, giving each node an outline number
  // such as "2" or "2.1.3" that the truncation warning can refer to.
  const flatSections = [];
  const collectSections = (
    section,
    depth = 0,
    parentNumber = "",
    index = 0,
  ) => {
    const number = parentNumber
      ? `${parentNumber}.${index + 1}`
      : `${index + 1}`;
    flatSections.push({ section, depth, number });
    section.children.forEach((child, i) => {
      collectSections(child, depth + 1, number, i);
    });
  };
  summary.sections.forEach((section, i) => {
    collectSections(section, 0, "", i);
  });

  // Lines emitted for one section: the heading, then the summary if non-empty.
  const sectionLinesFor = ({ section, depth }) => {
    const indent = " ".repeat(depth);
    const prefix = "#".repeat(section.level);
    const rendered = [`${indent}${prefix} ${section.heading}`];
    if (section.summary) {
      rendered.push(`${indent}${section.summary}`);
    }
    return rendered;
  };

  // Up to six outline numbers are listed in full; longer lists show five
  // followed by a "(+N more)" tail.
  const abbreviate = (numbers) =>
    numbers.length <= 6
      ? numbers.join(", ")
      : `${numbers.slice(0, 5).join(", ")}, ... (+${numbers.length - 5} more)`;

  const buildOutput = (
    includedSectionIndices,
    truncationInfo,
    includeTopics2,
  ) => {
    const lines = [];
    if (
      truncationInfo.showWarning &&
      truncationInfo.truncatedCount > 0 &&
      truncationInfo.tokensTotal > 0
    ) {
      const pct = Math.round(
        (truncationInfo.tokensShown / truncationInfo.tokensTotal) * 100,
      );
      lines.push(
        `\u26A0\uFE0F Truncated: Showing ~${truncationInfo.tokensShown}/${truncationInfo.tokensTotal} tokens (${pct}%)`,
      );
      if (truncationInfo.includedNumbers.length > 0) {
        lines.push(
          `Sections included: ${abbreviate(truncationInfo.includedNumbers)}`,
        );
      }
      if (truncationInfo.excludedNumbers.length > 0) {
        lines.push(
          `Sections excluded: ${abbreviate(truncationInfo.excludedNumbers)}`,
        );
      }
      lines.push(
        "Use --full for complete content or --section to target specific sections.",
      );
      lines.push("");
    }
    lines.push(`# ${summary.title}`);
    lines.push(`Path: ${summary.path}`);
    // Remember where the token line goes; it is filled in once the rest of
    // the output (which determines the count) is known.
    const tokenLineIndex = lines.length;
    lines.push("");
    lines.push("");
    if (includeTopics2 && summary.keyTopics.length > 0) {
      lines.push(`**Topics:** ${summary.keyTopics.join(", ")}`);
      lines.push("");
    }
    const sectionLines = [];
    flatSections.forEach((entry, i) => {
      if (includedSectionIndices.has(i)) {
        sectionLines.push(...sectionLinesFor(entry));
      }
    });
    lines.push(sectionLines.join("\n"));

    // Substitute the token line by index rather than by searching the joined
    // text for a marker, so a title or path that happens to contain the
    // marker text cannot be clobbered during estimation.
    const renderWith = (line) => {
      lines[tokenLineIndex] = line;
      return lines.join("\n");
    };
    // The token line reports the size of the output that contains it, so
    // iterate a few times until the reported number is self-consistent.
    let estimatedTotal = countTokensApprox(renderWith("")) + 8;
    for (let iter = 0; iter < 3; iter++) {
      const actualTotal = countTokensApprox(
        renderWith(tokenLine(estimatedTotal)),
      );
      if (actualTotal === estimatedTotal) break;
      estimatedTotal = actualTotal;
    }
    return renderWith(tokenLine(estimatedTotal));
  };

  if (maxTokens === void 0) {
    // No budget: emit everything, but still surface truncation that happened
    // before formatting (as flagged on the summary itself).
    const hasPriorTruncation = summary.truncated && summary.truncatedCount;
    return buildOutput(
      new Set(flatSections.map((_, i) => i)),
      {
        showWarning: !!hasPriorTruncation,
        truncatedCount: summary.truncatedCount ?? 0,
        includedNumbers: flatSections.map((s) => s.number),
        excludedNumbers: [],
        tokensShown: summary.summaryTokens,
        tokensTotal: summary.originalTokens,
      },
      true,
    );
  }

  // Estimates are padded by 15% because they are made piecewise; the final
  // output is re-measured below and trimmed if the estimate was still short.
  const SAFETY_MARGIN = 1.15;
  const paddedTokens = (text) =>
    Math.ceil(countTokensApprox(text) * SAFETY_MARGIN);

  const minHeaderTokens = paddedTokens(
    [
      `# ${summary.title}`,
      `Path: ${summary.path}`,
      tokenLine(9999),
      "",
      "",
    ].join("\n"),
  );
  const topicsTokens =
    summary.keyTopics.length > 0
      ? paddedTokens(`**Topics:** ${summary.keyTopics.join(", ")}\n`)
      : 0;
  // Worst-case size of the truncation warning block.
  const truncationWarningTokens = paddedTokens(
    "\u26A0\uFE0F Truncated: Showing ~9999/9999 tokens (99%)\n" +
      "Sections included: 1, 2, 3, 4, 5, ... (+99 more)\n" +
      "Sections excluded: 6, 7, 8, 9, 10, ... (+99 more)\n" +
      "Use --full for complete content or --section to target specific sections.\n",
  );

  let includeTopics = true;
  let headerTokens = minHeaderTokens + topicsTokens;
  if (headerTokens >= maxTokens) {
    // The topics line alone would exhaust the budget; drop it.
    includeTopics = false;
    headerTokens = minHeaderTokens;
  }

  // Greedy in document order: a section is taken whenever it still fits.
  const sectionCosts = flatSections.map((entry) =>
    paddedTokens(sectionLinesFor(entry).join("\n")),
  );
  const selectWithin = (budget) => {
    const chosen = new Set();
    let used = 0;
    sectionCosts.forEach((cost, i) => {
      if (used + cost <= budget) {
        chosen.add(i);
        used += cost;
      }
    });
    return chosen;
  };

  // First try without reserving room for the truncation warning: if every
  // section fits, no warning is printed and the reserve would be wasted.
  // Only when something has to be dropped is the warning's space set aside.
  let includedIndices = selectWithin(maxTokens - headerTokens);
  if (includedIndices.size < flatSections.length) {
    includedIndices = selectWithin(
      maxTokens - headerTokens - truncationWarningTokens,
    );
  }
  let truncatedCount = flatSections.length - includedIndices.size;

  const includedNumbers = [];
  const excludedNumbers = [];
  let tokensShown = 0;
  flatSections.forEach((entry, i) => {
    if (includedIndices.has(i)) {
      includedNumbers.push(entry.number);
      tokensShown += entry.section.summaryTokens;
    } else {
      excludedNumbers.push(entry.number);
    }
  });

  // Reads the current selection state each time it is called.
  const render = (showWarning) =>
    buildOutput(
      includedIndices,
      {
        showWarning,
        truncatedCount,
        includedNumbers,
        excludedNumbers,
        tokensShown,
        tokensTotal: summary.originalTokens,
      },
      includeTopics,
    );

  let output = render(truncatedCount > 0);
  let actualTokens = countTokensApprox(output);

  // Estimates can undershoot; remove sections from the end of the document
  // until the measured output fits or nothing is left to remove.
  const sortedIndices = Array.from(includedIndices).sort((a, b) => b - a);
  let removalIndex = 0;
  while (actualTokens > maxTokens && removalIndex < sortedIndices.length) {
    const indexToRemove = sortedIndices[removalIndex];
    includedIndices.delete(indexToRemove);
    truncatedCount++;
    removalIndex++;
    const removedNumber = flatSections[indexToRemove].number;
    const includedIdx = includedNumbers.indexOf(removedNumber);
    if (includedIdx !== -1) {
      includedNumbers.splice(includedIdx, 1);
      excludedNumbers.push(removedNumber);
    }
    tokensShown -= flatSections[indexToRemove].section.summaryTokens;
    output = render(true);
    actualTokens = countTokensApprox(output);
  }

  // Still too large: give up the topics line next ...
  if (actualTokens > maxTokens && includeTopics) {
    includeTopics = false;
    output = render(truncatedCount > 0);
    actualTokens = countTokensApprox(output);
  }
  // ... and as a last resort the truncation warning itself.
  if (actualTokens > maxTokens && truncatedCount > 0) {
    output = render(false);
  }
  return output;
};
|
|
278
|
+
/**
 * Render an assembled multi-source context as a single text document.
 *
 * The result starts with a small header (token usage and source count),
 * followed by each source's content introduced by a `---` rule, and ends
 * with a list of overflow paths when any were left out.
 *
 * @param {object} context - Reads `totalTokens`, `budget`, `sources` (each
 *   with a `content` string) and `overflow` (list of paths).
 * @returns {string} The newline-joined document.
 */
var formatAssembledContext = (context) => {
  const { sources, overflow } = context;

  const header = [
    "# Context Assembly",
    `Total tokens: ${context.totalTokens}/${context.budget}`,
    `Sources: ${sources.length}`,
    "",
  ];

  // Each source becomes: rule, blank line, content.
  const body = sources.flatMap((source) => ["---", "", source.content]);

  // The overflow section only appears when something was excluded.
  const footer =
    overflow.length > 0
      ? [
          "---",
          "",
          "## Overflow (not included due to budget)",
          ...overflow.map((overflowPath) => `- ${overflowPath}`),
        ]
      : [];

  return [...header, ...body, ...footer].join("\n");
};
|
|
299
|
+
|
|
300
|
+
import { Effect } from "effect";
|
|
301
|
+
// src/summarize/summarizer.ts
|
|
302
|
+
import * as fs from "fs/promises";
|
|
303
|
+
import * as path from "path";
|
|
304
|
+
|
|
305
|
+
// ---- Summarizer tuning constants -------------------------------------------
// Declared with `var` to match the rest of this bundled chunk.

// Upper bound on a section's target token count, keyed by summary level;
// "full" imposes no limit.
var TOKEN_BUDGETS = { brief: 100, summary: 500, full: Number.POSITIVE_INFINITY };

// Sentence extraction: fragments whose trimmed length is not greater than this
// are discarded.
var MIN_SENTENCE_LENGTH = 10;

// Sentence scoring weights (added together per sentence).
var SENTENCE_SCORE_DEFINITION = 2; // sentence contains ":"
var SENTENCE_SCORE_PROPER_START = 1; // sentence starts with an uppercase A-Z
var SENTENCE_SCORE_MEDIUM_LENGTH = 1; // length strictly between the two bounds below
var SENTENCE_SCORE_EMPHASIS = 1; // sentence contains "**" or a backtick

// Exclusive character-length bounds for the medium-length bonus.
var SENTENCE_LENGTH_MIN = 50;
var SENTENCE_LENGTH_MAX = 200;

// Section summarization: the target is this fraction of the original token
// count, but never less than MIN_SECTION_TOKENS.
var SUMMARY_COMPRESSION_RATIO = 0.3;
var MIN_SECTION_TOKENS = 20;

// Sentence quota: target tokens divided by the per-sentence estimate, with a
// floor of MIN_SUMMARY_SENTENCES.
var MIN_SUMMARY_SENTENCES = 2;
var TOKENS_PER_SENTENCE_ESTIMATE = 30;

// NOTE(review): the consumers of the next four constants are outside the
// visible part of this file; presumably topic-extraction limits and a minimum
// budget for partial output - confirm against their call sites.
var MIN_TOPIC_LENGTH = 2;
var MAX_TOPIC_LENGTH = 50;
var MAX_TOPICS = 10;
var MIN_PARTIAL_BUDGET = 50;
|
|
325
|
+
/**
 * Pick the most informative sentences from a block of text.
 *
 * The text is flattened to one line, split after `.`, `!` or `?`, and
 * fragments whose trimmed length does not exceed MIN_SENTENCE_LENGTH are
 * dropped. When no more than `maxSentences` remain they are returned
 * unchanged, in document order. Otherwise each sentence is scored and the
 * highest-scoring ones are returned, best first.
 *
 * @param {string} content - Text to extract sentences from.
 * @param {number} maxSentences - Maximum number of sentences to return.
 * @returns {string[]} The selected sentences.
 */
var extractKeyPoints = (content, maxSentences) => {
  const flattened = content.replace(/\n+/g, " ");

  const candidates = [];
  for (const fragment of flattened.split(/(?<=[.!?])\s+/)) {
    if (fragment.trim().length > MIN_SENTENCE_LENGTH) {
      candidates.push(fragment);
    }
  }
  if (candidates.length <= maxSentences) {
    return candidates;
  }

  // Additive heuristic: colon, capitalised start, medium length, emphasis.
  const scoreOf = (text) => {
    let total = 0;
    if (text.includes(":")) {
      total += SENTENCE_SCORE_DEFINITION;
    }
    if (/^[A-Z]/.test(text)) {
      total += SENTENCE_SCORE_PROPER_START;
    }
    if (text.length > SENTENCE_LENGTH_MIN && text.length < SENTENCE_LENGTH_MAX) {
      total += SENTENCE_SCORE_MEDIUM_LENGTH;
    }
    if (/\*\*|`/.test(text)) {
      total += SENTENCE_SCORE_EMPHASIS;
    }
    return total;
  };

  return candidates
    .map((sentence) => ({ sentence, score: scoreOf(sentence) }))
    .sort((left, right) => right.score - left.score)
    .slice(0, maxSentences)
    .map((entry) => entry.sentence);
};
|
|
345
|
+
/**
 * Build a summary node for `section` and, recursively, for its children.
 *
 * The target size is the section's token count scaled by
 * SUMMARY_COMPRESSION_RATIO, floored at MIN_SECTION_TOKENS and capped by the
 * budget of `level`. Sections at level "full", or already within the target,
 * keep their plain text. "brief" reduces the body to a `[code, list, table]`
 * style marker. Any other level keeps key sentences and falls back to a
 * word-truncated prefix when no sentence qualifies.
 *
 * @param {object} section - Parsed section (heading, level, plainText, metadata, children).
 * @param {string} level - Key of TOKEN_BUDGETS ("brief", "summary" or "full").
 * @returns {object} Summary node with token counts, summary text and summarized children.
 */
var summarizeSection = (section, level) => {
  const { tokenCount: originalTokens } = section.metadata;
  const children = section.children.map((child) => summarizeSection(child, level));
  const scaledTarget = Math.max(
    originalTokens * SUMMARY_COMPRESSION_RATIO,
    MIN_SECTION_TOKENS,
  );
  const targetTokens = Math.min(TOKEN_BUDGETS[level], scaledTarget);

  const buildSummary = () => {
    if (level === "full" || originalTokens <= targetTokens) {
      return section.plainText;
    }
    if (level === "brief") {
      // Only advertise which kinds of content the section holds.
      const markers = [
        ["code", section.metadata.hasCode],
        ["list", section.metadata.hasList],
        ["table", section.metadata.hasTable],
      ]
        .filter(([, present]) => present)
        .map(([label]) => label);
      return markers.length > 0 ? `[${markers.join(", ")}]` : "";
    }
    const sentenceLimit = Math.max(
      MIN_SUMMARY_SENTENCES,
      Math.floor(targetTokens / TOKENS_PER_SENTENCE_ESTIMATE),
    );
    const keyPoints = extractKeyPoints(section.plainText, sentenceLimit);
    if (keyPoints.length > 0) {
      return keyPoints.join(" ");
    }
    // No usable sentence: keep a prefix of words and flag the cut.
    const kept = section.plainText.split(/\s+/).slice(0, targetTokens);
    const wasCut = kept.length < section.plainText.split(/\s+/).length;
    return kept.join(" ") + (wasCut ? "..." : "");
  };

  const summary = buildSummary();
  const summaryTokens = countTokensApprox(summary);
  const { hasCode, hasList, hasTable } = section.metadata;
  return {
    heading: section.heading,
    level: section.level,
    originalTokens,
    summaryTokens,
    summary,
    children,
    hasCode,
    hasList,
    hasTable,
  };
};
|
|
391
|
+
/**
 * Collect up to MAX_TOPICS lower-cased topics for a document.
 *
 * Topics come from every section heading (depth-first, parents before
 * children) followed by the string entries of `frontmatter.tags`.
 * Duplicates are dropped and insertion order is kept.
 *
 * @param {object} document - Parsed document with `sections` and optional `frontmatter`.
 * @returns {string[]} Unique topics, at most MAX_TOPICS entries.
 */
var extractTopics = (document) => {
  const topics = /* @__PURE__ */ new Set();
  const collectHeadings = (section) => {
    const cleanHeading = section.heading
      .replace(/[:#\-_]/g, " ")
      // Replaced punctuation can leave runs of spaces ("API - Ref" would
      // become "api   ref"); collapse them so topics read naturally.
      .replace(/\s+/g, " ")
      .trim()
      .toLowerCase();
    if (
      cleanHeading.length > MIN_TOPIC_LENGTH &&
      cleanHeading.length < MAX_TOPIC_LENGTH
    ) {
      topics.add(cleanHeading);
    }
    for (const child of section.children) {
      collectHeadings(child);
    }
  };
  for (const section of document.sections) {
    collectHeadings(section);
  }
  // Tolerate documents that carry no frontmatter object at all.
  const tags = document.frontmatter?.tags;
  if (Array.isArray(tags)) {
    for (const tag of tags) {
      if (typeof tag === "string") {
        topics.add(tag.toLowerCase());
      }
    }
  }
  return Array.from(topics).slice(0, MAX_TOPICS);
};
|
|
421
|
+
/**
 * Summarize a parsed document within a token budget.
 *
 * Every section is summarized at `options.level` (default "summary"). The
 * budget is `options.maxTokens`, or the level's entry in TOKEN_BUDGETS. An
 * estimate of the formatting overhead (header, topics line, truncation
 * warning) is reserved first. When the section summaries exceed what is
 * left, sections are dropped in document order until the rest fits.
 *
 * Note: when the overhead estimate alone uses up the budget (content budget
 * <= 0) no truncation is attempted and all sections are returned.
 *
 * @param {object} document - Parsed document (title, path, metadata, sections).
 * @param {{ level?: string, maxTokens?: number }} [options]
 * @returns {object} Summary with token counts, compression ratio, sections and
 *   key topics, plus `truncated: true` and `truncatedCount` when sections were dropped.
 */
var summarizeDocument = (document, options = {}) => {
  const level = options.level ?? "summary";
  const maxTokens = options.maxTokens ?? TOKEN_BUDGETS[level];
  const allSections = document.sections.map((s) => summarizeSection(s, level));
  const originalTokens = document.metadata.tokenCount;

  // Total summary tokens over the whole section tree.
  let totalSummaryTokens = 0;
  const addTokens = (section) => {
    totalSummaryTokens += section.summaryTokens;
    for (const child of section.children) {
      addTokens(child);
    }
  };
  for (const section of allSections) {
    addTokens(section);
  }

  const topics = extractTopics(document);

  // Worst-case placeholders (9999 / 99% / 999) stand in for values that are
  // only known later; the text is used purely to size the overhead.
  const headerTemplate = `# ${document.title}
Path: ${document.path}
Tokens: 9999 (99% reduction from ${document.metadata.tokenCount})
`;
  const topicsLine =
    topics.length > 0
      ? `
**Topics:** ${topics.join(", ")}
`
      : "";
  const truncationWarning =
    "\n\u26A0\uFE0F TRUNCATED: 999 sections omitted to fit token budget";
  const baseOverhead = countTokensApprox(
    headerTemplate + topicsLine + truncationWarning,
  );
  // 20% safety margin plus a fixed 20-token cushion.
  const formattingOverhead = Math.ceil(baseOverhead * 1.2) + 20;
  const contentBudget = maxTokens - formattingOverhead;

  let truncated = false;
  let truncatedCount = 0;
  let sections = allSections;
  let summaryTokens = totalSummaryTokens;

  if (totalSummaryTokens > contentBudget && contentBudget > 0) {
    let tokensUsed = 0;
    const truncateSections = (sectionList) => {
      const kept = [];
      for (const section of sectionList) {
        if (tokensUsed + section.summaryTokens <= contentBudget) {
          tokensUsed += section.summaryTokens;
          kept.push({
            ...section,
            children: truncateSections(section.children),
          });
        } else {
          // The section itself is dropped, but children that still fit are
          // promoted into its place.
          truncatedCount++;
          kept.push(...truncateSections(section.children));
        }
      }
      return kept;
    };
    sections = truncateSections(allSections);
    summaryTokens = tokensUsed;
    truncated = truncatedCount > 0;
  }

  const compressionRatio =
    originalTokens > 0 ? 1 - summaryTokens / originalTokens : 0;
  const result = {
    path: document.path,
    title: document.title,
    originalTokens,
    summaryTokens,
    compressionRatio,
    sections,
    keyTopics: topics,
  };
  return truncated ? { ...result, truncated: true, truncatedCount } : result;
};
|
|
509
|
+
/**
 * Parse the file at `filePath` and summarize the resulting document.
 *
 * @param {string} filePath - Path handed to parseFile.
 * @param {object} [options] - Forwarded unchanged to summarizeDocument.
 * @returns An Effect yielding the document summary; fails with whatever parseFile fails with.
 */
var summarizeFile = (filePath, options = {}) =>
  Effect.gen(function* () {
    const parsed = yield* parseFile(filePath);
    const summary = summarizeDocument(parsed, options);
    return summary;
  });
|
|
514
|
+
/**
 * Assemble summaries of several files into one context within a token budget.
 *
 * Each source is first summarized at `options.level` (default "summary")
 * with an equal share of the budget. If the formatted summary no longer
 * fits, and more than MIN_PARTIAL_BUDGET tokens remain, a "brief" summary
 * limited to the remainder is tried instead. Sources that fail to summarize
 * or do not fit are reported in `overflow` rather than aborting the run.
 *
 * @param {string} rootPath - Base directory for relative source paths.
 * @param {string[]} sourcePaths - Files to include, absolute or relative to `rootPath`.
 * @param {{ budget: number, level?: string }} options
 * @returns An Effect yielding `{ sources, totalTokens, budget, overflow }`;
 *   every path in `sources` and `overflow` is relative to `rootPath`.
 */
var assembleContext = (rootPath, sourcePaths, options) =>
  Effect.gen(function* () {
    const budget = options.budget;
    const level = options.level ?? "summary";
    const sources = [];
    const overflow = [];
    let totalTokens = 0;
    const perSourceBudget = Math.floor(budget / sourcePaths.length);

    // Summarize one file. Failures are logged for observability and turned
    // into `null` so a single bad file cannot stop the batch.
    const trySummarize = (resolvedPath, summarizeOptions, failureMessage) =>
      summarizeFile(resolvedPath, summarizeOptions).pipe(
        Effect.tapError((error) => Effect.logError(failureMessage, error)),
        Effect.catchAll(() => Effect.succeed(null)),
      );

    for (const sourcePath of sourcePaths) {
      const resolvedPath = path.isAbsolute(sourcePath)
        ? sourcePath
        : path.join(rootPath, sourcePath);
      // One path form for every entry reported to the caller.
      const relativePath = path.relative(rootPath, resolvedPath);

      const summary = yield* trySummarize(
        resolvedPath,
        { level, maxTokens: perSourceBudget },
        `Failed to summarize ${sourcePath}`,
      );
      if (!summary) {
        overflow.push(relativePath);
        continue;
      }

      const content = formatSummary(summary);
      const tokens = countTokensApprox(content);
      if (totalTokens + tokens <= budget) {
        sources.push({ path: relativePath, title: summary.title, tokens, content });
        totalTokens += tokens;
        continue;
      }

      const remaining = budget - totalTokens;
      if (remaining <= MIN_PARTIAL_BUDGET) {
        overflow.push(relativePath);
        continue;
      }

      const briefSummary = yield* trySummarize(
        resolvedPath,
        { level: "brief", maxTokens: remaining },
        `Failed to create brief summary for ${sourcePath}`,
      );
      if (!briefSummary) {
        overflow.push(relativePath);
        continue;
      }

      const briefContent = formatSummary(briefSummary);
      const briefTokens = countTokensApprox(briefContent);
      // The formatted brief summary can still be larger than what is left;
      // accepting it would push totalTokens past the budget.
      if (briefTokens > remaining) {
        overflow.push(relativePath);
        continue;
      }
      sources.push({
        path: relativePath,
        title: briefSummary.title,
        tokens: briefTokens,
        content: briefContent,
      });
      totalTokens += briefTokens;
    }

    return {
      sources,
      totalTokens,
      budget,
      overflow,
    };
  });
|
|
596
|
+
|
|
597
|
+
// src/embeddings/openai-provider.ts
|
|
598
|
+
import { Effect as Effect2, Effect as Effect4 } from "effect";
|
|
599
|
+
// src/embeddings/semantic-search.ts
|
|
600
|
+
import * as fs3 from "fs/promises";
|
|
601
|
+
import OpenAI from "openai";
|
|
602
|
+
import * as path3 from "path";
|
|
603
|
+
|
|
604
|
+
/**
 * Embedding price table used for cost estimates and the staleness check.
 */
var PRICING_DATA = {
  // Month the table was last refreshed, formatted YYYY-MM.
  lastUpdated: "2024-09",
  // Where the numbers can be verified.
  source: "https://platform.openai.com/docs/pricing",
  // Price per 1M tokens, keyed by model name.
  prices: {
    "text-embedding-3-small": 0.02,
    "text-embedding-3-large": 0.13,
    "text-embedding-ada-002": 0.1,
  },
};
|
|
616
|
+
/**
 * Report whether the bundled pricing table is stale.
 *
 * The age is measured from the first day of `PRICING_DATA.lastUpdated`
 * (YYYY-MM, local time) to `now`, in whole days.
 *
 * @param {Date} [now] - Reference time; defaults to the current time. Exposed
 *   so callers and tests can evaluate staleness at a fixed instant.
 * @returns {string|null} A warning when the data is more than 90 days old,
 *   otherwise null (also null when `lastUpdated` cannot be parsed).
 */
var checkPricingFreshness = (now = /* @__PURE__ */ new Date()) => {
  const MS_PER_DAY = 1e3 * 60 * 60 * 24;
  const MAX_AGE_DAYS = 90;
  const [year, month] = PRICING_DATA.lastUpdated.split("-").map(Number);
  // NaN or 0 in either part means the stamp is unusable.
  if (!year || !month) return null;
  // Date months are zero-based.
  const lastUpdated = new Date(year, month - 1, 1);
  const daysSince = Math.floor(
    (now.getTime() - lastUpdated.getTime()) / MS_PER_DAY,
  );
  if (daysSince > MAX_AGE_DAYS) {
    return `Pricing data is ${daysSince} days old. May not reflect current rates.`;
  }
  return null;
};
|
|
629
|
+
/**
 * @returns {string} The `lastUpdated` stamp (YYYY-MM) of PRICING_DATA.
 */
var getPricingDate = () => {
  const { lastUpdated } = PRICING_DATA;
  return lastUpdated;
};
|
|
630
|
+
/**
 * Embedding provider backed by the OpenAI embeddings API.
 *
 * Requests are sent in batches of `batchSize` texts. text-embedding-3 models
 * are asked for 512-dimensional vectors. "text-embedding-ada-002" does not
 * accept the `dimensions` request parameter and returns 1536-dimensional
 * vectors, so the parameter is omitted for it and `dimensions` reports 1536.
 */
var OpenAIProvider = class _OpenAIProvider {
  /** Provider label, `openai:<model>`. */
  name;
  /** Length of the vectors returned by embed(). */
  dimensions;
  /** OpenAI SDK client. */
  client;
  /** Embedding model name. */
  model;
  /** Maximum number of texts per API request. */
  batchSize;

  /**
   * @param {string} apiKey - OpenAI API key.
   * @param {{ model?: string, batchSize?: number }} [options]
   */
  constructor(apiKey, options = {}) {
    this.client = new OpenAI({ apiKey });
    this.model = options.model ?? "text-embedding-3-small";
    const requestedBatchSize = options.batchSize ?? 100;
    // A zero, negative or NaN batch size would never advance the batching
    // loop in embed(); fall back to the default instead.
    this.batchSize = requestedBatchSize > 0 ? requestedBatchSize : 100;
    this.name = `openai:${this.model}`;
    this.dimensions = this.model === "text-embedding-ada-002" ? 1536 : 512;
  }

  /**
   * Create an OpenAI provider instance.
   * Returns an Effect that fails with ApiKeyMissingError if no API key is available.
   */
  static create(options = {}) {
    const apiKey = options.apiKey ?? process.env.OPENAI_API_KEY;
    if (!apiKey) {
      return Effect2.fail(
        new ApiKeyMissingError({
          provider: "OpenAI",
          envVar: "OPENAI_API_KEY",
        }),
      );
    }
    return Effect2.succeed(new _OpenAIProvider(apiKey, options));
  }

  /**
   * Embed `texts`, batching requests as needed.
   *
   * @param {string[]} texts
   * @returns {Promise<{ embeddings: number[][], tokensUsed: number, cost: number }>}
   *   Embeddings in input order, the token usage reported by the API, and
   *   the cost derived from PRICING_DATA (unknown models are priced at 0.02 per 1M tokens).
   * @throws {ApiKeyInvalidError} When the API rejects the key; other errors are rethrown as-is.
   */
  async embed(texts) {
    if (texts.length === 0) {
      return { embeddings: [], tokensUsed: 0, cost: 0 };
    }
    const allEmbeddings = [];
    let totalTokens = 0;
    try {
      for (let i = 0; i < texts.length; i += this.batchSize) {
        const request = {
          model: this.model,
          input: texts.slice(i, i + this.batchSize),
        };
        if (this.model !== "text-embedding-ada-002") {
          // Ensure consistent dimensions (ada-002 rejects this parameter).
          request.dimensions = this.dimensions;
        }
        const response = await this.client.embeddings.create(request);
        for (const item of response.data) {
          allEmbeddings.push(item.embedding);
        }
        totalTokens += response.usage?.total_tokens ?? 0;
      }
    } catch (error) {
      if (error instanceof OpenAI.AuthenticationError) {
        throw new ApiKeyInvalidError({
          provider: "OpenAI",
          details: error.message,
        });
      }
      throw error;
    }
    const pricePerMillion = PRICING_DATA.prices[this.model] ?? 0.02;
    const cost = (totalTokens / 1e6) * pricePerMillion;
    return {
      embeddings: allEmbeddings,
      tokensUsed: totalTokens,
      cost,
    };
  }
};
|
|
697
|
+
/**
 * Convenience wrapper around OpenAIProvider.create.
 *
 * @param {object} [options] - Passed through unchanged.
 * @returns Whatever OpenAIProvider.create returns for `options`.
 */
var createOpenAIProvider = (options) => {
  return OpenAIProvider.create(options);
};
|
|
698
|
+
/**
 * Lift an in-flight embedding promise into an Effect with typed failures.
 *
 * An ApiKeyInvalidError rejection is passed through untouched; any other
 * rejection is wrapped in an EmbeddingError that keeps the original as `cause`.
 *
 * @param {Promise<object>} embedPromise - Promise returned by a provider's embed().
 * @returns The result of Effect.tryPromise over `embedPromise`.
 */
var wrapEmbedding = (embedPromise) => {
  const toTypedError = (e) => {
    if (e instanceof ApiKeyInvalidError) {
      return e;
    }
    const message = e instanceof Error ? e.message : String(e);
    return new EmbeddingError({
      reason: "Unknown",
      message,
      provider: "OpenAI",
      cause: e,
    });
  };
  return Effect2.tryPromise({
    try: () => embedPromise,
    catch: toTypedError,
  });
};
|
|
713
|
+
|
|
714
|
+
import { Effect as Effect3 } from "effect";
|
|
715
|
+
// src/embeddings/vector-store.ts
|
|
716
|
+
import * as fs2 from "fs/promises";
|
|
717
|
+
import HierarchicalNSW from "hnswlib-node";
|
|
718
|
+
import * as path2 from "path";
|
|
719
|
+
|
|
720
|
+
/** File name of the serialized HNSW index inside the index directory. */
var VECTOR_INDEX_FILE = "vectors.bin";
/** File name of the JSON metadata written next to the index. */
var VECTOR_META_FILE = "vectors.meta.json";
/** Value stored in the metadata `version` field. */
var INDEX_VERSION = 1;
|
|
723
|
+
/**
 * Vector store backed by an hnswlib-node cosine index.
 *
 * The index file and a JSON metadata file live in the INDEX_DIR directory
 * under `rootPath`. Entries are keyed by a numeric index label; `idToIndex`
 * maps entry ids to labels so the same id is never added twice.
 */
var HnswVectorStore = class {
  rootPath;
  dimensions;
  index = null;
  entries = /* @__PURE__ */ new Map();
  idToIndex = /* @__PURE__ */ new Map();
  nextIndex = 0;
  provider = "unknown";
  totalCost = 0;
  totalTokens = 0;
  /** ISO timestamp of the first save; restored by load() so re-saving keeps it. */
  createdAt = null;

  /**
   * @param {string} rootPath - Project root; resolved to an absolute path.
   * @param {number} dimensions - Length of the vectors stored in the index.
   */
  constructor(rootPath, dimensions) {
    this.rootPath = path2.resolve(rootPath);
    this.dimensions = dimensions;
  }

  getIndexDir() {
    return path2.join(this.rootPath, INDEX_DIR);
  }

  getVectorPath() {
    return path2.join(this.getIndexDir(), VECTOR_INDEX_FILE);
  }

  getMetaPath() {
    return path2.join(this.getIndexDir(), VECTOR_META_FILE);
  }

  /** Return the index, creating and initializing an empty one on first use. */
  ensureIndex() {
    if (!this.index) {
      this.index = new HierarchicalNSW.HierarchicalNSW(
        "cosine",
        this.dimensions,
      );
      this.index.initIndex(1e4, 16, 200, 100);
    }
    return this.index;
  }

  /**
   * Add entries to the index, skipping ids that are already stored.
   *
   * @param {Array<object>} entries - Objects with at least `id` and `embedding`.
   * @returns An Effect that fails with VectorStoreError (operation "add").
   */
  add(entries) {
    return Effect3.try({
      try: () => {
        const index = this.ensureIndex();
        for (const entry of entries) {
          if (this.idToIndex.has(entry.id)) {
            continue;
          }
          const idx = this.nextIndex++;
          // Double the capacity when the next label would not fit.
          if (idx >= index.getMaxElements()) {
            index.resizeIndex(index.getMaxElements() * 2);
          }
          index.addPoint(entry.embedding, idx);
          this.entries.set(idx, entry);
          this.idToIndex.set(entry.id, idx);
        }
      },
      catch: (e) =>
        new VectorStoreError({
          operation: "add",
          message: e instanceof Error ? e.message : String(e),
          cause: e,
        }),
    });
  }

  /**
   * Find the nearest stored entries to `vector`.
   *
   * @param {number[]} vector - Query embedding.
   * @param {number} limit - Maximum number of neighbours to request.
   * @param {number} [threshold=0] - Minimum similarity (1 - distance) to keep.
   * @returns An Effect yielding `{ id, sectionId, documentPath, heading, similarity }`
   *   objects; empty when nothing is indexed. Fails with VectorStoreError (operation "search").
   */
  search(vector, limit, threshold = 0) {
    return Effect3.try({
      try: () => {
        if (!this.index || this.entries.size === 0) {
          return [];
        }
        const found = this.index.searchKnn(
          vector,
          Math.min(limit, this.entries.size),
        );
        const results = [];
        for (let i = 0; i < found.neighbors.length; i++) {
          const idx = found.neighbors[i];
          const distance = found.distances[i];
          if (idx === void 0 || distance === void 0) {
            continue;
          }
          const similarity = 1 - distance;
          if (similarity < threshold) {
            continue;
          }
          const entry = this.entries.get(idx);
          if (entry) {
            results.push({
              id: entry.id,
              sectionId: entry.sectionId,
              documentPath: entry.documentPath,
              heading: entry.heading,
              similarity,
            });
          }
        }
        return results;
      },
      catch: (e) =>
        new VectorStoreError({
          operation: "search",
          message: e instanceof Error ? e.message : String(e),
          cause: e,
        }),
    });
  }

  /**
   * Write the index and its metadata to disk. Does nothing when no index exists.
   *
   * `createdAt` is set on the first save and kept on later ones; `updatedAt`
   * is refreshed every time.
   *
   * @returns An Effect that fails with VectorStoreError (operation "save").
   */
  save() {
    const saveError = (what) => (e) =>
      new VectorStoreError({
        operation: "save",
        message: `${what}: ${e instanceof Error ? e.message : String(e)}`,
        cause: e,
      });
    return Effect3.gen(
      function* () {
        if (!this.index) {
          return;
        }
        const indexDir = this.getIndexDir();
        yield* Effect3.tryPromise({
          try: () => fs2.mkdir(indexDir, { recursive: true }),
          catch: saveError("Failed to create directory"),
        });
        yield* Effect3.tryPromise({
          try: () => this.index.writeIndex(this.getVectorPath()),
          catch: saveError("Failed to write index"),
        });
        const now = /* @__PURE__ */ new Date().toISOString();
        // Keep the original creation time across load/save cycles.
        this.createdAt = this.createdAt ?? now;
        const meta = {
          version: INDEX_VERSION,
          provider: this.provider,
          dimensions: this.dimensions,
          entries: Object.fromEntries(
            Array.from(this.entries.entries()).map(([idx, entry]) => [
              idx.toString(),
              entry,
            ]),
          ),
          totalCost: this.totalCost,
          totalTokens: this.totalTokens,
          createdAt: this.createdAt,
          updatedAt: now,
        };
        yield* Effect3.tryPromise({
          try: () =>
            fs2.writeFile(this.getMetaPath(), JSON.stringify(meta, null, 2)),
          catch: saveError("Failed to write metadata"),
        });
      }.bind(this),
    );
  }

  /**
   * Load a previously saved index and metadata.
   *
   * @returns An Effect yielding `true` when the store was loaded, `false`
   *   when the files are missing or were built with different dimensions.
   *   Fails with VectorStoreError (operation "load") on unreadable or
   *   malformed files; in that case the in-memory state is left untouched.
   */
  load() {
    const loadError = (what) => (e) =>
      new VectorStoreError({
        operation: "load",
        message: `${what}: ${e instanceof Error ? e.message : String(e)}`,
        cause: e,
      });
    return Effect3.gen(
      function* () {
        const vectorPath = this.getVectorPath();
        const metaPath = this.getMetaPath();
        const filesExist = yield* Effect3.tryPromise({
          try: async () => {
            await fs2.access(vectorPath);
            await fs2.access(metaPath);
            return true;
          },
          catch: () =>
            new VectorStoreError({
              operation: "load",
              message: "Files not found",
            }),
        }).pipe(
          Effect3.catchTag("VectorStoreError", () => Effect3.succeed(false)),
        );
        if (!filesExist) {
          return false;
        }
        const metaContent = yield* Effect3.tryPromise({
          try: () => fs2.readFile(metaPath, "utf-8"),
          catch: loadError("Failed to read metadata"),
        });
        const meta = yield* Effect3.try({
          try: () => JSON.parse(metaContent),
          catch: loadError("Failed to parse metadata"),
        });
        if (meta === null || typeof meta !== "object") {
          return yield* Effect3.fail(
            new VectorStoreError({
              operation: "load",
              message: "Failed to parse metadata: expected a JSON object",
            }),
          );
        }
        if (meta.dimensions !== this.dimensions) {
          return false;
        }
        if (meta.entries === null || typeof meta.entries !== "object") {
          return yield* Effect3.fail(
            new VectorStoreError({
              operation: "load",
              message: "Failed to parse metadata: missing entries",
            }),
          );
        }
        // Read into a fresh index and only swap it in once the read
        // succeeded, so a failed load cannot leave a half-initialized index.
        const loadedIndex = new HierarchicalNSW.HierarchicalNSW(
          "cosine",
          this.dimensions,
        );
        yield* Effect3.tryPromise({
          try: () => loadedIndex.readIndex(vectorPath),
          catch: loadError("Failed to read index"),
        });
        this.index = loadedIndex;
        this.entries.clear();
        this.idToIndex.clear();
        this.nextIndex = 0;
        for (const [idxStr, entry] of Object.entries(meta.entries)) {
          const idx = parseInt(idxStr, 10);
          this.entries.set(idx, entry);
          this.idToIndex.set(entry.id, idx);
          // Continue numbering after the highest stored label.
          this.nextIndex = Math.max(this.nextIndex, idx + 1);
        }
        this.provider = meta.provider;
        // Missing totals would turn later addCost() sums into NaN.
        this.totalCost = meta.totalCost ?? 0;
        this.totalTokens = meta.totalTokens ?? 0;
        this.createdAt =
          typeof meta.createdAt === "string" ? meta.createdAt : null;
        return true;
      }.bind(this),
    );
  }

  /** Snapshot of entry count, dimensions, provider and accumulated cost/tokens. */
  getStats() {
    return {
      count: this.entries.size,
      dimensions: this.dimensions,
      provider: this.provider,
      totalCost: this.totalCost,
      totalTokens: this.totalTokens,
    };
  }

  setProvider(name) {
    this.provider = name;
  }

  /** Add to the running cost and token totals persisted by save(). */
  addCost(cost, tokens) {
    this.totalCost += cost;
    this.totalTokens += tokens;
  }
};
|
|
965
|
+
/**
 * Factory for an HnswVectorStore rooted at `rootPath`.
 *
 * @param {string} rootPath - Passed to the HnswVectorStore constructor.
 * @param {number} dimensions - Vector length passed to the constructor.
 * @returns {HnswVectorStore} A new store instance.
 */
var createVectorStore = (rootPath, dimensions) => {
  const store = new HnswVectorStore(rootPath, dimensions);
  return store;
};
|
|
967
|
+
|
|
968
|
+
// src/embeddings/semantic-search.ts
|
|
969
|
+
/**
 * Compose the text that is embedded for one section.
 *
 * Layout, one item per line: `# <heading>`, an optional
 * `Parent section: <parentHeading>`, `Document: <documentTitle>`, a blank
 * line, then the section content.
 *
 * @param {{ heading: string }} section - Section whose heading leads the text.
 * @param {string} content - Raw section body.
 * @param {string} documentTitle - Title of the owning document.
 * @param {string} [parentHeading] - Parent heading; the line is omitted when falsy.
 * @returns {string} Newline-joined embedding text.
 */
var generateEmbeddingText = (
  section,
  content,
  documentTitle,
  parentHeading,
) => {
  const parentLines = parentHeading
    ? [`Parent section: ${parentHeading}`]
    : [];
  return [
    `# ${section.heading}`,
    ...parentLines,
    `Document: ${documentTitle}`,
    "",
    content,
  ].join("\n");
};
|
|
985
|
+
/**
 * Price per 1M tokens used by the cost estimates in this module: the
 * "text-embedding-3-small" entry of PRICING_DATA, or 0.02 when it is absent.
 */
var EMBEDDING_PRICE_PER_MILLION = PRICING_DATA.prices["text-embedding-3-small"] ?? 0.02;
|
|
987
|
+
/**
 * Estimate what embedding the indexed sections would cost, without calling
 * the embedding API.
 *
 * Sections with fewer than 10 tokens, and sections whose document path
 * matches one of `options.excludePatterns`, are ignored. The remainder is
 * grouped by the directory of its document.
 *
 * Exclude patterns are simple globs matched against the whole document
 * path: `*` matches any run of characters, `?` matches one character, and
 * everything else is literal.
 *
 * @param {string} rootPath - Project root containing the index.
 * @param {{ excludePatterns?: string[] }} [options]
 * @returns An Effect yielding totals (files, sections, tokens, cost,
 *   estimated seconds) and per-directory estimates sorted by directory name.
 *   Fails with IndexNotFoundError when either index cannot be loaded.
 */
var estimateEmbeddingCost = (rootPath, options = {}) =>
  Effect4.gen(function* () {
    const resolvedRoot = path3.resolve(rootPath);
    const storage = createStorage(resolvedRoot);
    const docIndex = yield* loadDocumentIndex(storage);
    const sectionIndex = yield* loadSectionIndex(storage);
    if (!docIndex || !sectionIndex) {
      return yield* Effect4.fail(
        new IndexNotFoundError({ path: resolvedRoot }),
      );
    }

    // Compile each glob once. Regex metacharacters other than the two glob
    // wildcards are escaped so that e.g. the "." in "*.md" is literal.
    const globToRegExp = (pattern) => {
      const escaped = pattern.replace(/[.+^${}()|[\]\\]/g, "\\$&");
      const source = escaped.replace(/\*/g, ".*").replace(/\?/g, ".");
      return new RegExp(`^${source}$`);
    };
    const excludeMatchers = (options.excludePatterns ?? []).map(globToRegExp);
    const isExcluded = (docPath) =>
      excludeMatchers.some((matcher) => matcher.test(docPath));

    const byDir = /* @__PURE__ */ new Map();
    for (const section of Object.values(sectionIndex.sections)) {
      if (section.tokenCount < 10) continue;
      if (isExcluded(section.documentPath)) continue;
      const dir = path3.dirname(section.documentPath) || ".";
      if (!byDir.has(dir)) {
        byDir.set(dir, {
          files: /* @__PURE__ */ new Set(),
          sections: 0,
          tokens: 0,
        });
      }
      const entry = byDir.get(dir);
      entry.files.add(section.documentPath);
      entry.sections++;
      entry.tokens += section.tokenCount;
    }

    const directoryEstimates = [];
    let totalFiles = 0;
    let totalSections = 0;
    let totalTokens = 0;
    for (const [dir, data] of byDir) {
      directoryEstimates.push({
        directory: dir,
        fileCount: data.files.size,
        sectionCount: data.sections,
        estimatedTokens: data.tokens,
        estimatedCost: (data.tokens / 1e6) * EMBEDDING_PRICE_PER_MILLION,
      });
      totalFiles += data.files.size;
      totalSections += data.sections;
      totalTokens += data.tokens;
    }
    directoryEstimates.sort((a, b) => a.directory.localeCompare(b.directory));

    // 1.5 seconds per started group of 100 sections.
    const estimatedTimeSeconds = Math.ceil(totalSections / 100) * 1.5;
    return {
      totalFiles,
      totalSections,
      totalTokens,
      totalCost: (totalTokens / 1e6) * EMBEDDING_PRICE_PER_MILLION,
      estimatedTimeSeconds,
      byDirectory: directoryEstimates,
    };
  });
|
|
1050
|
+
/**
 * Build (or reuse) the vector store for every indexed section under `rootPath`.
 *
 * Unless `options.force` is set, an existing non-empty store is reused and
 * reported as a cache hit. Otherwise each eligible section (at least 10
 * tokens, document known to the document index, path not excluded) is read
 * from its file, turned into embedding text, embedded in one provider call
 * and saved. Files that cannot be read are logged and skipped.
 *
 * Exclude patterns are simple globs matched against the whole document
 * path: `*` matches any run of characters, `?` matches one character, and
 * everything else is literal.
 *
 * @param {string} rootPath - Project root containing the index.
 * @param {{ provider?: object, force?: boolean, excludePatterns?: string[],
 *   onFileProgress?: Function }} [options]
 * @returns An Effect yielding `{ sectionsEmbedded, tokensUsed, cost, duration,
 *   filesProcessed }`, plus `cacheHit`, `existingVectors` and
 *   `estimatedSavings` when an existing store was reused.
 */
var buildEmbeddings = (rootPath, options = {}) =>
  Effect4.gen(function* () {
    const startTime = Date.now();
    const resolvedRoot = path3.resolve(rootPath);
    const storage = createStorage(resolvedRoot);
    const docIndex = yield* loadDocumentIndex(storage);
    const sectionIndex = yield* loadSectionIndex(storage);
    if (!docIndex || !sectionIndex) {
      return yield* Effect4.fail(
        new IndexNotFoundError({ path: resolvedRoot }),
      );
    }
    const provider = options.provider ?? (yield* createOpenAIProvider());
    const dimensions = provider.dimensions;
    const vectorStore = createVectorStore(resolvedRoot, dimensions);
    vectorStore.setProvider(provider.name);

    if (!options.force) {
      const loaded = yield* vectorStore.load();
      if (loaded) {
        const stats = vectorStore.getStats();
        if (stats.count > 0) {
          const duration2 = Date.now() - startTime;
          const estimatedSavings =
            (stats.totalTokens / 1e6) * EMBEDDING_PRICE_PER_MILLION;
          return {
            sectionsEmbedded: 0,
            tokensUsed: 0,
            cost: 0,
            duration: duration2,
            filesProcessed: 0,
            cacheHit: true,
            existingVectors: stats.count,
            estimatedSavings,
          };
        }
      }
    }

    // Compile each glob once. Regex metacharacters other than the two glob
    // wildcards are escaped so that e.g. the "." in "*.md" is literal.
    const globToRegExp = (pattern) => {
      const escaped = pattern.replace(/[.+^${}()|[\]\\]/g, "\\$&");
      const source = escaped.replace(/\*/g, ".*").replace(/\?/g, ".");
      return new RegExp(`^${source}$`);
    };
    const excludeMatchers = (options.excludePatterns ?? []).map(globToRegExp);
    const isExcluded = (docPath) =>
      excludeMatchers.some((matcher) => matcher.test(docPath));

    const sectionsByDoc = /* @__PURE__ */ new Map();
    for (const section of Object.values(sectionIndex.sections)) {
      const document = docIndex.documents[section.documentPath];
      if (!document) continue;
      if (section.tokenCount < 10) continue;
      if (isExcluded(section.documentPath)) continue;

      // Parent = closest preceding section exactly one level up. Tracking
      // the start line makes this independent of the order of `byDocument`.
      let parentHeading;
      if (section.level > 1) {
        let parentStartLine = -Infinity;
        const docSections = sectionIndex.byDocument[document.id] ?? [];
        for (const candidateId of docSections) {
          const candidate = sectionIndex.sections[candidateId];
          if (
            candidate &&
            candidate.level === section.level - 1 &&
            candidate.startLine < section.startLine &&
            candidate.startLine >= parentStartLine
          ) {
            parentStartLine = candidate.startLine;
            parentHeading = candidate.heading;
          }
        }
      }

      const docPath = section.documentPath;
      if (!sectionsByDoc.has(docPath)) {
        sectionsByDoc.set(docPath, []);
      }
      sectionsByDoc.get(docPath).push({ section, parentHeading });
    }

    if (sectionsByDoc.size === 0) {
      const duration2 = Date.now() - startTime;
      return {
        sectionsEmbedded: 0,
        tokensUsed: 0,
        cost: 0,
        duration: duration2,
        filesProcessed: 0,
      };
    }

    const sectionsToEmbed = [];
    const docPaths = Array.from(sectionsByDoc.keys());
    let filesProcessed = 0;
    for (let fileIndex = 0; fileIndex < docPaths.length; fileIndex++) {
      const docPath = docPaths[fileIndex];
      const sections = sectionsByDoc.get(docPath);
      const document = docIndex.documents[docPath];
      if (!document) continue;
      if (options.onFileProgress) {
        options.onFileProgress({
          fileIndex: fileIndex + 1,
          totalFiles: docPaths.length,
          filePath: docPath,
          sectionCount: sections.length,
        });
      }
      const filePath = path3.join(resolvedRoot, docPath);
      // tryPromise (not promise): a rejected read must become a recoverable
      // failure so catchAll can turn it into a skip. Effect.promise would
      // raise it as a defect, which catchAll does not handle.
      const fileContentResult = yield* Effect4.tryPromise(() =>
        fs3.readFile(filePath, "utf-8"),
      ).pipe(
        Effect4.map((content) => ({ ok: true, content })),
        Effect4.catchAll(() => Effect4.succeed({ ok: false, content: "" })),
      );
      if (!fileContentResult.ok) {
        yield* Effect4.logWarning(`Skipping file (cannot read): ${docPath}`);
        continue;
      }
      filesProcessed++;
      const lines = fileContentResult.content.split("\n");
      for (const { section, parentHeading } of sections) {
        // startLine/endLine are 1-based and inclusive.
        const content = lines
          .slice(section.startLine - 1, section.endLine)
          .join("\n");
        const text = generateEmbeddingText(
          section,
          content,
          document.title,
          parentHeading,
        );
        sectionsToEmbed.push({ section, text });
      }
    }

    if (sectionsToEmbed.length === 0) {
      const duration2 = Date.now() - startTime;
      return {
        sectionsEmbedded: 0,
        tokensUsed: 0,
        cost: 0,
        duration: duration2,
        filesProcessed,
      };
    }

    const texts = sectionsToEmbed.map((s) => s.text);
    const result = yield* wrapEmbedding(provider.embed(texts));
    const entries = [];
    for (let i = 0; i < sectionsToEmbed.length; i++) {
      const { section } = sectionsToEmbed[i] ?? { section: null };
      const embedding = result.embeddings[i];
      // Skip positions for which the provider returned no vector.
      if (!section || !embedding) continue;
      entries.push({
        id: section.id,
        sectionId: section.id,
        documentPath: section.documentPath,
        heading: section.heading,
        embedding,
      });
    }
    yield* vectorStore.add(entries);
    vectorStore.addCost(result.cost, result.tokensUsed);
    yield* vectorStore.save();
    const duration = Date.now() - startTime;
    return {
      sectionsEmbedded: entries.length,
      tokensUsed: result.tokensUsed,
      cost: result.cost,
      duration,
      filesProcessed,
    };
  });
|
|
1211
|
+
/**
 * Run a semantic (embedding-based) search over the stored section vectors.
 *
 * @param {string} rootPath - Project root; resolved to an absolute path.
 * @param {string} query - Natural-language query to embed.
 * @param {object} [options]
 * @param {number} [options.limit=10] - Maximum number of results returned.
 * @param {number} [options.threshold=0] - Similarity threshold handed to the vector store.
 * @param {string} [options.pathPattern] - Case-insensitive glob matched against the
 *   whole document path: `*` matches any run of characters, `?` matches exactly
 *   one character, every other character is literal.
 * @returns An Effect yielding `{ sectionId, documentPath, heading, similarity }[]`.
 *   Fails with `EmbeddingsNotFoundError` when no vector store could be loaded and
 *   with `EmbeddingError` when the provider returns no vector for the query.
 */
var semanticSearch = (rootPath, query, options = {}) =>
  Effect4.gen(function* () {
    const resolvedRoot = path3.resolve(rootPath);
    const provider = yield* createOpenAIProvider();
    const dimensions = provider.dimensions;
    const vectorStore = createVectorStore(resolvedRoot, dimensions);
    const loaded = yield* vectorStore.load();
    if (!loaded) {
      return yield* Effect4.fail(
        new EmbeddingsNotFoundError({ path: resolvedRoot }),
      );
    }
    const queryResult = yield* wrapEmbedding(provider.embed([query]));
    const queryVector = queryResult.embeddings[0];
    if (!queryVector) {
      return yield* Effect4.fail(
        new EmbeddingError({
          reason: "Unknown",
          message: "Failed to generate query embedding",
          provider: "OpenAI",
        }),
      );
    }
    const limit = options.limit ?? 10;
    const threshold = options.threshold ?? 0;
    // Twice the requested amount is fetched; presumably this leaves headroom
    // for the path filter below to discard some candidates.
    const searchResults = yield* vectorStore.search(
      queryVector,
      limit * 2,
      threshold,
    );
    let filteredResults = searchResults;
    if (options.pathPattern) {
      // Escape every regex metacharacter first so paths containing "+", "(",
      // "[" and friends are matched literally, then translate the two glob
      // wildcards. "*" and "?" are deliberately excluded from the escape set.
      const pattern = options.pathPattern
        .replace(/[.+^${}()|[\]\\]/g, "\\$&")
        .replace(/\*/g, ".*")
        .replace(/\?/g, ".");
      const regex = new RegExp(`^${pattern}$`, "i");
      filteredResults = searchResults.filter((r) => regex.test(r.documentPath));
    }
    const results = filteredResults.slice(0, limit).map((r) => ({
      sectionId: r.sectionId,
      documentPath: r.documentPath,
      heading: r.heading,
      similarity: r.similarity,
    }));
    return results;
  });
|
|
1257
|
+
/**
 * Report summary statistics about the embeddings stored for a project.
 *
 * @param {string} rootPath - Project root; resolved to an absolute path.
 * @returns An Effect yielding
 *   `{ hasEmbeddings, count, provider, dimensions, totalCost, totalTokens }`.
 *   When the store cannot be loaded, `hasEmbeddings` is false, `provider` is
 *   "none" and every numeric field is 0.
 */
var getEmbeddingStats = (rootPath) =>
  Effect4.gen(function* () {
    // NOTE(review): the store is created with a fixed dimension of 1536 rather
    // than a provider-supplied value — confirm this matches the stored index.
    const store = createVectorStore(path3.resolve(rootPath), 1536);
    const isLoaded = yield* store.load();
    if (isLoaded) {
      const { count, provider, dimensions, totalCost, totalTokens } =
        store.getStats();
      return {
        hasEmbeddings: true,
        count,
        provider,
        dimensions,
        totalCost,
        totalTokens,
      };
    }
    return {
      hasEmbeddings: false,
      count: 0,
      provider: "none",
      dimensions: 0,
      totalCost: 0,
      totalTokens: 0,
    };
  });
|
|
1282
|
+
|
|
1283
|
+
import { Effect as Effect5 } from "effect";
|
|
1284
|
+
// src/search/searcher.ts
|
|
1285
|
+
import * as fs4 from "fs/promises";
|
|
1286
|
+
import * as path4 from "path";
|
|
1287
|
+
|
|
1288
|
+
// src/search/query-parser.ts
|
|
1289
|
+
/**
 * Split a search query into lexical tokens.
 *
 * Token types produced: "PHRASE" (text between double quotes, quotes removed;
 * an unterminated quote runs to the end of the input), "LPAREN", "RPAREN",
 * "AND" / "OR" / "NOT" (recognised case-insensitively, value upper-cased) and
 * "TERM" (any other run of characters free of whitespace, parentheses and
 * double quotes). Whitespace only separates tokens.
 *
 * @param {string} query - Raw query text.
 * @returns {{ type: string, value: string }[]} Tokens in input order.
 */
var tokenize = (query) => {
  const operators = new Set(["AND", "OR", "NOT"]);
  // Sticky so the match is anchored exactly at the cursor position.
  const wordPattern = /[^\s()"]+/y;
  const out = [];
  let cursor = 0;
  while (cursor < query.length) {
    const ch = query[cursor];
    if (/\s/.test(ch)) {
      cursor += 1;
    } else if (ch === '"') {
      const closing = query.indexOf('"', cursor + 1);
      const end = closing === -1 ? query.length : closing;
      out.push({ type: "PHRASE", value: query.slice(cursor + 1, end) });
      // Step past the closing quote (or past the end when there is none).
      cursor = end + 1;
    } else if (ch === "(" || ch === ")") {
      out.push({ type: ch === "(" ? "LPAREN" : "RPAREN", value: ch });
      cursor += 1;
    } else {
      wordPattern.lastIndex = cursor;
      const found = wordPattern.exec(query);
      if (found === null) {
        // Nothing recognisable here; skip a single character.
        cursor += 1;
      } else {
        const word = found[0];
        const keyword = word.toUpperCase();
        out.push(
          operators.has(keyword)
            ? { type: keyword, value: keyword }
            : { type: "TERM", value: word },
        );
        cursor += word.length;
      }
    }
  }
  return out;
};
|
|
1338
|
+
/**
 * Recursive-descent parser turning query tokens into a boolean AST.
 *
 * Grammar (lowest to highest precedence):
 *   expr    := andExpr ("OR" andExpr)*
 *   andExpr := notExpr (("AND" | <implicit>) notExpr)*
 *   notExpr := "NOT" notExpr | primary
 *   primary := "(" expr ")" | PHRASE | TERM
 *
 * Two adjacent operands with no operator between them are AND-ed. A missing
 * operand is represented by an empty term node (`{ type: "term", value: "" }`).
 * Every TERM and PHRASE value consumed is also recorded in `terms` / `phrases`.
 */
var Parser = class {
  tokens;
  pos = 0;
  terms = [];
  phrases = [];
  /**
   * @param {{ type: string, value: string }[]} tokens - Tokens to parse.
   *   Closing parentheses that have no matching opening parenthesis are
   *   discarded here; otherwise the first such token would end the top-level
   *   parse and everything after it would be silently ignored.
   */
  constructor(tokens) {
    let depth = 0;
    this.tokens = tokens.filter((tok) => {
      if (tok.type === "LPAREN") {
        depth += 1;
      } else if (tok.type === "RPAREN") {
        if (depth === 0) {
          return false;
        }
        depth -= 1;
      }
      return true;
    });
  }
  /** @returns The token at the cursor, or undefined past the end. */
  current() {
    return this.tokens[this.pos];
  }
  /** @returns The token at the cursor, then moves the cursor forward. */
  advance() {
    return this.tokens[this.pos++];
  }
  /**
   * Consume the current token when it has the given type.
   * @param {string} type - Token type to look for.
   * @returns {boolean} True when a token was consumed.
   */
  match(type) {
    if (this.current()?.type === type) {
      this.advance();
      return true;
    }
    return false;
  }
  /** @returns The root AST node, or null when there is nothing to parse. */
  parse() {
    if (this.tokens.length === 0) {
      return null;
    }
    return this.parseExpr();
  }
  /** Parse a left-associative chain of OR-ed operands. */
  parseExpr() {
    let left = this.parseAndExpr();
    while (this.match("OR")) {
      const right = this.parseAndExpr();
      left = { type: "or", left, right };
    }
    return left;
  }
  /** Parse a left-associative chain of (explicitly or implicitly) AND-ed operands. */
  parseAndExpr() {
    let left = this.parseNotExpr();
    while (this.match("AND") || this.isImplicitAnd()) {
      const right = this.parseNotExpr();
      left = { type: "and", left, right };
    }
    return left;
  }
  /** @returns {boolean} True when the current token can start a new operand. */
  isImplicitAnd() {
    const tok = this.current();
    return (
      tok?.type === "TERM" ||
      tok?.type === "PHRASE" ||
      tok?.type === "NOT" ||
      tok?.type === "LPAREN"
    );
  }
  /** Parse an optionally negated operand; NOT may be stacked. */
  parseNotExpr() {
    if (this.match("NOT")) {
      const operand = this.parseNotExpr();
      return { type: "not", operand };
    }
    return this.parsePrimary();
  }
  /** Parse a parenthesised expression, a phrase or a term. */
  parsePrimary() {
    const tok = this.current();
    if (this.match("LPAREN")) {
      const expr = this.parseExpr();
      // A missing closing parenthesis is tolerated.
      this.match("RPAREN");
      return expr;
    }
    if (tok?.type === "PHRASE") {
      this.advance();
      this.phrases.push(tok.value);
      return { type: "phrase", value: tok.value };
    }
    if (tok?.type === "TERM") {
      this.advance();
      this.terms.push(tok.value);
      return { type: "term", value: tok.value };
    }
    // No operand available: yield an empty term without consuming anything so
    // the caller can still handle the operator that follows.
    return { type: "term", value: "" };
  }
};
|
|
1417
|
+
/**
 * Tokenize and parse a boolean search query.
 *
 * @param {string} query - Raw query text.
 * @returns {{ ast: object, terms: string[], phrases: string[] } | null}
 *   The AST together with the term and phrase values collected by the parser,
 *   or null when the query yields no tokens or no AST.
 */
var parseQuery = (query) => {
  const tokenList = tokenize(query);
  if (tokenList.length === 0) {
    return null;
  }
  const parser = new Parser(tokenList);
  const ast = parser.parse();
  return ast
    ? { ast, terms: parser.terms, phrases: parser.phrases }
    : null;
};
|
|
1433
|
+
/**
 * Tell whether a query uses boolean-query syntax.
 *
 * @param {string} query - Raw query text.
 * @returns {boolean} True when tokenizing the query produces at least one
 *   AND, OR, NOT, quoted-phrase or opening-parenthesis token.
 */
var isAdvancedQuery = (query) => {
  const advancedTypes = new Set(["AND", "OR", "NOT", "PHRASE", "LPAREN"]);
  for (const token of tokenize(query)) {
    if (advancedTypes.has(token.type)) {
      return true;
    }
  }
  return false;
};
|
|
1444
|
+
/**
 * Evaluate a parsed query AST against a piece of text.
 *
 * Terms and phrases are case-insensitive substring tests; an empty term is
 * treated as matching. "and" / "or" short-circuit, "not" negates its operand.
 *
 * @param {object} ast - Root node (`term`, `phrase`, `and`, `or` or `not`).
 * @param {string} text - Text to test.
 * @returns {boolean | undefined} Match result; undefined for an unrecognised
 *   node type.
 */
var evaluateQuery = (ast, text) => {
  const haystack = text.toLowerCase();
  const contains = (needle) => haystack.includes(needle.toLowerCase());
  const visit = (node) => {
    const kind = node.type;
    if (kind === "term") {
      return node.value ? contains(node.value) : true;
    }
    if (kind === "phrase") {
      return contains(node.value);
    }
    if (kind === "and") {
      return visit(node.left) && visit(node.right);
    }
    if (kind === "or") {
      return visit(node.left) || visit(node.right);
    }
    if (kind === "not") {
      return !visit(node.operand);
    }
    return undefined;
  };
  return visit(ast);
};
|
|
1468
|
+
/**
 * Build a regex that finds the terms and phrases of a parsed query.
 *
 * Terms are wrapped in `\b` word boundaries; phrases are matched verbatim.
 * Both are regex-escaped, and empty values are ignored.
 *
 * @param {{ terms: string[], phrases: string[] }} parsed - Parsed query.
 * @returns {RegExp} A global, case-insensitive alternation, or a pattern that
 *   can never match when there is nothing to highlight.
 */
var buildHighlightPattern = (parsed) => {
  const toLiteral = (raw) => raw.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const alternatives = [
    ...parsed.terms
      .filter(Boolean)
      .map((word) => `\\b${toLiteral(word)}\\b`),
    ...parsed.phrases.filter(Boolean).map((text) => toLiteral(text)),
  ];
  return alternatives.length > 0
    ? new RegExp(alternatives.join("|"), "gi")
    : /.^/;
};
|
|
1486
|
+
|
|
1487
|
+
// src/search/searcher.ts
|
|
1488
|
+
/**
 * Test a file path against a simple, case-insensitive glob pattern.
 *
 * `*` matches any run of characters (including "/"), `?` matches exactly one
 * character, and every other character is matched literally. The pattern must
 * cover the whole path.
 *
 * @param {string} filePath - Path to test.
 * @param {string} pattern - Glob pattern.
 * @returns {boolean} True when the whole path matches the pattern.
 */
var matchPath = (filePath, pattern) => {
  // Escape every regex metacharacter except the two glob wildcards first, so
  // that paths containing "+", "(", "[", "$" and the like neither change the
  // meaning of the pattern nor make the RegExp constructor throw.
  const regexPattern = pattern
    .replace(/[.+^${}()|[\]\\]/g, "\\$&")
    .replace(/\*/g, ".*")
    .replace(/\?/g, ".");
  const regex = new RegExp(`^${regexPattern}$`, "i");
  return regex.test(filePath);
};
|
|
1496
|
+
/**
 * Filter indexed sections by metadata only (no file content is read).
 *
 * @param {string} rootPath - Project root handed to `createStorage`.
 * @param {object} [options]
 * @param {string} [options.heading] - Regex source tested case-insensitively
 *   against each section heading.
 * @param {string} [options.pathPattern] - Glob tested with `matchPath` against
 *   the section's document path.
 * @param {boolean} [options.hasCode] - Required value of `section.hasCode`.
 * @param {boolean} [options.hasList] - Required value of `section.hasList`.
 * @param {boolean} [options.hasTable] - Required value of `section.hasTable`.
 * @param {number} [options.minLevel] - Minimum heading level (inclusive).
 * @param {number} [options.maxLevel] - Maximum heading level (inclusive).
 * @param {number} [options.limit] - Maximum number of results; zero or a
 *   negative value yields no results.
 * @returns An Effect yielding `{ section, document }[]`; empty when either
 *   index is missing.
 */
var search = (rootPath, options = {}) =>
  Effect5.gen(function* () {
    const storage = createStorage(rootPath);
    const docIndex = yield* loadDocumentIndex(storage);
    const sectionIndex = yield* loadSectionIndex(storage);
    if (!docIndex || !sectionIndex) {
      return [];
    }
    const results = [];
    const headingRegex = options.heading
      ? new RegExp(options.heading, "i")
      : null;
    const { limit } = options;
    // The in-loop check only runs after a result was pushed, so a
    // non-positive limit has to be handled before the first iteration.
    if (limit !== undefined && limit <= 0) {
      return results;
    }
    for (const section of Object.values(sectionIndex.sections)) {
      if (headingRegex && !headingRegex.test(section.heading)) {
        continue;
      }
      if (
        options.pathPattern &&
        !matchPath(section.documentPath, options.pathPattern)
      ) {
        continue;
      }
      if (options.hasCode !== undefined && section.hasCode !== options.hasCode) {
        continue;
      }
      if (options.hasList !== undefined && section.hasList !== options.hasList) {
        continue;
      }
      if (
        options.hasTable !== undefined &&
        section.hasTable !== options.hasTable
      ) {
        continue;
      }
      if (options.minLevel !== undefined && section.level < options.minLevel) {
        continue;
      }
      if (options.maxLevel !== undefined && section.level > options.maxLevel) {
        continue;
      }
      // Sections whose document is missing from the document index are skipped.
      const document = docIndex.documents[section.documentPath];
      if (document) {
        results.push({ section, document });
      }
      if (limit !== undefined && results.length >= limit) {
        break;
      }
    }
    return results;
  });
|
|
1546
|
+
/**
 * Search indexed sections by metadata and, optionally, by file content.
 *
 * When `options.content` is set it is either parsed as a boolean query (if
 * `isAdvancedQuery` says so) or compiled as a case-insensitive regex. Each
 * document file is then read once and every section that passes the metadata
 * filters is tested against its own slice of lines. Without `options.content`
 * no file is read and sections are filtered by metadata alone.
 *
 * @param {string} rootPath - Project root handed to `createStorage`.
 * @param {object} [options]
 * @param {string} [options.content] - Boolean query or regex source.
 * @param {string} [options.heading] - Regex source tested case-insensitively
 *   against each section heading.
 * @param {string} [options.pathPattern] - Glob tested with `matchPath` against
 *   the document path.
 * @param {boolean} [options.hasCode] - Required value of `section.hasCode`.
 * @param {boolean} [options.hasList] - Required value of `section.hasList`.
 * @param {boolean} [options.hasTable] - Required value of `section.hasTable`.
 * @param {number} [options.minLevel] - Minimum heading level (inclusive).
 * @param {number} [options.maxLevel] - Maximum heading level (inclusive).
 * @param {number} [options.contextBefore=1] - Lines of context before a match.
 * @param {number} [options.contextAfter=1] - Lines of context after a match.
 * @param {number} [options.limit] - Maximum number of results; zero or a
 *   negative value yields no results.
 * @returns An Effect yielding result objects: `{ section, document }` for
 *   metadata-only searches, otherwise `{ section, document, sectionContent }`
 *   plus `matches` (`{ lineNumber, line, snippet, contextLines }[]`) when at
 *   least one line matched. Empty when either index is missing.
 */
var searchContent = (rootPath, options = {}) =>
  Effect5.gen(function* () {
    const storage = createStorage(rootPath);
    const docIndex = yield* loadDocumentIndex(storage);
    const sectionIndex = yield* loadSectionIndex(storage);
    if (!docIndex || !sectionIndex) {
      return [];
    }
    let parsedQuery = null;
    let contentRegex = null;
    let highlightRegex = null;
    if (options.content) {
      if (isAdvancedQuery(options.content)) {
        parsedQuery = parseQuery(options.content);
        if (parsedQuery) {
          highlightRegex = buildHighlightPattern(parsedQuery);
        }
      } else {
        contentRegex = new RegExp(options.content, "gi");
        highlightRegex = contentRegex;
      }
    }
    const headingRegex = options.heading
      ? new RegExp(options.heading, "i")
      : null;
    const results = [];
    const { limit } = options;
    // The limit checks below only run after a result was pushed, so a
    // non-positive limit has to be handled up front.
    if (limit !== undefined && limit <= 0) {
      return results;
    }
    const needsContent = Boolean(parsedQuery || contentRegex);
    // A Map keeps arbitrary document paths (e.g. "constructor") from colliding
    // with properties inherited by a plain object.
    const sectionsByDoc = new Map();
    for (const section of Object.values(sectionIndex.sections)) {
      const docSections = sectionsByDoc.get(section.documentPath);
      if (docSections) {
        docSections.push(section);
      } else {
        sectionsByDoc.set(section.documentPath, [section]);
      }
    }
    for (const [docPath, sections] of sectionsByDoc) {
      if (options.pathPattern && !matchPath(docPath, options.pathPattern)) {
        continue;
      }
      const document = docIndex.documents[docPath];
      if (!document) continue;
      let fileContent = null;
      let fileLines = [];
      if (needsContent) {
        const filePath = path4.join(storage.rootPath, docPath);
        // A rejected read must become a recoverable failure: try/catch around
        // `yield*` cannot intercept it, because failures of a yielded effect
        // are never thrown into the generator. Unreadable files are skipped
        // on purpose (best effort).
        const readResult = yield* Effect5.tryPromise(() =>
          fs4.readFile(filePath, "utf-8"),
        ).pipe(
          Effect5.map((content) => ({ ok: true, content })),
          Effect5.catchAll(() => Effect5.succeed({ ok: false, content: "" })),
        );
        if (!readResult.ok) {
          continue;
        }
        fileContent = readResult.content;
        fileLines = fileContent.split("\n");
      }
      for (const section of sections) {
        if (headingRegex && !headingRegex.test(section.heading)) {
          continue;
        }
        if (options.hasCode !== undefined && section.hasCode !== options.hasCode) {
          continue;
        }
        if (options.hasList !== undefined && section.hasList !== options.hasList) {
          continue;
        }
        if (
          options.hasTable !== undefined &&
          section.hasTable !== options.hasTable
        ) {
          continue;
        }
        if (options.minLevel !== undefined && section.level < options.minLevel) {
          continue;
        }
        if (options.maxLevel !== undefined && section.level > options.maxLevel) {
          continue;
        }
        if (!needsContent) {
          // Metadata-only search: every section passing the filters is a hit.
          results.push({ section, document });
          if (limit !== undefined && results.length >= limit) {
            return results;
          }
          continue;
        }
        if (!fileContent) {
          // Empty file: there is no content to test.
          continue;
        }
        // startLine is 1-based and endLine inclusive, hence the slice bounds.
        const sectionLines = fileLines.slice(
          section.startLine - 1,
          section.endLine,
        );
        const sectionContent = sectionLines.join("\n");
        if (parsedQuery && !evaluateQuery(parsedQuery.ast, sectionContent)) {
          continue;
        }
        const matches = [];
        const searchRegex = contentRegex || highlightRegex;
        const contextBefore = options.contextBefore ?? 1;
        const contextAfter = options.contextAfter ?? 1;
        if (searchRegex) {
          for (let i = 0; i < sectionLines.length; i++) {
            const line = sectionLines[i];
            if (!line || !searchRegex.test(line)) {
              continue;
            }
            // The regex may be global; rewind it so the next test() starts at
            // the beginning of the next line.
            searchRegex.lastIndex = 0;
            const absoluteLineNum = section.startLine + i;
            const snippetStart = Math.max(0, i - contextBefore);
            const snippetEnd = Math.min(
              sectionLines.length,
              i + contextAfter + 1,
            );
            const snippet = sectionLines
              .slice(snippetStart, snippetEnd)
              .join("\n");
            const contextLines = [];
            for (let j = snippetStart; j < snippetEnd; j++) {
              const ctxLine = sectionLines[j];
              if (ctxLine !== undefined) {
                contextLines.push({
                  lineNumber: section.startLine + j,
                  line: ctxLine,
                  isMatch: j === i,
                });
              }
            }
            matches.push({
              lineNumber: absoluteLineNum,
              line,
              snippet,
              contextLines,
            });
          }
        }
        // A boolean query that evaluated to true is a hit even when no single
        // line matches the highlight pattern; a regex search needs a match.
        if (parsedQuery || matches.length > 0) {
          const result = {
            section,
            document,
            sectionContent,
          };
          results.push(matches.length > 0 ? { ...result, matches } : result);
          if (limit !== undefined && results.length >= limit) {
            return results;
          }
        }
      }
    }
    return results;
  });
|
|
1698
|
+
|
|
1699
|
+
export {
|
|
1700
|
+
formatSummary,
|
|
1701
|
+
formatAssembledContext,
|
|
1702
|
+
summarizeFile,
|
|
1703
|
+
assembleContext,
|
|
1704
|
+
checkPricingFreshness,
|
|
1705
|
+
getPricingDate,
|
|
1706
|
+
estimateEmbeddingCost,
|
|
1707
|
+
buildEmbeddings,
|
|
1708
|
+
semanticSearch,
|
|
1709
|
+
getEmbeddingStats,
|
|
1710
|
+
isAdvancedQuery,
|
|
1711
|
+
search,
|
|
1712
|
+
searchContent,
|
|
1713
|
+
};
|