ollama-intern-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +252 -0
- package/SECURITY.md +22 -0
- package/dist/corpus/chunker.d.ts +63 -0
- package/dist/corpus/chunker.d.ts.map +1 -0
- package/dist/corpus/chunker.js +230 -0
- package/dist/corpus/chunker.js.map +1 -0
- package/dist/corpus/fusion.d.ts +67 -0
- package/dist/corpus/fusion.d.ts.map +1 -0
- package/dist/corpus/fusion.js +76 -0
- package/dist/corpus/fusion.js.map +1 -0
- package/dist/corpus/indexer.d.ts +33 -0
- package/dist/corpus/indexer.d.ts.map +1 -0
- package/dist/corpus/indexer.js +201 -0
- package/dist/corpus/indexer.js.map +1 -0
- package/dist/corpus/lexical.d.ts +95 -0
- package/dist/corpus/lexical.d.ts.map +1 -0
- package/dist/corpus/lexical.js +227 -0
- package/dist/corpus/lexical.js.map +1 -0
- package/dist/corpus/manifest.d.ts +29 -0
- package/dist/corpus/manifest.d.ts.map +1 -0
- package/dist/corpus/manifest.js +45 -0
- package/dist/corpus/manifest.js.map +1 -0
- package/dist/corpus/refresh.d.ts +54 -0
- package/dist/corpus/refresh.d.ts.map +1 -0
- package/dist/corpus/refresh.js +155 -0
- package/dist/corpus/refresh.js.map +1 -0
- package/dist/corpus/searcher.d.ts +48 -0
- package/dist/corpus/searcher.d.ts.map +1 -0
- package/dist/corpus/searcher.js +131 -0
- package/dist/corpus/searcher.js.map +1 -0
- package/dist/corpus/storage.d.ts +56 -0
- package/dist/corpus/storage.d.ts.map +1 -0
- package/dist/corpus/storage.js +81 -0
- package/dist/corpus/storage.js.map +1 -0
- package/dist/coverage.d.ts +38 -0
- package/dist/coverage.d.ts.map +1 -0
- package/dist/coverage.js +117 -0
- package/dist/coverage.js.map +1 -0
- package/dist/embedMath.d.ts +22 -0
- package/dist/embedMath.d.ts.map +1 -0
- package/dist/embedMath.js +34 -0
- package/dist/embedMath.js.map +1 -0
- package/dist/envelope.d.ts +53 -0
- package/dist/envelope.d.ts.map +1 -0
- package/dist/envelope.js +32 -0
- package/dist/envelope.js.map +1 -0
- package/dist/errors.d.ts +20 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +30 -0
- package/dist/errors.js.map +1 -0
- package/dist/evals/retrieval.d.ts +68 -0
- package/dist/evals/retrieval.d.ts.map +1 -0
- package/dist/evals/retrieval.js +173 -0
- package/dist/evals/retrieval.js.map +1 -0
- package/dist/guardrails/citations.d.ts +28 -0
- package/dist/guardrails/citations.d.ts.map +1 -0
- package/dist/guardrails/citations.js +45 -0
- package/dist/guardrails/citations.js.map +1 -0
- package/dist/guardrails/compileCheck.d.ts +20 -0
- package/dist/guardrails/compileCheck.d.ts.map +1 -0
- package/dist/guardrails/compileCheck.js +96 -0
- package/dist/guardrails/compileCheck.js.map +1 -0
- package/dist/guardrails/confidence.d.ts +21 -0
- package/dist/guardrails/confidence.d.ts.map +1 -0
- package/dist/guardrails/confidence.js +18 -0
- package/dist/guardrails/confidence.js.map +1 -0
- package/dist/guardrails/timeouts.d.ts +35 -0
- package/dist/guardrails/timeouts.d.ts.map +1 -0
- package/dist/guardrails/timeouts.js +58 -0
- package/dist/guardrails/timeouts.js.map +1 -0
- package/dist/guardrails/writeConfirm.d.ts +25 -0
- package/dist/guardrails/writeConfirm.d.ts.map +1 -0
- package/dist/guardrails/writeConfirm.js +35 -0
- package/dist/guardrails/writeConfirm.js.map +1 -0
- package/dist/index.d.ts +12 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +153 -0
- package/dist/index.js.map +1 -0
- package/dist/observability.d.ts +66 -0
- package/dist/observability.d.ts.map +1 -0
- package/dist/observability.js +51 -0
- package/dist/observability.js.map +1 -0
- package/dist/ollama.d.ts +105 -0
- package/dist/ollama.d.ts.map +1 -0
- package/dist/ollama.js +109 -0
- package/dist/ollama.js.map +1 -0
- package/dist/prewarm.d.ts +28 -0
- package/dist/prewarm.d.ts.map +1 -0
- package/dist/prewarm.js +69 -0
- package/dist/prewarm.js.map +1 -0
- package/dist/profiles.d.ts +54 -0
- package/dist/profiles.d.ts.map +1 -0
- package/dist/profiles.js +95 -0
- package/dist/profiles.js.map +1 -0
- package/dist/protectedPaths.d.ts +33 -0
- package/dist/protectedPaths.d.ts.map +1 -0
- package/dist/protectedPaths.js +54 -0
- package/dist/protectedPaths.js.map +1 -0
- package/dist/runContext.d.ts +21 -0
- package/dist/runContext.d.ts.map +1 -0
- package/dist/runContext.js +9 -0
- package/dist/runContext.js.map +1 -0
- package/dist/semaphore.d.ts +16 -0
- package/dist/semaphore.d.ts.map +1 -0
- package/dist/semaphore.js +37 -0
- package/dist/semaphore.js.map +1 -0
- package/dist/sources.d.ts +19 -0
- package/dist/sources.d.ts.map +1 -0
- package/dist/sources.js +41 -0
- package/dist/sources.js.map +1 -0
- package/dist/tiers.d.ts +39 -0
- package/dist/tiers.d.ts.map +1 -0
- package/dist/tiers.js +44 -0
- package/dist/tiers.js.map +1 -0
- package/dist/tools/artifactDiff.d.ts +66 -0
- package/dist/tools/artifactDiff.d.ts.map +1 -0
- package/dist/tools/artifactDiff.js +66 -0
- package/dist/tools/artifactDiff.js.map +1 -0
- package/dist/tools/artifactExportToPath.d.ts +38 -0
- package/dist/tools/artifactExportToPath.d.ts.map +1 -0
- package/dist/tools/artifactExportToPath.js +51 -0
- package/dist/tools/artifactExportToPath.js.map +1 -0
- package/dist/tools/artifactList.d.ts +60 -0
- package/dist/tools/artifactList.d.ts.map +1 -0
- package/dist/tools/artifactList.js +107 -0
- package/dist/tools/artifactList.js.map +1 -0
- package/dist/tools/artifactRead.d.ts +43 -0
- package/dist/tools/artifactRead.d.ts.map +1 -0
- package/dist/tools/artifactRead.js +83 -0
- package/dist/tools/artifactRead.js.map +1 -0
- package/dist/tools/artifactSnippets.d.ts +56 -0
- package/dist/tools/artifactSnippets.d.ts.map +1 -0
- package/dist/tools/artifactSnippets.js +97 -0
- package/dist/tools/artifactSnippets.js.map +1 -0
- package/dist/tools/artifacts/diff.d.ts +161 -0
- package/dist/tools/artifacts/diff.d.ts.map +1 -0
- package/dist/tools/artifacts/diff.js +267 -0
- package/dist/tools/artifacts/diff.js.map +1 -0
- package/dist/tools/artifacts/export.d.ts +43 -0
- package/dist/tools/artifacts/export.d.ts.map +1 -0
- package/dist/tools/artifacts/export.js +113 -0
- package/dist/tools/artifacts/export.js.map +1 -0
- package/dist/tools/artifacts/scan.d.ts +86 -0
- package/dist/tools/artifacts/scan.d.ts.map +1 -0
- package/dist/tools/artifacts/scan.js +251 -0
- package/dist/tools/artifacts/scan.js.map +1 -0
- package/dist/tools/artifacts/snippets.d.ts +42 -0
- package/dist/tools/artifacts/snippets.d.ts.map +1 -0
- package/dist/tools/artifacts/snippets.js +165 -0
- package/dist/tools/artifacts/snippets.js.map +1 -0
- package/dist/tools/batch.d.ts +65 -0
- package/dist/tools/batch.d.ts.map +1 -0
- package/dist/tools/batch.js +117 -0
- package/dist/tools/batch.js.map +1 -0
- package/dist/tools/briefs/common.d.ts +65 -0
- package/dist/tools/briefs/common.d.ts.map +1 -0
- package/dist/tools/briefs/common.js +128 -0
- package/dist/tools/briefs/common.js.map +1 -0
- package/dist/tools/briefs/evidence.d.ts +40 -0
- package/dist/tools/briefs/evidence.d.ts.map +1 -0
- package/dist/tools/briefs/evidence.js +103 -0
- package/dist/tools/briefs/evidence.js.map +1 -0
- package/dist/tools/changeBrief.d.ts +85 -0
- package/dist/tools/changeBrief.d.ts.map +1 -0
- package/dist/tools/changeBrief.js +228 -0
- package/dist/tools/changeBrief.js.map +1 -0
- package/dist/tools/chat.d.ts +42 -0
- package/dist/tools/chat.d.ts.map +1 -0
- package/dist/tools/chat.js +49 -0
- package/dist/tools/chat.js.map +1 -0
- package/dist/tools/classify.d.ts +56 -0
- package/dist/tools/classify.d.ts.map +1 -0
- package/dist/tools/classify.js +103 -0
- package/dist/tools/classify.js.map +1 -0
- package/dist/tools/corpusAnswer.d.ts +79 -0
- package/dist/tools/corpusAnswer.d.ts.map +1 -0
- package/dist/tools/corpusAnswer.js +259 -0
- package/dist/tools/corpusAnswer.js.map +1 -0
- package/dist/tools/corpusIndex.d.ts +30 -0
- package/dist/tools/corpusIndex.d.ts.map +1 -0
- package/dist/tools/corpusIndex.js +67 -0
- package/dist/tools/corpusIndex.js.map +1 -0
- package/dist/tools/corpusList.d.ts +19 -0
- package/dist/tools/corpusList.d.ts.map +1 -0
- package/dist/tools/corpusList.js +34 -0
- package/dist/tools/corpusList.js.map +1 -0
- package/dist/tools/corpusRefresh.d.ts +29 -0
- package/dist/tools/corpusRefresh.d.ts.map +1 -0
- package/dist/tools/corpusRefresh.js +58 -0
- package/dist/tools/corpusRefresh.js.map +1 -0
- package/dist/tools/corpusSearch.d.ts +44 -0
- package/dist/tools/corpusSearch.d.ts.map +1 -0
- package/dist/tools/corpusSearch.js +80 -0
- package/dist/tools/corpusSearch.js.map +1 -0
- package/dist/tools/draft.d.ts +38 -0
- package/dist/tools/draft.d.ts.map +1 -0
- package/dist/tools/draft.js +72 -0
- package/dist/tools/draft.js.map +1 -0
- package/dist/tools/embed.d.ts +27 -0
- package/dist/tools/embed.d.ts.map +1 -0
- package/dist/tools/embed.js +41 -0
- package/dist/tools/embed.js.map +1 -0
- package/dist/tools/embedSearch.d.ts +58 -0
- package/dist/tools/embedSearch.d.ts.map +1 -0
- package/dist/tools/embedSearch.js +76 -0
- package/dist/tools/embedSearch.js.map +1 -0
- package/dist/tools/extract.d.ts +58 -0
- package/dist/tools/extract.d.ts.map +1 -0
- package/dist/tools/extract.js +93 -0
- package/dist/tools/extract.js.map +1 -0
- package/dist/tools/incidentBrief.d.ts +81 -0
- package/dist/tools/incidentBrief.d.ts.map +1 -0
- package/dist/tools/incidentBrief.js +252 -0
- package/dist/tools/incidentBrief.js.map +1 -0
- package/dist/tools/packs/changePack.d.ts +166 -0
- package/dist/tools/packs/changePack.d.ts.map +1 -0
- package/dist/tools/packs/changePack.js +470 -0
- package/dist/tools/packs/changePack.js.map +1 -0
- package/dist/tools/packs/incidentPack.d.ts +125 -0
- package/dist/tools/packs/incidentPack.d.ts.map +1 -0
- package/dist/tools/packs/incidentPack.js +354 -0
- package/dist/tools/packs/incidentPack.js.map +1 -0
- package/dist/tools/packs/repoPack.d.ts +207 -0
- package/dist/tools/packs/repoPack.d.ts.map +1 -0
- package/dist/tools/packs/repoPack.js +456 -0
- package/dist/tools/packs/repoPack.js.map +1 -0
- package/dist/tools/repoBrief.d.ts +81 -0
- package/dist/tools/repoBrief.d.ts.map +1 -0
- package/dist/tools/repoBrief.js +213 -0
- package/dist/tools/repoBrief.js.map +1 -0
- package/dist/tools/research.d.ts +45 -0
- package/dist/tools/research.d.ts.map +1 -0
- package/dist/tools/research.js +100 -0
- package/dist/tools/research.js.map +1 -0
- package/dist/tools/runner.d.ts +24 -0
- package/dist/tools/runner.d.ts.map +1 -0
- package/dist/tools/runner.js +47 -0
- package/dist/tools/runner.js.map +1 -0
- package/dist/tools/summarizeDeep.d.ts +49 -0
- package/dist/tools/summarizeDeep.d.ts.map +1 -0
- package/dist/tools/summarizeDeep.js +109 -0
- package/dist/tools/summarizeDeep.js.map +1 -0
- package/dist/tools/summarizeFast.d.ts +28 -0
- package/dist/tools/summarizeFast.d.ts.map +1 -0
- package/dist/tools/summarizeFast.js +43 -0
- package/dist/tools/summarizeFast.js.map +1 -0
- package/dist/tools/triageLogs.d.ts +51 -0
- package/dist/tools/triageLogs.d.ts.map +1 -0
- package/dist/tools/triageLogs.js +96 -0
- package/dist/tools/triageLogs.js.map +1 -0
- package/dist/version.d.ts +2 -0
- package/dist/version.d.ts.map +1 -0
- package/dist/version.js +2 -0
- package/dist/version.js.map +1 -0
- package/package.json +60 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 mcp-tool-shop
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<img src="https://raw.githubusercontent.com/mcp-tool-shop-org/brand/main/logos/ollama-intern-mcp/readme.png" alt="Ollama Intern MCP" width="500">
|
|
3
|
+
</p>
|
|
4
|
+
|
|
5
|
+
<p align="center">
|
|
6
|
+
<a href="https://github.com/mcp-tool-shop-org/ollama-intern-mcp/actions"><img alt="CI" src="https://github.com/mcp-tool-shop-org/ollama-intern-mcp/actions/workflows/ci.yml/badge.svg"></a>
|
|
7
|
+
<a href="LICENSE"><img alt="MIT License" src="https://img.shields.io/badge/license-MIT-blue.svg"></a>
|
|
8
|
+
<a href="https://mcp-tool-shop-org.github.io/ollama-intern-mcp/"><img alt="Landing Page" src="https://img.shields.io/badge/landing-page-8b5cf6"></a>
|
|
9
|
+
<a href="https://mcp-tool-shop-org.github.io/ollama-intern-mcp/handbook/"><img alt="Handbook" src="https://img.shields.io/badge/handbook-docs-10b981"></a>
|
|
10
|
+
</p>
|
|
11
|
+
|
|
12
|
+
> **The local intern for Claude Code.** 28 job-shaped tools, evidence-first briefs, durable artifacts.
|
|
13
|
+
|
|
14
|
+
An MCP server that gives Claude Code a **local intern** with rules, tiers, a desk, and a filing cabinet. Claude picks the _tool_; the tool picks the _tier_ (Instant / Workhorse / Deep / Embed); the tier writes a file you can open next week.
|
|
15
|
+
|
|
16
|
+
No cloud. No telemetry. No "autonomous" anything. Every call shows its work.
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
## Lead example — one call, one artifact
|
|
21
|
+
|
|
22
|
+
```jsonc
|
|
23
|
+
// Claude → ollama-intern-mcp
|
|
24
|
+
{
|
|
25
|
+
"tool": "ollama_incident_pack",
|
|
26
|
+
"arguments": {
|
|
27
|
+
"title": "sprite pipeline 5 AM paging regression",
|
|
28
|
+
"logs": "[2026-04-16 05:07] worker-3 OOM killed\n[2026-04-16 05:07] ollama /api/ps reports evicted=true size=8.1GB\n...",
|
|
29
|
+
"source_paths": ["F:/AI/sprite-foundry/src/worker.ts", "memory/sprite-foundry-visual-mastery.md"]
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
Returns an envelope pointing at a file on disk:
|
|
35
|
+
|
|
36
|
+
```jsonc
|
|
37
|
+
{
|
|
38
|
+
"result": {
|
|
39
|
+
"pack": "incident",
|
|
40
|
+
"slug": "2026-04-16-sprite-pipeline-5-am-paging-regression",
|
|
41
|
+
"artifact_md": "~/.ollama-intern/artifacts/incident/2026-04-16-sprite-pipeline-5-am-paging-regression.md",
|
|
42
|
+
"artifact_json": "~/.ollama-intern/artifacts/incident/2026-04-16-sprite-pipeline-5-am-paging-regression.json",
|
|
43
|
+
"weak": false,
|
|
44
|
+
"evidence_count": 6,
|
|
45
|
+
"next_checks": ["residency.evicted across last 24h", "OLLAMA_MAX_LOADED_MODELS vs loaded size"]
|
|
46
|
+
},
|
|
47
|
+
"tier_used": "deep",
|
|
48
|
+
"model": "qwen2.5:14b-instruct-q4_K_M",
|
|
49
|
+
"hardware_profile": "dev-rtx5080",
|
|
50
|
+
"tokens_in": 4180, "tokens_out": 612,
|
|
51
|
+
"elapsed_ms": 8410,
|
|
52
|
+
"residency": { "in_vram": true, "evicted": false }
|
|
53
|
+
}
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
That markdown file is the intern's desk output — headings, evidence block with cited ids, investigative `next_checks`, `weak: true` banner if evidence is thin. It's deterministic: the renderer is code, not a prompt. Open it tomorrow, diff it next week, export it into a handbook with `ollama_artifact_export_to_path`.
|
|
57
|
+
|
|
58
|
+
Every competitor in this category leads with "save tokens." We lead with _here is the file the intern wrote._
|
|
59
|
+
|
|
60
|
+
---
|
|
61
|
+
|
|
62
|
+
## What's in here — four tiers, 28 tools
|
|
63
|
+
|
|
64
|
+
| Tier | Count | What lives here |
|
|
65
|
+
|---|---|---|
|
|
66
|
+
| **Atoms** | 15 | Job-shaped primitives. `classify`, `extract`, `triage_logs`, `summarize_fast` / `deep`, `draft`, `research`, `corpus_search` / `answer` / `index` / `refresh` / `list`, `embed_search`, `embed`, `chat`. Batch-capable atoms (`classify`, `extract`, `triage_logs`) accept `items: [{id, text}]`. |
|
|
67
|
+
| **Briefs** | 3 | Evidence-backed structured operator briefs. `incident_brief`, `repo_brief`, `change_brief`. Every claim cites an evidence id; unknowns stripped server-side. Weak evidence surfaces `weak: true` rather than fake narrative. |
|
|
68
|
+
| **Packs** | 3 | Fixed-pipeline compound jobs that write durable markdown + JSON to `~/.ollama-intern/artifacts/`. `incident_pack`, `repo_pack`, `change_pack`. Deterministic renderers — no model calls on the artifact shape. |
|
|
69
|
+
| **Artifacts** | 7 | Continuity surface over pack outputs. `artifact_list` / `read` / `diff` / `export_to_path`, plus three deterministic snippets: `incident_note`, `onboarding_section`, `release_note`. |
|
|
70
|
+
|
|
71
|
+
Total: **18 primitives + 3 packs + 7 artifact tools = 28**.
|
|
72
|
+
|
|
73
|
+
Freeze lines:
|
|
74
|
+
- Atoms frozen at 18 (atoms + briefs). No new atom tools.
|
|
75
|
+
- Packs frozen at 3. No new pack types.
|
|
76
|
+
- Artifact tier frozen at 7.
|
|
77
|
+
|
|
78
|
+
The full tool reference lives in the [handbook](https://mcp-tool-shop-org.github.io/ollama-intern-mcp/handbook/reference/).
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
## Install
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
npm install -g ollama-intern-mcp
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Requires [Ollama](https://ollama.com) running locally and the tier models pulled.
|
|
89
|
+
|
|
90
|
+
### Claude Code
|
|
91
|
+
|
|
92
|
+
```json
|
|
93
|
+
{
|
|
94
|
+
"mcpServers": {
|
|
95
|
+
"ollama-intern": {
|
|
96
|
+
"command": "npx",
|
|
97
|
+
"args": ["-y", "ollama-intern-mcp"],
|
|
98
|
+
"env": {
|
|
99
|
+
"OLLAMA_HOST": "http://127.0.0.1:11434",
|
|
100
|
+
"INTERN_PROFILE": "dev-rtx5080"
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### Claude Desktop
|
|
108
|
+
|
|
109
|
+
Same block, written to `~/Library/Application Support/Claude/claude_desktop_config.json` (macOS) or `%APPDATA%\Claude\claude_desktop_config.json` (Windows).
|
|
110
|
+
|
|
111
|
+
### Model pulls
|
|
112
|
+
|
|
113
|
+
**Default dev profile (RTX 5080 16GB and similar):**
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
ollama pull qwen2.5:7b-instruct-q4_K_M
|
|
117
|
+
ollama pull qwen2.5-coder:7b-instruct-q4_K_M
|
|
118
|
+
ollama pull qwen2.5:14b-instruct-q4_K_M
|
|
119
|
+
ollama pull nomic-embed-text
|
|
120
|
+
export OLLAMA_MAX_LOADED_MODELS=4
|
|
121
|
+
export OLLAMA_KEEP_ALIVE=-1
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
**M5 Max profile (128GB unified):**
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
ollama pull qwen2.5:14b-instruct-q4_K_M
|
|
128
|
+
ollama pull qwen2.5-coder:32b-instruct-q4_K_M
|
|
129
|
+
ollama pull llama3.3:70b-instruct-q4_K_M
|
|
130
|
+
ollama pull nomic-embed-text
|
|
131
|
+
export INTERN_PROFILE=m5-max
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
Per-tier env vars (`INTERN_TIER_INSTANT`, `INTERN_TIER_WORKHORSE`, `INTERN_TIER_DEEP`, `INTERN_EMBED_MODEL`) still override profile picks for one-offs.
|
|
135
|
+
|
|
136
|
+
---
|
|
137
|
+
|
|
138
|
+
## Uniform envelope
|
|
139
|
+
|
|
140
|
+
Every tool returns the same shape:
|
|
141
|
+
|
|
142
|
+
```ts
|
|
143
|
+
{
|
|
144
|
+
result: <tool-specific>,
|
|
145
|
+
tier_used: "instant" | "workhorse" | "deep" | "embed",
|
|
146
|
+
model: string,
|
|
147
|
+
hardware_profile: string, // "dev-rtx5080" | "dev-rtx5080-llama" | "m5-max"
|
|
148
|
+
tokens_in: number,
|
|
149
|
+
tokens_out: number,
|
|
150
|
+
elapsed_ms: number,
|
|
151
|
+
residency: {
|
|
152
|
+
in_vram: boolean,
|
|
153
|
+
size_bytes: number,
|
|
154
|
+
size_vram_bytes: number,
|
|
155
|
+
evicted: boolean
|
|
156
|
+
} | null
|
|
157
|
+
}
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
`residency` comes from Ollama's `/api/ps`. When `evicted: true` or `size_vram_bytes < size_bytes`, the model has paged to disk and inference slows down 5–10× — surface this to the user so they know to restart Ollama or trim the loaded-model count.
|
|
161
|
+
|
|
162
|
+
Every call is logged as one NDJSON line to `~/.ollama-intern/log.ndjson`. Filter by `hardware_profile` to keep dev numbers out of publishable benchmarks.
|
|
163
|
+
|
|
164
|
+
---
|
|
165
|
+
|
|
166
|
+
## Hardware profiles
|
|
167
|
+
|
|
168
|
+
| Profile | Instant | Workhorse | Deep | Embed |
|
|
169
|
+
|---|---|---|---|---|
|
|
170
|
+
| **`dev-rtx5080`** (default) | qwen2.5 7B | qwen2.5-coder 7B | qwen2.5 14B | nomic-embed-text |
|
|
171
|
+
| `dev-rtx5080-llama` | qwen2.5 7B | qwen2.5-coder 7B | **llama3.1 8B** | nomic-embed-text |
|
|
172
|
+
| `m5-max` | qwen2.5 14B | qwen2.5-coder 32B | llama3.3 70B | nomic-embed-text |
|
|
173
|
+
|
|
174
|
+
**Same-family ladder on default dev** so bad outputs are tool/design problems, not cross-family mismatches. `dev-rtx5080-llama` is the parity rail — run the same gold evals through Llama 8B before committing to Llama on the M5 Max.
|
|
175
|
+
|
|
176
|
+
---
|
|
177
|
+
|
|
178
|
+
## Evidence laws
|
|
179
|
+
|
|
180
|
+
These are enforced in the server, not the prompt:
|
|
181
|
+
|
|
182
|
+
- **Citations required.** Every brief claim cites an evidence id.
|
|
183
|
+
- **Unknowns stripped server-side.** Models that cite ids not in the evidence bundle have those ids dropped with a warning before the result returns.
|
|
184
|
+
- **Weak is weak.** Thin evidence flags `weak: true` with coverage notes. Never smoothed into fake narrative.
|
|
185
|
+
- **Investigative, not prescriptive.** `next_checks` / `read_next` / `likely_breakpoints` only. Prompts forbid "apply this fix."
|
|
186
|
+
- **Deterministic renderers.** Artifact markdown shape is code, not a prompt. `draft` stays reserved for prose where model wording matters.
|
|
187
|
+
- **Same-pack diffs only.** Cross-pack `artifact_diff` is refused loudly; payloads stay distinct.
|
|
188
|
+
|
|
189
|
+
---
|
|
190
|
+
|
|
191
|
+
## Artifacts & continuity
|
|
192
|
+
|
|
193
|
+
Packs write to `~/.ollama-intern/artifacts/{incident,repo,change}/<slug>.(md|json)`. The artifact tier gives you a continuity surface without turning this into a file-management tool:
|
|
194
|
+
|
|
195
|
+
- `artifact_list` — metadata-only index, filterable by pack, date, slug glob
|
|
196
|
+
- `artifact_read` — typed read by `{pack, slug}` or `{json_path}`
|
|
197
|
+
- `artifact_diff` — structured same-pack comparison; weak-flip surfaced
|
|
198
|
+
- `artifact_export_to_path` — writes an existing artifact (with provenance header) to a caller-declared `allowed_roots`. Refuses existing files unless `overwrite: true`.
|
|
199
|
+
- `artifact_incident_note_snippet` — operator-note fragment
|
|
200
|
+
- `artifact_onboarding_section_snippet` — handbook fragment
|
|
201
|
+
- `artifact_release_note_snippet` — DRAFT release-note fragment
|
|
202
|
+
|
|
203
|
+
No model calls in this tier. All render from stored content.
|
|
204
|
+
|
|
205
|
+
---
|
|
206
|
+
|
|
207
|
+
## Threat model & telemetry
|
|
208
|
+
|
|
209
|
+
**Data touched:** file paths the caller explicitly hands in (`ollama_research`, corpus tools), inline text, and artifacts the caller asks to be written under `~/.ollama-intern/artifacts/` or a caller-declared `allowed_roots`.
|
|
210
|
+
|
|
211
|
+
**Data NOT touched:** anything outside `source_paths` / `allowed_roots`. Paths containing `..` are rejected before normalization. `artifact_export_to_path` refuses existing files unless `overwrite: true`. Drafts targeting protected paths (`memory/`, `.claude/`, `docs/canon/`, etc.) require explicit `confirm_write: true`, enforced server-side.
|
|
212
|
+
|
|
213
|
+
**Network egress:** **off by default.** The only outbound traffic is to the local Ollama HTTP endpoint. No cloud calls, no update pings, no crash reporting.
|
|
214
|
+
|
|
215
|
+
**Telemetry:** **none.** Every call is logged as one NDJSON line to `~/.ollama-intern/log.ndjson` on your machine. Nothing leaves the box.
|
|
216
|
+
|
|
217
|
+
**Errors:** structured shape `{ code, message, hint, retryable }`. Stack traces are never exposed through tool results.
|
|
218
|
+
|
|
219
|
+
Full policy: [SECURITY.md](SECURITY.md).
|
|
220
|
+
|
|
221
|
+
---
|
|
222
|
+
|
|
223
|
+
## Standards
|
|
224
|
+
|
|
225
|
+
Built to the [Shipcheck](https://github.com/mcp-tool-shop-org/shipcheck) bar. Hard gates A–D pass; see [SHIP_GATE.md](SHIP_GATE.md) and [SCORECARD.md](SCORECARD.md).
|
|
226
|
+
|
|
227
|
+
- **A. Security** — SECURITY.md, threat model, no telemetry, path-safety, `confirm_write` on protected paths
|
|
228
|
+
- **B. Errors** — structured shape across all tool results; no raw stacks
|
|
229
|
+
- **C. Docs** — README current, CHANGELOG, LICENSE; tool schemas self-document
|
|
230
|
+
- **D. Hygiene** — `npm run verify` (395 tests), CI with dep scanning, Dependabot, lockfile, `engines.node`
|
|
231
|
+
|
|
232
|
+
---
|
|
233
|
+
|
|
234
|
+
## Roadmap (hardening, not scope creep)
|
|
235
|
+
|
|
236
|
+
- **Phase 1 — Delegation Spine** ✓ shipped: atom surface, uniform envelope, tiered routing, guardrails
|
|
237
|
+
- **Phase 2 — Truth Spine** ✓ shipped: schema v2 chunking, BM25 + RRF, living corpora, evidence-backed briefs, retrieval eval pack
|
|
238
|
+
- **Phase 3 — Pack & Artifact Spine** ✓ shipped: fixed-pipeline packs with durable artifacts + continuity tier
|
|
239
|
+
- **Phase 4 — Adoption Spine** — real-use observation on the RTX 5080, hardening the rough edges that surface
|
|
240
|
+
- **Phase 5 — M5 Max benchmarks** — publishable numbers once the hardware lands (~2026-04-24)
|
|
241
|
+
|
|
242
|
+
Phases are organized by hardening layer. The atom/pack/artifact surface stays frozen.
|
|
243
|
+
|
|
244
|
+
---
|
|
245
|
+
|
|
246
|
+
## License
|
|
247
|
+
|
|
248
|
+
MIT — see [LICENSE](LICENSE).
|
|
249
|
+
|
|
250
|
+
---
|
|
251
|
+
|
|
252
|
+
<p align="center">Built by <a href="https://mcp-tool-shop.github.io/">MCP Tool Shop</a></p>
|
package/SECURITY.md
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Security Policy
|
|
2
|
+
|
|
3
|
+
## Threat Model
|
|
4
|
+
|
|
5
|
+
Ollama Intern MCP is a **local delegation layer**. It runs on the user's machine and talks only to a local Ollama instance (`http://localhost:11434` by default). No cloud calls, no telemetry.
|
|
6
|
+
|
|
7
|
+
The primary risks are not network-facing. They are:
|
|
8
|
+
|
|
9
|
+
1. **Hallucinated output trusted as truth.** Small local models fabricate. Mitigated by server-enforced citation stripping, confidence thresholds, `source_preview` in summaries, and compile checks on code drafts.
|
|
10
|
+
2. **Writes to protected truth surfaces.** Drafts must never overwrite canon, memory, or doctrine files by accident. Mitigated by a versioned protected-path list in [`src/protectedPaths.ts`](src/protectedPaths.ts) — writes targeting those paths require explicit `confirm_write: true`, enforced server-side (never prompt-side).
|
|
11
|
+
3. **Silent model eviction** (Ollama issue #13227). Inference quietly degrades 5–10× when a model pages to disk. Mitigated by surfacing `residency` in every call envelope so Claude can detect degradation mechanically.
|
|
12
|
+
4. **Path traversal in `ollama_research`.** Mitigated by validating every cited path against the `source_paths` input and stripping any unknown path before returning.
|
|
13
|
+
|
|
14
|
+
## Reporting
|
|
15
|
+
|
|
16
|
+
Email: 64996768+mcp-tool-shop@users.noreply.github.com
|
|
17
|
+
|
|
18
|
+
Please do not file public issues for security bugs. We will acknowledge within 72 hours.
|
|
19
|
+
|
|
20
|
+
## Supported Versions
|
|
21
|
+
|
|
22
|
+
1.x. Only the latest release receives security fixes.
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Heading-aware document chunker.
|
|
3
|
+
*
|
|
4
|
+
* `chunk()` is the size-based sliding-window primitive — kept as the
|
|
5
|
+
* low-level splitter that operates within a single heading section.
|
|
6
|
+
*
|
|
7
|
+
* `chunkDocument()` is the real entry point: it walks the document,
|
|
8
|
+
* splits on heading boundaries first, preserves fenced code blocks
|
|
9
|
+
* intact, classifies each segment (frontmatter / heading / paragraph /
|
|
10
|
+
* code / list), attaches the heading_path breadcrumb, and only then
|
|
11
|
+
* size-splits oversized segments. This gives retrieval real metadata
|
|
12
|
+
* to rank on (heading_path match, chunk_type filter, title boost).
|
|
13
|
+
*
|
|
14
|
+
* Boundaries are preserved in absolute char offsets against the input
|
|
15
|
+
* text so callers can slice back into the source if needed.
|
|
16
|
+
*/
|
|
17
|
+
export type ChunkType = "heading" | "paragraph" | "code" | "list" | "frontmatter";
|
|
18
|
+
export interface Chunk {
|
|
19
|
+
index: number;
|
|
20
|
+
char_start: number;
|
|
21
|
+
char_end: number;
|
|
22
|
+
text: string;
|
|
23
|
+
heading_path: string[];
|
|
24
|
+
chunk_type: ChunkType;
|
|
25
|
+
}
|
|
26
|
+
export interface ChunkOptions {
|
|
27
|
+
chunk_chars: number;
|
|
28
|
+
chunk_overlap: number;
|
|
29
|
+
}
|
|
30
|
+
export declare const DEFAULT_CHUNK: ChunkOptions;
|
|
31
|
+
export interface ChunkedDocument {
|
|
32
|
+
title: string | null;
|
|
33
|
+
chunks: Chunk[];
|
|
34
|
+
}
|
|
35
|
+
interface RawChunk {
|
|
36
|
+
index: number;
|
|
37
|
+
char_start: number;
|
|
38
|
+
char_end: number;
|
|
39
|
+
text: string;
|
|
40
|
+
}
|
|
41
|
+
/**
|
|
42
|
+
* Primitive size-based sliding-window splitter. Used internally by
|
|
43
|
+
* chunkDocument() to size-split oversized heading sections; also exposed
|
|
44
|
+
* so existing tests can exercise window math directly.
|
|
45
|
+
*/
|
|
46
|
+
export declare function chunk(text: string, opts?: ChunkOptions): RawChunk[];
|
|
47
|
+
/**
|
|
48
|
+
* Heading-aware document chunker.
|
|
49
|
+
*
|
|
50
|
+
* Behavior:
|
|
51
|
+
* - YAML frontmatter at the very top becomes a single "frontmatter" chunk.
|
|
52
|
+
* - Every markdown heading (# through ######) updates a depth-indexed
|
|
53
|
+
* stack; chunks inherit the heading_path of the section they sit in.
|
|
54
|
+
* - Fenced code blocks (``` or ~~~) are preserved intact — never split.
|
|
55
|
+
* - Non-code sections are classified as "list" if majority of non-empty
|
|
56
|
+
* lines are list items, else "paragraph".
|
|
57
|
+
* - Sections that exceed chunk_chars are size-split via chunk() with
|
|
58
|
+
* overlap; sub-chunks inherit the parent's heading_path + type.
|
|
59
|
+
* - Title = first H1 heading after frontmatter, else null.
|
|
60
|
+
*/
|
|
61
|
+
export declare function chunkDocument(text: string, opts?: ChunkOptions): ChunkedDocument;
|
|
62
|
+
export {};
|
|
63
|
+
//# sourceMappingURL=chunker.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunker.d.ts","sourceRoot":"","sources":["../../src/corpus/chunker.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,MAAM,MAAM,SAAS,GAAG,SAAS,GAAG,WAAW,GAAG,MAAM,GAAG,MAAM,GAAG,aAAa,CAAC;AAElF,MAAM,WAAW,KAAK;IACpB,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,UAAU,EAAE,SAAS,CAAC;CACvB;AAED,MAAM,WAAW,YAAY;IAC3B,WAAW,EAAE,MAAM,CAAC;IACpB,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,eAAO,MAAM,aAAa,EAAE,YAG3B,CAAC;AAEF,MAAM,WAAW,eAAe;IAC9B,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,MAAM,EAAE,KAAK,EAAE,CAAC;CACjB;AAED,UAAU,QAAQ;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;CACd;AAED;;;;GAIG;AACH,wBAAgB,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,GAAE,YAA4B,GAAG,QAAQ,EAAE,CAwBlF;AAuED;;;;;;;;;;;;;GAaG;AACH,wBAAgB,aAAa,CAC3B,IAAI,EAAE,MAAM,EACZ,IAAI,GAAE,YAA4B,GACjC,eAAe,CAmIjB"}
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Heading-aware document chunker.
|
|
3
|
+
*
|
|
4
|
+
* `chunk()` is the size-based sliding-window primitive — kept as the
|
|
5
|
+
* low-level splitter that operates within a single heading section.
|
|
6
|
+
*
|
|
7
|
+
* `chunkDocument()` is the real entry point: it walks the document,
|
|
8
|
+
* splits on heading boundaries first, preserves fenced code blocks
|
|
9
|
+
* intact, classifies each segment (frontmatter / heading / paragraph /
|
|
10
|
+
* code / list), attaches the heading_path breadcrumb, and only then
|
|
11
|
+
* size-splits oversized segments. This gives retrieval real metadata
|
|
12
|
+
* to rank on (heading_path match, chunk_type filter, title boost).
|
|
13
|
+
*
|
|
14
|
+
* Boundaries are preserved in absolute char offsets against the input
|
|
15
|
+
* text so callers can slice back into the source if needed.
|
|
16
|
+
*/
|
|
17
|
+
/** Default window size and overlap (in characters) used by the chunkers. */
export const DEFAULT_CHUNK = {
    chunk_chars: 800,
    chunk_overlap: 100,
};
/**
 * Primitive size-based sliding-window splitter. Used internally by
 * chunkDocument() to size-split oversized heading sections; also exposed
 * so existing tests can exercise window math directly.
 *
 * The window size has a floor of 100 characters, and the overlap is kept
 * within `[0, size / 2]` so the window always advances and never skips
 * text. Non-integer or non-numeric settings are floored / treated as 0
 * before those limits are applied.
 *
 * @param {string} text Text to split.
 * @param {{chunk_chars: number, chunk_overlap: number}} [opts] Window
 *   settings; defaults to DEFAULT_CHUNK.
 * @returns Windows in order, each with its `index`, `char_start` /
 *   `char_end` offsets into `text`, and the sliced `text`. Empty input
 *   yields an empty array.
 */
export function chunk(text, opts = DEFAULT_CHUNK) {
    // `|| 0` turns NaN (from undefined / non-numeric settings) into 0 so the
    // floor of 100 applies instead of NaN poisoning the window arithmetic.
    const size = Math.max(100, Math.floor(opts.chunk_chars) || 0);
    // A negative overlap would make the step larger than the window and
    // silently drop the text between consecutive windows; clamp it to 0.
    const requestedOverlap = Math.max(0, Math.floor(opts.chunk_overlap) || 0);
    const overlap = Math.min(requestedOverlap, Math.floor(size / 2));
    if (text.length === 0)
        return [];
    if (text.length <= size) {
        return [{ index: 0, char_start: 0, char_end: text.length, text }];
    }
    const out = [];
    const step = size - overlap;
    let cursor = 0;
    let i = 0;
    while (cursor < text.length) {
        const end = Math.min(cursor + size, text.length);
        out.push({
            index: i,
            char_start: cursor,
            char_end: end,
            text: text.slice(cursor, end),
        });
        // The window that reaches the end of the text is the last one.
        if (end === text.length)
            break;
        cursor += step;
        i += 1;
    }
    return out;
}
|
|
53
|
+
// Line-level patterns: ATX heading, opening/closing code fence, list item.
const HEADING_RX = /^(#{1,6})\s+(.+?)\s*$/;
const CODE_FENCE_RX = /^(```|~~~)/;
const LIST_ITEM_RX = /^\s*([-*+]|\d+[.)])\s+/;
/**
 * Parse a markdown heading line.
 * Returns `{ depth, text }` (depth = number of leading `#`, 1-6) or null
 * when the line is not a heading.
 */
function isHeadingLine(line) {
    const match = HEADING_RX.exec(line);
    if (match === null) {
        return null;
    }
    const [, hashes, title] = match;
    return { depth: hashes.length, text: title };
}
/** True when the line, ignoring surrounding whitespace, starts with ``` or ~~~. */
function isCodeFence(line) {
    const candidate = line.trim();
    return CODE_FENCE_RX.test(candidate);
}
/** True when the line starts (after optional indentation) with a bullet or a numbered marker. */
function isListItem(line) {
    return LIST_ITEM_RX.exec(line) !== null;
}
/**
 * True when strictly more than half of the counted lines are list items.
 * Blank lines and heading lines are not counted; content with no counted
 * lines is never a list.
 */
function isListMajority(content) {
    let counted = 0;
    let listLines = 0;
    for (const rawLine of content.split(/\r?\n/)) {
        const line = rawLine.replace(/\r$/, "");
        if (line.trim().length === 0 || isHeadingLine(line) !== null) {
            continue;
        }
        counted += 1;
        if (isListItem(line)) {
            listLines += 1;
        }
    }
    return counted > 0 && listLines * 2 > counted;
}
|
|
76
|
+
/**
 * Match YAML frontmatter at the very start of the document.
 *
 * The document must open with a line consisting of `---` (trailing
 * whitespace, including a `\r`, is tolerated) and contain a later line
 * consisting of `---` that closes the block.
 *
 * @param {string} text Full document text.
 * @returns `{ end, content }` where `end` is the offset just past the
 *   closing delimiter line (including its newline, if any) and `content`
 *   is `text.slice(0, end)`; null when no frontmatter block is present.
 */
function extractFrontmatter(text) {
    if (!text.startsWith("---"))
        return null;
    const afterOpen = text.indexOf("\n", 3);
    if (afterOpen === -1)
        return null;
    // Require the newline right after the leading ---: a first line such as
    // "---title" or "----" is ordinary content, not a frontmatter delimiter.
    if (text.slice(3, afterOpen).trim().length > 0)
        return null;
    // Find closing --- on its own line.
    const rest = text.slice(afterOpen + 1);
    const closeRx = /(^|\n)---\s*(\n|$)/;
    const m = closeRx.exec(rest);
    if (!m)
        return null;
    // m.index is relative to `rest`; shift it back into `text` coordinates.
    const closeEnd = afterOpen + 1 + m.index + m[0].length;
    return { end: closeEnd, content: text.slice(0, closeEnd) };
}
|
|
94
|
+
function pushSegment(segments, type, headingPath, start, end, content) {
|
|
95
|
+
const trimmed = content.replace(/^\s+|\s+$/g, "");
|
|
96
|
+
if (trimmed.length === 0)
|
|
97
|
+
return;
|
|
98
|
+
segments.push({
|
|
99
|
+
type,
|
|
100
|
+
heading_path: headingPath,
|
|
101
|
+
char_start: start,
|
|
102
|
+
char_end: end,
|
|
103
|
+
content: trimmed,
|
|
104
|
+
});
|
|
105
|
+
}
|
|
106
|
+
/**
 * Build a forward-only mapper from offsets in a segment's normalized text
 * to absolute offsets in the source text.
 *
 * Buffered segment text is assembled line by line with the "\r" of each
 * "\r\n" removed, so an offset into that text lags behind the source by
 * one character per preceding CRLF. The returned function must be called
 * with non-decreasing targets; it never advances past `limit`.
 */
function makeSourceOffsetMapper(text, base, limit) {
    let src = base;
    let norm = 0;
    return (target) => {
        while (norm < target && src < limit) {
            const droppedCr = text[src] === "\r" && text[src + 1] === "\n";
            if (!droppedCr)
                norm += 1;
            src += 1;
        }
        return src;
    };
}
/**
 * Heading-aware document chunker.
 *
 * Behavior:
 *  - YAML frontmatter at the very top becomes a "frontmatter" segment.
 *  - Every markdown heading (# through ######) updates a depth-indexed
 *    stack; chunks inherit the heading_path of the section they sit in.
 *  - Fenced code blocks are preserved intact — never split. A block is
 *    closed only by a fence that uses the same marker (``` or ~~~) as the
 *    one that opened it; an unclosed block runs to the end of the text.
 *  - Non-code sections are classified as "list" if majority of non-empty
 *    lines are list items, else "paragraph".
 *  - Segments other than code that exceed chunk_chars are size-split via
 *    chunk() with overlap; sub-chunks inherit the parent's heading_path +
 *    type, and their offsets point at the matching span of the source.
 *  - Title = first H1 heading after frontmatter, else null.
 *
 * @param {string} text Full document text.
 * @param {{chunk_chars: number, chunk_overlap: number}} [opts] Size and
 *   overlap settings; defaults to DEFAULT_CHUNK.
 * @returns `{ title, chunks }`. Chunks are in document order, `index` is
 *   sequential from 0, and `char_start` / `char_end` are absolute offsets
 *   into `text` that never exceed `text.length`. Empty input yields
 *   `{ title: null, chunks: [] }`.
 */
export function chunkDocument(text, opts = DEFAULT_CHUNK) {
    if (text.length === 0)
        return { title: null, chunks: [] };
    const segments = [];
    let title = null;
    let cursor = 0;
    const fm = extractFrontmatter(text);
    if (fm) {
        pushSegment(segments, "frontmatter", [], 0, fm.end, fm.content);
        cursor = fm.end;
    }
    // One slot per heading depth: index 0 holds the current H1, index 5 the H6.
    const headingStack = [null, null, null, null, null, null];
    const getPath = () => headingStack.filter((h) => h !== null);
    let bufStart = cursor;
    let bufEnd = cursor;
    let bufText = "";
    let inCode = false;
    let codeStart = 0;
    // Marker ("```" or "~~~") of the fence that opened the current code block.
    let fenceMarker = "";
    const flushBuf = () => {
        if (bufText.length === 0)
            return;
        const type = isListMajority(bufText) ? "list" : "paragraph";
        // bufEnd is one past the end of the text when the buffer reaches EOF
        // (the line walker uses text.length + 1 as its stop value); clamp it so
        // reported offsets stay inside the source.
        pushSegment(segments, type, getPath(), bufStart, Math.min(bufEnd, text.length), bufText);
        bufText = "";
    };
    // Line walker over text[cursor..].
    let pos = cursor;
    while (pos <= text.length) {
        const nl = text.indexOf("\n", pos);
        const lineEnd = nl === -1 ? text.length : nl;
        const line = text.slice(pos, lineEnd).replace(/\r$/, "");
        const lineStart = pos;
        // text.length + 1 makes the loop terminate after the final line.
        const nextPos = nl === -1 ? text.length + 1 : nl + 1;
        if (inCode) {
            // Only a fence with the opening marker closes the block, so a ~~~
            // block can embed ``` lines (and vice versa) without being split.
            if (line.trim().startsWith(fenceMarker)) {
                const codeEnd = Math.min(nextPos, text.length);
                pushSegment(segments, "code", getPath(), codeStart, codeEnd, text.slice(codeStart, codeEnd));
                inCode = false;
                bufStart = nextPos;
                bufEnd = nextPos;
            }
            pos = nextPos;
            continue;
        }
        if (isCodeFence(line)) {
            flushBuf();
            inCode = true;
            codeStart = lineStart;
            fenceMarker = line.trim().slice(0, 3);
            pos = nextPos;
            continue;
        }
        const heading = isHeadingLine(line);
        if (heading) {
            flushBuf();
            // A new heading invalidates every deeper heading below it.
            for (let d = heading.depth + 1; d <= 6; d++)
                headingStack[d - 1] = null;
            headingStack[heading.depth - 1] = heading.text;
            if (title === null && heading.depth === 1)
                title = heading.text;
            // The heading line itself starts the buffer of its section.
            bufStart = lineStart;
            bufEnd = nextPos;
            bufText = line;
            pos = nextPos;
            continue;
        }
        if (bufText.length === 0) {
            bufStart = lineStart;
            bufText = line;
        }
        else {
            bufText += "\n" + line;
        }
        bufEnd = nextPos;
        pos = nextPos;
    }
    if (inCode) {
        // Unclosed fence: everything up to the end of the text is code.
        pushSegment(segments, "code", getPath(), codeStart, text.length, text.slice(codeStart));
    }
    else {
        flushBuf();
    }
    // Size-split oversized segments; never cross heading boundaries.
    const chunks = [];
    let idx = 0;
    for (const seg of segments) {
        if (seg.content.length <= opts.chunk_chars || seg.type === "code") {
            chunks.push({
                index: idx++,
                char_start: seg.char_start,
                char_end: seg.char_end,
                text: seg.content,
                heading_path: seg.heading_path,
                chunk_type: seg.type,
            });
            continue;
        }
        // seg.content is the trimmed segment text, while seg.char_start marks
        // the untrimmed span. Find where the trimmed text really begins so the
        // sub-chunk offsets line up with the source.
        const raw = text.slice(seg.char_start, seg.char_end);
        const base = seg.char_start + /^\s*/.exec(raw)[0].length;
        // Equal lengths mean no "\r" was stripped, so offsets map one-to-one.
        const verbatim = raw.replace(/^\s+|\s+$/g, "").length === seg.content.length;
        const shift = (offset) => base + offset;
        // Starts and ends each grow monotonically but interleave because of the
        // overlap, so each gets its own forward-only mapper.
        const mapStart = verbatim ? shift : makeSourceOffsetMapper(text, base, seg.char_end);
        const mapEnd = verbatim ? shift : makeSourceOffsetMapper(text, base, seg.char_end);
        const sub = chunk(seg.content, opts);
        for (const s of sub) {
            chunks.push({
                index: idx++,
                char_start: mapStart(s.char_start),
                char_end: mapEnd(s.char_end),
                text: s.text,
                heading_path: seg.heading_path,
                chunk_type: seg.type,
            });
        }
    }
    return { title, chunks };
}
|
|
230
|
+
//# sourceMappingURL=chunker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunker.js","sourceRoot":"","sources":["../../src/corpus/chunker.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAkBH,MAAM,CAAC,MAAM,aAAa,GAAiB;IACzC,WAAW,EAAE,GAAG;IAChB,aAAa,EAAE,GAAG;CACnB,CAAC;AAcF;;;;GAIG;AACH,MAAM,UAAU,KAAK,CAAC,IAAY,EAAE,OAAqB,aAAa;IACpE,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;IAC7C,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,aAAa,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC;IACnE,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IACjC,IAAI,IAAI,CAAC,MAAM,IAAI,IAAI,EAAE,CAAC;QACxB,OAAO,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC,EAAE,QAAQ,EAAE,IAAI,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;IACpE,CAAC;IACD,MAAM,GAAG,GAAe,EAAE,CAAC;IAC3B,MAAM,IAAI,GAAG,IAAI,GAAG,OAAO,CAAC;IAC5B,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,OAAO,MAAM,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;QAC5B,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,GAAG,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;QACjD,GAAG,CAAC,IAAI,CAAC;YACP,KAAK,EAAE,CAAC;YACR,UAAU,EAAE,MAAM;YAClB,QAAQ,EAAE,GAAG;YACb,IAAI,EAAE,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,GAAG,CAAC;SAC9B,CAAC,CAAC;QACH,IAAI,GAAG,KAAK,IAAI,CAAC,MAAM;YAAE,MAAM;QAC/B,MAAM,IAAI,IAAI,CAAC;QACf,CAAC,IAAI,CAAC,CAAC;IACT,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAUD,MAAM,UAAU,GAAG,uBAAuB,CAAC;AAC3C,MAAM,aAAa,GAAG,YAAY,CAAC;AACnC,MAAM,YAAY,GAAG,wBAAwB,CAAC;AAE9C,SAAS,aAAa,CAAC,IAAY;IACjC,MAAM,CAAC,GAAG,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAChC,IAAI,CAAC,CAAC;QAAE,OAAO,IAAI,CAAC;IACpB,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;AAC5C,CAAC;AAED,SAAS,WAAW,CAAC,IAAY;IAC/B,OAAO,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;AACzC,CAAC;AAED,SAAS,UAAU,CAAC,IAAY;IAC9B,OAAO,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACjC,CAAC;AAED,SAAS,cAAc,CAAC,OAAe;IACrC,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,CAAC;IACtE,MAAM,QAAQ,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAA
C,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,IAAI,aAAa,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC;IACvF,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IACxC,MAAM,SAAS,GAAG,QAAQ,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,MAAM,CAAC;IACrD,OAAO,SAAS,GAAG,CAAC,GAAG,QAAQ,CAAC,MAAM,CAAC;AACzC,CAAC;AAED,gEAAgE;AAChE,SAAS,kBAAkB,CAAC,IAAY;IACtC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IACzC,0CAA0C;IAC1C,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;IACxC,IAAI,SAAS,KAAK,CAAC,CAAC;QAAE,OAAO,IAAI,CAAC;IAClC,oCAAoC;IACpC,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC;IACvC,MAAM,OAAO,GAAG,oBAAoB,CAAC;IACrC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,IAAI,CAAC,CAAC;QAAE,OAAO,IAAI,CAAC;IACpB,MAAM,UAAU,GAAG,SAAS,GAAG,CAAC,GAAG,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACnE,MAAM,QAAQ,GAAG,SAAS,GAAG,CAAC,GAAG,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IACvD,OAAO,EAAE,GAAG,EAAE,QAAQ,EAAE,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,EAAE,CAAC;AAC7D,CAAC;AAED,SAAS,WAAW,CAClB,QAAmB,EACnB,IAAe,EACf,WAAqB,EACrB,KAAa,EACb,GAAW,EACX,OAAe;IAEf,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC;IAClD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO;IACjC,QAAQ,CAAC,IAAI,CAAC;QACZ,IAAI;QACJ,YAAY,EAAE,WAAW;QACzB,UAAU,EAAE,KAAK;QACjB,QAAQ,EAAE,GAAG;QACb,OAAO,EAAE,OAAO;KACjB,CAAC,CAAC;AACL,CAAC;AAED;;;;;;;;;;;;;GAaG;AACH,MAAM,UAAU,aAAa,CAC3B,IAAY,EACZ,OAAqB,aAAa;IAElC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC;IAE1D,MAAM,QAAQ,GAAc,EAAE,CAAC;IAC/B,IAAI,KAAK,GAAkB,IAAI,CAAC;IAEhC,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,MAAM,EAAE,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAC;IACpC,IAAI,EAAE,EAAE,CAAC;QACP,WAAW,CAAC,QAAQ,EAAE,aAAa,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,OAAO,CAAC,CAAC;QAChE,MAAM,GAAG,EAAE,CAAC,GAAG,CAAC;IAClB,CAAC;IAED,MAAM,YAAY,GAAsB,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC;IAC7E,MAAM,OAAO,GAAG,GAAa,EAAE,CAC7B,YAA
Y,CAAC,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC;IAEtD,IAAI,QAAQ,GAAG,MAAM,CAAC;IACtB,IAAI,MAAM,GAAG,MAAM,CAAC;IACpB,IAAI,OAAO,GAAG,EAAE,CAAC;IACjB,IAAI,MAAM,GAAG,KAAK,CAAC;IACnB,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,MAAM,QAAQ,GAAG,GAAS,EAAE;QAC1B,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO;QACjC,MAAM,IAAI,GAAc,cAAc,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,WAAW,CAAC;QACvE,WAAW,CAAC,QAAQ,EAAE,IAAI,EAAE,OAAO,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;QAClE,OAAO,GAAG,EAAE,CAAC;IACf,CAAC,CAAC;IAEF,mCAAmC;IACnC,IAAI,GAAG,GAAG,MAAM,CAAC;IACjB,OAAO,GAAG,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;QAC1B,MAAM,EAAE,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QACnC,MAAM,OAAO,GAAG,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;QAC7C,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QACzD,MAAM,SAAS,GAAG,GAAG,CAAC;QACtB,MAAM,OAAO,GAAG,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;QAErD,IAAI,MAAM,EAAE,CAAC;YACX,IAAI,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;gBACtB,MAAM,OAAO,GAAG,OAAO,GAAG,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC9C,WAAW,CACT,QAAQ,EACR,MAAM,EACN,OAAO,EAAE,EACT,SAAS,EACT,OAAO,EACP,IAAI,CAAC,KAAK,CAAC,SAAS,EAAE,OAAO,CAAC,CAC/B,CAAC;gBACF,MAAM,GAAG,KAAK,CAAC;gBACf,QAAQ,GAAG,OAAO,CAAC;gBACnB,MAAM,GAAG,OAAO,CAAC;YACnB,CAAC;YACD,GAAG,GAAG,OAAO,CAAC;YACd,SAAS;QACX,CAAC;QAED,IAAI,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;YACtB,QAAQ,EAAE,CAAC;YACX,MAAM,GAAG,IAAI,CAAC;YACd,SAAS,GAAG,SAAS,CAAC;YACtB,GAAG,GAAG,OAAO,CAAC;YACd,SAAS;QACX,CAAC;QAED,MAAM,OAAO,GAAG,aAAa,CAAC,IAAI,CAAC,CAAC;QACpC,IAAI,OAAO,EAAE,CAAC;YACZ,QAAQ,EAAE,CAAC;YACX,KAAK,IAAI,CAAC,GAAG,OAAO,CAAC,KAAK,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE;gBAAE,YAAY,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC;YACxE,YAAY,CAAC,OAAO,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;YAC/C,IAAI,KAAK,KAAK,IAAI,IAAI,OAAO,CAAC,KAAK,KAAK,CAAC;gBAAE,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC;YAChE,QAAQ,
GAAG,SAAS,CAAC;YACrB,MAAM,GAAG,OAAO,CAAC;YACjB,OAAO,GAAG,IAAI,CAAC;YACf,GAAG,GAAG,OAAO,CAAC;YACd,SAAS;QACX,CAAC;QAED,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACzB,QAAQ,GAAG,SAAS,CAAC;YACrB,OAAO,GAAG,IAAI,CAAC;QACjB,CAAC;aAAM,CAAC;YACN,OAAO,IAAI,IAAI,GAAG,IAAI,CAAC;QACzB,CAAC;QACD,MAAM,GAAG,OAAO,CAAC;QACjB,GAAG,GAAG,OAAO,CAAC;IAChB,CAAC;IAED,IAAI,MAAM,EAAE,CAAC;QACX,WAAW,CACT,QAAQ,EACR,MAAM,EACN,OAAO,EAAE,EACT,SAAS,EACT,IAAI,CAAC,MAAM,EACX,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CACtB,CAAC;IACJ,CAAC;SAAM,CAAC;QACN,QAAQ,EAAE,CAAC;IACb,CAAC;IAED,iEAAiE;IACjE,MAAM,MAAM,GAAY,EAAE,CAAC;IAC3B,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;QAC3B,IAAI,GAAG,CAAC,OAAO,CAAC,MAAM,IAAI,IAAI,CAAC,WAAW,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;YAClE,MAAM,CAAC,IAAI,CAAC;gBACV,KAAK,EAAE,GAAG,EAAE;gBACZ,UAAU,EAAE,GAAG,CAAC,UAAU;gBAC1B,QAAQ,EAAE,GAAG,CAAC,QAAQ;gBACtB,IAAI,EAAE,GAAG,CAAC,OAAO;gBACjB,YAAY,EAAE,GAAG,CAAC,YAAY;gBAC9B,UAAU,EAAE,GAAG,CAAC,IAAI;aACrB,CAAC,CAAC;YACH,SAAS;QACX,CAAC;QACD,MAAM,GAAG,GAAG,KAAK,CAAC,GAAG,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;QACrC,KAAK,MAAM,CAAC,IAAI,GAAG,EAAE,CAAC;YACpB,MAAM,CAAC,IAAI,CAAC;gBACV,KAAK,EAAE,GAAG,EAAE;gBACZ,UAAU,EAAE,GAAG,CAAC,UAAU,GAAG,CAAC,CAAC,UAAU;gBACzC,QAAQ,EAAE,GAAG,CAAC,UAAU,GAAG,CAAC,CAAC,QAAQ;gBACrC,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,YAAY,EAAE,GAAG,CAAC,YAAY;gBAC9B,UAAU,EAAE,GAAG,CAAC,IAAI;aACrB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC;AAC3B,CAAC"}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Score fusion primitives — pure, deterministic, no I/O.
|
|
3
|
+
*
|
|
4
|
+
* RRF (Reciprocal Rank Fusion) combines multiple ranked lists into one.
|
|
5
|
+
* It is the standard fusion strategy in modern hybrid retrieval because
|
|
6
|
+
* it needs no score calibration — only ranks — so BM25 and cosine can
|
|
7
|
+
* be safely combined without learning a joint normalization.
|
|
8
|
+
*
|
|
9
|
+
* `applyFactBoost` is the "fact mode" reranker: a dominant multiplier
|
|
10
|
+
* for exact-substring matches plus a secondary multiplier for short
|
|
11
|
+
* chunks. Boost, never filter — chunks without a match keep their
|
|
12
|
+
* fused score and stay in the result so a near-miss query never
|
|
13
|
+
* collapses to empty.
|
|
14
|
+
*/
|
|
15
|
+
/** One entry of a ranked list: a chunk and its position in that list. */
export interface Ranked {
    /** Opaque chunk identifier. */
    chunkId: string;
    /** 1-indexed rank within the source list. */
    rank: number;
}
|
|
21
|
+
/** One input list for rrfFuse: ranked entries plus an optional weight. */
export interface FusionList {
    /** Entries of this list; each entry carries its own 1-indexed rank. */
    ranked: Ranked[];
    /** Defaults to 1.0. Weighting is multiplicative on the RRF term. */
    weight?: number;
}
|
|
26
|
+
/**
 * Reciprocal Rank Fusion. Score for doc d =
 *   sum over input lists i: weight_i / (K + rank_i(d))
 * where K is a smoothing constant (default 60 — the canonical value from
 * Cormack et al. 2009). A doc that appears in only one list is still
 * scored; a doc absent from every list never appears in the output.
 *
 * @param lists Ranked lists to combine, each with an optional weight.
 * @param k Smoothing constant K from the formula above.
 * @returns Fused score per document (presumably keyed by chunkId —
 *   confirm against the implementation).
 */
export declare function rrfFuse(lists: FusionList[], k?: number): Map<string, number>;
|
|
34
|
+
/**
 * Convert a score-sorted list into a rank list. Ties are broken by the
 * order the scores arrive — callers are responsible for passing in a
 * deterministically sorted list. Ranks are 1-indexed as RRF expects.
 *
 * @param scored Entries already sorted best-first; only `chunkId` is read
 *   from the declared shape.
 * @returns One Ranked entry per input entry.
 */
export declare function toRanked(scored: Array<{
    chunkId: string;
}>): Ranked[];
|
|
42
|
+
/** Tuning knobs for applyFactBoost; every field is optional. */
export interface FactBoostOptions {
    /** Multiplier applied when the chunk text contains the query substring (case-insensitive). */
    exactSubstringMultiplier?: number;
    /** Upper end of the multiplier range for short chunks. Shorter = higher multiplier. */
    shortChunkMaxMultiplier?: number;
    /** Chunks at or below this length receive the full short-chunk boost. */
    shortChunkFloorChars?: number;
    /** Chunks at or above this length receive no short-chunk boost. */
    shortChunkCeilingChars?: number;
}
|
|
52
|
+
/** A scored chunk as taken and returned by applyFactBoost. */
export interface FactBoostInput {
    /** Identifier of the chunk this score belongs to. */
    chunkId: string;
    /** Score of the chunk: the fused score on input, the boosted score on output. */
    score: number;
}
|
|
56
|
+
/** Lookup data applyFactBoost needs to decide which chunks to boost. */
export interface FactBoostContext {
    /** Query string searched for as a case-insensitive substring of each chunk's text. */
    query: string;
    /** Chunk texts by key (presumably chunkId -> chunk text; confirm against the caller). */
    chunkText: Map<string, string>;
}
|
|
60
|
+
/**
 * Apply the fact-mode reranker. Returns a new list; input is not mutated.
 * Dominant boost = exact substring match (multiplier ≈ 2.5x by default).
 * Secondary boost = short-chunk preference (up to ≈ 1.15x, decays to 1.0).
 * Non-matching chunks keep their fused score unchanged and stay in the list.
 *
 * @param scored Chunks with their fused scores.
 * @param ctx Query string and chunk texts used to detect matches.
 * @param opts Optional overrides for the multipliers and length thresholds.
 * @returns The rescored chunks; nothing is filtered out.
 */
export declare function applyFactBoost(scored: FactBoostInput[], ctx: FactBoostContext, opts?: FactBoostOptions): FactBoostInput[];
|
|
67
|
+
//# sourceMappingURL=fusion.d.ts.map
|