@jcode.labs/mimir 0.2.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +47 -0
- package/CONTRIBUTING.md +28 -0
- package/README.md +307 -32
- package/SECURITY-HARDENING.md +194 -0
- package/SECURITY.md +21 -0
- package/dist/access-log.d.ts +10 -0
- package/dist/access-log.d.ts.map +1 -0
- package/dist/access-log.js +29 -0
- package/dist/access-log.js.map +1 -0
- package/dist/chunking.d.ts.map +1 -1
- package/dist/chunking.js +6 -3
- package/dist/chunking.js.map +1 -1
- package/dist/cli.js +151 -5
- package/dist/cli.js.map +1 -1
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +83 -20
- package/dist/config.js.map +1 -1
- package/dist/defaults.d.ts +11 -0
- package/dist/defaults.d.ts.map +1 -0
- package/dist/defaults.js +31 -0
- package/dist/defaults.js.map +1 -0
- package/dist/destroy.d.ts +3 -0
- package/dist/destroy.d.ts.map +1 -0
- package/dist/destroy.js +16 -0
- package/dist/destroy.js.map +1 -0
- package/dist/embeddings.d.ts.map +1 -1
- package/dist/embeddings.js +85 -9
- package/dist/embeddings.js.map +1 -1
- package/dist/files.d.ts +2 -1
- package/dist/files.d.ts.map +1 -1
- package/dist/files.js +40 -3
- package/dist/files.js.map +1 -1
- package/dist/gitignore.d.ts +1 -1
- package/dist/gitignore.d.ts.map +1 -1
- package/dist/gitignore.js +8 -7
- package/dist/gitignore.js.map +1 -1
- package/dist/index.d.ts +4 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +3 -0
- package/dist/index.js.map +1 -1
- package/dist/ingest.d.ts.map +1 -1
- package/dist/ingest.js +14 -2
- package/dist/ingest.js.map +1 -1
- package/dist/init.d.ts.map +1 -1
- package/dist/init.js +4 -15
- package/dist/init.js.map +1 -1
- package/dist/mcp.d.ts.map +1 -1
- package/dist/mcp.js +27 -15
- package/dist/mcp.js.map +1 -1
- package/dist/parsing.d.ts.map +1 -1
- package/dist/parsing.js +138 -0
- package/dist/parsing.js.map +1 -1
- package/dist/query.d.ts.map +1 -1
- package/dist/query.js +28 -20
- package/dist/query.js.map +1 -1
- package/dist/redaction.d.ts +7 -0
- package/dist/redaction.d.ts.map +1 -0
- package/dist/redaction.js +63 -0
- package/dist/redaction.js.map +1 -0
- package/dist/security.d.ts +3 -0
- package/dist/security.d.ts.map +1 -0
- package/dist/security.js +84 -0
- package/dist/security.js.map +1 -0
- package/dist/skill.d.ts +2 -1
- package/dist/skill.d.ts.map +1 -1
- package/dist/skill.js +24 -9
- package/dist/skill.js.map +1 -1
- package/dist/store.d.ts.map +1 -1
- package/dist/store.js +2 -1
- package/dist/store.js.map +1 -1
- package/dist/types.d.ts +68 -3
- package/dist/types.d.ts.map +1 -1
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/examples/sovereign-rag-demo/.kb/config.json +22 -0
- package/examples/sovereign-rag-demo/.kb/sources.txt +2 -0
- package/examples/sovereign-rag-demo/README.md +80 -0
- package/examples/sovereign-rag-demo/raw/dataset-inventory.csv +5 -0
- package/examples/sovereign-rag-demo/raw/incident-timeline.jsonl +4 -0
- package/examples/sovereign-rag-demo/raw/operations-brief.md +16 -0
- package/examples/sovereign-rag-demo/raw/review-notes.evidence +11 -0
- package/examples/sovereign-rag-demo/raw/security-policy.yaml +14 -0
- package/package.json +28 -25
- package/skills/mimir/SKILL.md +77 -6
- package/skills/mimir-audio-summary/SKILL.md +134 -0
- package/skills/mimir-audio-summary/forge-voice.sh +153 -0
- package/skills/mimir-audio-summary/split-lines.py +13 -0
- package/skills/mimir-audio-summary/xtts-voice.py +46 -0
package/CHANGELOG.md
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## 0.4.0 - 2026-06-28
|
|
4
|
+
|
|
5
|
+
- Reposition Mimir as sovereign local RAG for confidential datasets and AI agents.
|
|
6
|
+
- Expand default ingestion to common text, Office/OpenDocument, data, config, log, and source-code
|
|
7
|
+
file types.
|
|
8
|
+
- Add `includeExtensions` / `KB_INCLUDE_EXTENSIONS` for custom UTF-8 text file extensions.
|
|
9
|
+
- Add the optional `mimir-audio-summary` bundled skill for confidential audio summaries.
|
|
10
|
+
- Install both the main Mimir skill and optional audio-summary skill with `kb install-skill`.
|
|
11
|
+
- Improve agent guidance for deep multi-query retrieval before synthesis.
|
|
12
|
+
- Make Mimir core retrieval-only: `kb ask` now returns cited context for external agents or LLMs
|
|
13
|
+
instead of generating answers internally.
|
|
14
|
+
- Add optional Transformers.js semantic embeddings through `embeddingProvider: "transformers"`.
|
|
15
|
+
- Remove Ollama providers and keep `embeddingProvider: "local-hash"` as the no-model default.
|
|
16
|
+
- Move the repository to a simple pnpm workspace monorepo without adding Turbo.
|
|
17
|
+
- Move the core `@jcode.labs/mimir` package into `packages/mimir`.
|
|
18
|
+
- Add `@jcode.labs/mimir-tts` for plug-and-play JS/ONNX WAV rendering without Python or ffmpeg.
|
|
19
|
+
- Add `kb audio` and update the audio-summary skill to use Mimir TTS before advanced fallback
|
|
20
|
+
engines.
|
|
21
|
+
|
|
22
|
+
## 0.3.0 - 2026-06-28
|
|
23
|
+
|
|
24
|
+
- Add confidentiality hardening defaults: built-in redaction before indexing, metadata-only access
|
|
25
|
+
logs, and bounded MCP retrieval.
|
|
26
|
+
- Add `kb security-audit` for zero-telemetry, provider, redaction, gitignore, storage, and
|
|
27
|
+
MCP posture checks.
|
|
28
|
+
- Add `kb destroy-index --yes` to remove generated vector indexes.
|
|
29
|
+
- Add release verification artifacts: npm tarball, SHA256 checksums, SBOM, and manifest.
|
|
30
|
+
- Document air-gapped operation, threat model, MCP hardening, and secure deletion limits.
|
|
31
|
+
|
|
32
|
+
## 0.2.1 - 2026-06-28
|
|
33
|
+
|
|
34
|
+
- Add GitHub Sponsors funding metadata and document suggested sponsor tiers.
|
|
35
|
+
- Add maintainer positioning for Jean-Baptiste Thery and JCode Labs in the README.
|
|
36
|
+
- Make `kb init` and `kb install-skill` automatically keep `.kb/` and `.mimir/`
|
|
37
|
+
ignored by Git.
|
|
38
|
+
|
|
39
|
+
## 0.2.0 - 2026-06-28
|
|
40
|
+
|
|
41
|
+
- Rename public product branding to Mimir while keeping the JCode Labs npm scope.
|
|
42
|
+
- Add the bundled portable `mimir` agent skill.
|
|
43
|
+
- Add the MCP stdio server with `mimir_status`, `mimir_search`, `mimir_ask`, and
|
|
44
|
+
`mimir_audit`.
|
|
45
|
+
- Add production smoke coverage for the built CLI and MCP server.
|
|
46
|
+
- Add Biome, commitlint, publint, CodeQL, Dependabot grouping, protected npm publishing,
|
|
47
|
+
and open-source contribution/security documentation.
|
package/CONTRIBUTING.md
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# Contributing
|
|
2
|
+
|
|
3
|
+
Mimir is an open-source project under the MIT License. Issues and pull requests are welcome.
|
|
4
|
+
|
|
5
|
+
## Development
|
|
6
|
+
|
|
7
|
+
Use Node.js 20+ and pnpm:
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pnpm install
|
|
11
|
+
pnpm validate
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
`pnpm validate` runs Biome, TypeScript, Vitest, the production CLI/MCP smoke test, and npm
|
|
15
|
+
package metadata checks.
|
|
16
|
+
|
|
17
|
+
## Pull Requests
|
|
18
|
+
|
|
19
|
+
- Open pull requests against `main`.
|
|
20
|
+
- Keep changes focused and include tests or smoke coverage for behavior changes.
|
|
21
|
+
- Do not commit private documents, generated vector stores, environment files, tokens, or
|
|
22
|
+
credentials.
|
|
23
|
+
- Use conventional commit messages such as `feat: add source parser` or
|
|
24
|
+
`fix: handle empty index`.
|
|
25
|
+
|
|
26
|
+
## Security
|
|
27
|
+
|
|
28
|
+
Do not report vulnerabilities through public issues. Follow [`SECURITY.md`](./SECURITY.md).
|
package/README.md
CHANGED
|
@@ -5,11 +5,18 @@
|
|
|
5
5
|
[@jcode.labs/mimir on npm](https://www.npmjs.com/package/@jcode.labs/mimir)
|
|
6
6
|
[License](./LICENSE)
|
|
7
7
|
|
|
8
|
-
Open-source, local
|
|
8
|
+
Open-source, sovereign local RAG for confidential datasets and AI agents.
|
|
9
9
|
|
|
10
|
-
Mimir provides a TypeScript CLI and
|
|
11
|
-
repository. It indexes files from the target repository, stores
|
|
12
|
-
and
|
|
10
|
+
Mimir provides a TypeScript CLI, library, MCP server, and portable agent skills that can be
|
|
11
|
+
installed in any Node.js repository. It indexes local files from the target repository, stores
|
|
12
|
+
vectors locally with LanceDB, and can use either built-in local-hash retrieval or optional
|
|
13
|
+
Transformers.js semantic embeddings. Mimir core returns cited retrieval context; answer synthesis
|
|
14
|
+
belongs to the AI agent, LLM, or local model runtime you choose around it.
|
|
15
|
+
|
|
16
|
+
The intended use case is simple: put confidential company, institutional, legal, operational, or
|
|
17
|
+
research documents in a private local folder, index them locally, then let any compatible AI agent or
|
|
18
|
+
LLM workflow retrieve grounded context for summaries, briefs, audits, and decision support without
|
|
19
|
+
shipping the dataset to a hosted RAG service.
|
|
13
20
|
|
|
14
21
|
Created by Jean-Baptiste Thery and published under the JCode Labs npm scope.
|
|
15
22
|
|
|
@@ -41,22 +48,50 @@ Suggested GitHub Sponsors tiers:
|
|
|
41
48
|
|
|
42
49
|
Early public package. APIs may evolve before `1.0.0`.
|
|
43
50
|
|
|
51
|
+
## What Mimir Is For
|
|
52
|
+
|
|
53
|
+
- Build a local RAG knowledge base inside any repository.
|
|
54
|
+
- Analyze confidential datasets while keeping raw files and generated indexes local.
|
|
55
|
+
- Give Claude, Codex, Cursor, internal assistants, or other MCP-compatible tools the same private
|
|
56
|
+
retrieval layer.
|
|
57
|
+
- Retrieve grounded local evidence through CLI, library calls, MCP tools, or the bundled agent
|
|
58
|
+
skills so your chosen AI agent can produce cited summaries.
|
|
59
|
+
- Optionally create listenable WAV summaries with `kb audio`, `@jcode.labs/mimir-tts`, and the
|
|
60
|
+
bundled `mimir-audio-summary` skill.
|
|
61
|
+
|
|
62
|
+
Mimir is not a hosted SaaS, not a remote vector database, and not a certified high-assurance system.
|
|
63
|
+
For regulated or state-grade environments, pair it with encrypted disks, controlled machines, release
|
|
64
|
+
verification, and an external security review.
|
|
65
|
+
|
|
66
|
+
## Use Cases
|
|
67
|
+
|
|
68
|
+
Mimir is useful whenever the source material should stay local but an AI agent still needs grounded
|
|
69
|
+
context.
|
|
70
|
+
|
|
71
|
+
| Use case | Example questions |
|
|
72
|
+
| --- | --- |
|
|
73
|
+
| Understand a code repository | "Where is authentication implemented?", "What depends on this module?", "Summarize the payment flow." |
|
|
74
|
+
| Understand architecture | "What services exist?", "What are the data boundaries?", "Which components are risky to change?" |
|
|
75
|
+
| Analyze specifications | "What does the technical spec require?", "Which requirements are still unclear?", "Generate an implementation checklist." |
|
|
76
|
+
| Work through a request for proposal or tender | "What are the mandatory constraints?", "Which documents prove compliance?", "What risks should be clarified?" |
|
|
77
|
+
| Study courses and training material | "Summarize chapter three.", "Create revision questions.", "Compare these two concepts." |
|
|
78
|
+
| Analyze a book or long report | "Extract the main thesis.", "Find recurring arguments.", "Create a chapter-by-chapter brief." |
|
|
79
|
+
| Build an internal knowledge base | "What is the policy for incident review?", "Who owns this process?", "Which source says that?" |
|
|
80
|
+
| Prepare meetings or decisions | "Give me a one-page briefing.", "What is missing before deciding?", "List action items and evidence." |
|
|
81
|
+
| Ask questions over offline documents | "Which files mention local-only operation?", "What evidence supports this claim?" |
|
|
82
|
+
| Generate audio briefings | "Create a listenable summary of the current dossier using offline TTS." |
|
|
83
|
+
|
|
44
84
|
## Requirements
|
|
45
85
|
|
|
46
86
|
- Node.js 20+
|
|
47
87
|
- pnpm, npm, yarn, or bun
|
|
48
|
-
-
|
|
49
|
-
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
Optional answer model:
|
|
56
|
-
|
|
57
|
-
```bash
|
|
58
|
-
ollama pull gemma4
|
|
59
|
-
```
|
|
88
|
+
- No model runtime is required for the default `embeddingProvider: "local-hash"` mode.
|
|
89
|
+
- Optional semantic embeddings use Transformers.js with local model files under `.mimir/models` by
|
|
90
|
+
default.
|
|
91
|
+
- Generated answers are intentionally outside Mimir core. Use Claude, Codex, OpenAI, a local model
|
|
92
|
+
MCP server, or another trusted model runtime to synthesize from Mimir's cited context.
|
|
93
|
+
- Optional audio summaries use the separate `@jcode.labs/mimir-tts` workspace package. It renders
|
|
94
|
+
WAV files with Transformers.js and does not require Python, ffmpeg, Piper, XTTS, or a local server.
|
|
60
95
|
|
|
61
96
|
## Install From npm
|
|
62
97
|
|
|
@@ -76,23 +111,26 @@ npm install --save-dev @jcode.labs/mimir
|
|
|
76
111
|
|
|
77
112
|
Maintainer tokens are only needed to publish new versions.
|
|
78
113
|
|
|
79
|
-
## Install From
|
|
114
|
+
## Install From Source Checkout
|
|
80
115
|
|
|
81
116
|
```bash
|
|
82
|
-
|
|
117
|
+
git clone git@github.com:jcode-works/jcode-mimir.git
|
|
118
|
+
cd jcode-mimir
|
|
119
|
+
pnpm install
|
|
120
|
+
pnpm build
|
|
83
121
|
```
|
|
84
122
|
|
|
85
123
|
For local development:
|
|
86
124
|
|
|
87
125
|
```bash
|
|
88
|
-
pnpm add -D file:../jcode-mimir
|
|
126
|
+
pnpm add -D file:../jcode-mimir/packages/mimir
|
|
89
127
|
```
|
|
90
128
|
|
|
91
129
|
Before creating an npm tarball, run:
|
|
92
130
|
|
|
93
131
|
```bash
|
|
94
132
|
pnpm build
|
|
95
|
-
pnpm pack
|
|
133
|
+
pnpm --dir packages/mimir pack
|
|
96
134
|
```
|
|
97
135
|
|
|
98
136
|
## Use In Any Repository
|
|
@@ -110,6 +148,7 @@ pnpm exec kb ingest
|
|
|
110
148
|
pnpm exec kb search "vendor invoice status"
|
|
111
149
|
pnpm exec kb ask "What do the documents prove?"
|
|
112
150
|
pnpm exec kb audit
|
|
151
|
+
pnpm exec kb security-audit
|
|
113
152
|
pnpm exec kb status
|
|
114
153
|
```
|
|
115
154
|
|
|
@@ -121,12 +160,162 @@ npx kb ingest
|
|
|
121
160
|
npx kb search "vendor invoice status"
|
|
122
161
|
npx kb ask "What do the documents prove?"
|
|
123
162
|
npx kb audit
|
|
163
|
+
npx kb security-audit
|
|
124
164
|
npx kb status
|
|
125
165
|
```
|
|
126
166
|
|
|
127
|
-
##
|
|
167
|
+
## Choose A Retrieval Mode
|
|
168
|
+
|
|
169
|
+
Mimir has two embedding modes.
|
|
170
|
+
|
|
171
|
+
### Default Local Hash Retrieval
|
|
172
|
+
|
|
173
|
+
Use this when you want a fully local, no-model smoke test or a dependency-light setup. Retrieval is
|
|
174
|
+
lexical/hash-based, not semantic.
|
|
175
|
+
|
|
176
|
+
`.kb/config.json`:
|
|
177
|
+
|
|
178
|
+
```json
|
|
179
|
+
{
|
|
180
|
+
"embeddingProvider": "local-hash"
|
|
181
|
+
}
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
Commands:
|
|
185
|
+
|
|
186
|
+
```bash
|
|
187
|
+
pnpm exec kb ingest
|
|
188
|
+
pnpm exec kb search "offline retrieval approval"
|
|
189
|
+
pnpm exec kb ask "What evidence supports offline operation?"
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
`kb ask` always returns cited retrieved passages instead of a generated synthesis. You can pass those
|
|
193
|
+
passages to any LLM or agent you trust.
|
|
194
|
+
|
|
195
|
+
### Optional Semantic Embeddings With Transformers.js
|
|
196
|
+
|
|
197
|
+
Use this when you want better semantic retrieval while keeping Mimir core free of an LLM server.
|
|
198
|
+
|
|
199
|
+
`.kb/config.json`:
|
|
200
|
+
|
|
201
|
+
```json
|
|
202
|
+
{
|
|
203
|
+
"embeddingProvider": "transformers",
|
|
204
|
+
"embeddingModel": "mixedbread-ai/mxbai-embed-xsmall-v1",
|
|
205
|
+
"embeddingModelPath": ".mimir/models",
|
|
206
|
+
"transformersAllowRemoteModels": false
|
|
207
|
+
}
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
Commands:
|
|
211
|
+
|
|
212
|
+
```bash
|
|
213
|
+
pnpm exec kb ingest
|
|
214
|
+
pnpm exec kb ask "Which passages support offline operation?"
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
Keep `transformersAllowRemoteModels` false for confidential or air-gapped work and preload model
|
|
218
|
+
files into `embeddingModelPath`. Set it to true only when you explicitly allow Transformers.js to
|
|
219
|
+
download model files from Hugging Face.
|
|
220
|
+
|
|
221
|
+
## Dependency Footprint
|
|
222
|
+
|
|
223
|
+
Mimir can run retrieval without a model runtime. Some runtime dependencies remain because they provide
|
|
224
|
+
core features:
|
|
225
|
+
|
|
226
|
+
| Dependency | Why it remains |
|
|
227
|
+
| --- | --- |
|
|
228
|
+
| @huggingface/transformers | optional local semantic embeddings |
|
|
229
|
+
| LanceDB | local vector storage and nearest-neighbor retrieval |
|
|
230
|
+
| MCP SDK | MCP server for compatible agents |
|
|
231
|
+
| fast-glob | safe source-file discovery |
|
|
232
|
+
| unpdf, html-to-text, yaml, fflate | document parsing for PDF, HTML, YAML, Office/OpenDocument ZIP files |
|
|
233
|
+
| commander, zod, picocolors | CLI, config validation, readable terminal output |
|
|
234
|
+
|
|
235
|
+
Removing more dependencies is possible only by dropping features or replacing them with smaller
|
|
236
|
+
internal implementations. The current low-friction path is dependency-light at runtime for users who
|
|
237
|
+
choose `local-hash`, while preserving richer parsing, MCP support, and optional semantic embeddings.
|
|
238
|
+
|
|
239
|
+
## Example Test Workspace
|
|
240
|
+
|
|
241
|
+
This repository includes a synthetic example under
|
|
242
|
+
[`examples/sovereign-rag-demo`](./examples/sovereign-rag-demo). It can be used to test ingestion,
|
|
243
|
+
retrieval, `security-audit`, and custom text extensions without using private documents.
|
|
244
|
+
|
|
245
|
+
From a local checkout:
|
|
246
|
+
|
|
247
|
+
```bash
|
|
248
|
+
pnpm build
|
|
249
|
+
cd examples/sovereign-rag-demo
|
|
250
|
+
node ../../dist/cli.js security-audit
|
|
251
|
+
node ../../dist/cli.js ingest
|
|
252
|
+
node ../../dist/cli.js search "offline retrieval approval"
|
|
253
|
+
node ../../dist/cli.js audit
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
The example uses the default local-hash retrieval mode, so it can run without downloading an
|
|
257
|
+
embedding or chat model.
|
|
258
|
+
|
|
259
|
+
## Typical Workflows
|
|
260
|
+
|
|
261
|
+
### Understand A Codebase
|
|
262
|
+
|
|
263
|
+
```bash
|
|
264
|
+
pnpm exec kb init
|
|
265
|
+
printf "src\nREADME.md\ndocs\n" >> .kb/sources.txt
|
|
266
|
+
pnpm exec kb ingest
|
|
267
|
+
pnpm exec kb search "authentication flow"
|
|
268
|
+
pnpm exec kb ask "Explain the architecture and cite the relevant files."
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
### Analyze Specifications Or A Course
|
|
272
|
+
|
|
273
|
+
```bash
|
|
274
|
+
pnpm exec kb ingest
|
|
275
|
+
pnpm exec kb ask "Summarize the requirements and list open questions."
|
|
276
|
+
pnpm exec kb ask "Create revision questions from the indexed course material."
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
### Work Offline
|
|
280
|
+
|
|
281
|
+
```bash
|
|
282
|
+
pnpm exec kb security-audit --strict
|
|
283
|
+
pnpm exec kb ingest
|
|
284
|
+
pnpm exec kb search "incident review policy"
|
|
285
|
+
pnpm exec kb ask "What does the local evidence prove?"
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
Use `embeddingProvider: "local-hash"` for a no-model offline workflow. Use
|
|
289
|
+
`embeddingProvider: "transformers"` with preloaded model files for semantic offline retrieval.
|
|
290
|
+
Generated answers should come from a trusted external agent or model runtime.
|
|
291
|
+
|
|
292
|
+
### Generate A Local Audio Briefing
|
|
293
|
+
|
|
294
|
+
Mimir includes a plug-and-play JS text-to-speech path for listenable summaries:
|
|
295
|
+
|
|
296
|
+
```bash
|
|
297
|
+
pnpm exec kb audio --doctor
|
|
298
|
+
pnpm exec kb audio /tmp/MIMIR-SUMMARY-project.txt --out .mimir/audio/project-summary.wav
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
The command writes WAV output locally and does not require Python or ffmpeg. The first render can
|
|
302
|
+
download a public Transformers.js-compatible model into `.mimir/models/tts`; the narration text is
|
|
303
|
+
processed locally. For confidential air-gapped work, preload model files and run:
|
|
304
|
+
|
|
305
|
+
```bash
|
|
306
|
+
pnpm exec kb audio /tmp/MIMIR-SUMMARY-project.txt --out .mimir/audio/project-summary.wav --offline
|
|
307
|
+
```
|
|
128
308
|
|
|
129
|
-
|
|
309
|
+
The standalone package can also be installed directly:
|
|
310
|
+
|
|
311
|
+
```bash
|
|
312
|
+
pnpm add -D @jcode.labs/mimir-tts
|
|
313
|
+
pnpm exec mimir-tts render /tmp/MIMIR-SUMMARY-project.txt --out .mimir/audio/project-summary.wav
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
## Agent Skills And MCP
|
|
317
|
+
|
|
318
|
+
Mimir ships with portable agent skills and a standard MCP server.
|
|
130
319
|
|
|
131
320
|
Install the agent kit into a repository:
|
|
132
321
|
|
|
@@ -138,12 +327,14 @@ This creates:
|
|
|
138
327
|
|
|
139
328
|
```text
|
|
140
329
|
.mimir/skills/mimir/SKILL.md
|
|
330
|
+
.mimir/skills/mimir-audio-summary/SKILL.md
|
|
141
331
|
.mimir/mcp.json
|
|
142
332
|
.mimir/README.md
|
|
143
333
|
```
|
|
144
334
|
|
|
145
|
-
Agents that support skill folders can load `.mimir/skills/mimir
|
|
146
|
-
|
|
335
|
+
Agents that support skill folders can load `.mimir/skills/mimir/` for deep local RAG usage.
|
|
336
|
+
Load `.mimir/skills/mimir-audio-summary/` only when an optional spoken summary is needed.
|
|
337
|
+
Other agents can read the generated `.mimir/README.md` and use the MCP config snippet.
|
|
147
338
|
|
|
148
339
|
Start the MCP server from the repository root:
|
|
149
340
|
|
|
@@ -157,6 +348,11 @@ MCP tools exposed:
|
|
|
157
348
|
- `mimir_search`
|
|
158
349
|
- `mimir_ask`
|
|
159
350
|
- `mimir_audit`
|
|
351
|
+
- `mimir_security_audit`
|
|
352
|
+
|
|
353
|
+
This MCP layer is the recommended way to let any compatible LLM or agent query the same local
|
|
354
|
+
knowledge base. The LLM does not need to know about LanceDB or the raw file layout; it asks Mimir for
|
|
355
|
+
ranked passages or cited context and uses the returned citations.
|
|
160
356
|
|
|
161
357
|
Print the bundled skill path from the installed package:
|
|
162
358
|
|
|
@@ -175,14 +371,46 @@ your-project/
|
|
|
175
371
|
.kb/config.json # local config
|
|
176
372
|
.kb/sources.txt # optional extra source paths
|
|
177
373
|
.kb/storage/ # generated LanceDB index
|
|
374
|
+
.kb/access.log # metadata-only access log
|
|
178
375
|
```
|
|
179
376
|
|
|
180
377
|
The package never ships project documents. `kb init` adds `.gitignore` entries for `.kb/`
|
|
181
378
|
and `private/**`, and `kb install-skill` keeps `.mimir/` ignored as generated local agent
|
|
182
379
|
state.
|
|
183
380
|
|
|
381
|
+
## Confidentiality Defaults
|
|
382
|
+
|
|
383
|
+
Mimir is designed for private repositories and sensitive local evidence.
|
|
384
|
+
|
|
385
|
+
- Zero telemetry: no analytics or document content is sent to JCode Labs.
|
|
386
|
+
- No LLM generation in core: Mimir returns cited context for the agent/runtime you choose.
|
|
387
|
+
- Local-hash by default: no model runtime is required for the default retrieval path.
|
|
388
|
+
- Transformers.js remote model loading is disabled by default.
|
|
389
|
+
- Redaction before indexing: common secrets and identifiers are redacted before chunks are
|
|
390
|
+
embedded and stored.
|
|
391
|
+
- Metadata-only access logs: query hashes and action metadata are logged, not raw queries.
|
|
392
|
+
- MCP is read-focused and bounded by `mcpMaxTopK`.
|
|
393
|
+
- Generated local state is ignored by Git.
|
|
394
|
+
|
|
395
|
+
Run:
|
|
396
|
+
|
|
397
|
+
```bash
|
|
398
|
+
pnpm exec kb security-audit --strict
|
|
399
|
+
```
|
|
400
|
+
|
|
401
|
+
Remove the generated vector index:
|
|
402
|
+
|
|
403
|
+
```bash
|
|
404
|
+
pnpm exec kb destroy-index --yes
|
|
405
|
+
```
|
|
406
|
+
|
|
407
|
+
For air-gapped operation, release verification, secure deletion limits, and threat model details,
|
|
408
|
+
read [`SECURITY-HARDENING.md`](./SECURITY-HARDENING.md).
|
|
409
|
+
|
|
184
410
|
## Supported Files
|
|
185
411
|
|
|
412
|
+
Mimir supports common text, document, data, config, log, and source-code files out of the box:
|
|
413
|
+
|
|
186
414
|
- Markdown: `.md`, `.mdx`
|
|
187
415
|
- Text: `.txt`, `.text`
|
|
188
416
|
- JSON: `.json`
|
|
@@ -190,6 +418,32 @@ state.
|
|
|
190
418
|
- CSV/TSV: `.csv`, `.tsv`
|
|
191
419
|
- HTML: `.html`, `.htm`
|
|
192
420
|
- PDF: `.pdf`
|
|
421
|
+
- Office/OpenDocument: `.docx`, `.pptx`, `.xlsx`, `.odt`, `.ods`, `.odp`
|
|
422
|
+
- Rich text: `.rtf`
|
|
423
|
+
- Line data and logs: `.jsonl`, `.ndjson`, `.log`
|
|
424
|
+
- XML feeds and documents: `.xml`, `.rss`, `.atom`
|
|
425
|
+
- Config and data files: `.toml`, `.ini`, `.conf`, `.cfg`, `.properties`, `.sql`
|
|
426
|
+
- Source code: `.ts`, `.tsx`, `.js`, `.jsx`, `.py`, `.go`, `.rs`, `.java`, `.rb`, `.php`,
|
|
427
|
+
`.cs`, `.c`, `.cpp`, `.h`, `.css`
|
|
428
|
+
|
|
429
|
+
Custom UTF-8 text extensions can be enabled without changing code:
|
|
430
|
+
|
|
431
|
+
```json
|
|
432
|
+
{
|
|
433
|
+
"includeExtensions": [".transcript", ".evidence"]
|
|
434
|
+
}
|
|
435
|
+
```
|
|
436
|
+
|
|
437
|
+
Or through:
|
|
438
|
+
|
|
439
|
+
```bash
|
|
440
|
+
KB_INCLUDE_EXTENSIONS=".transcript,.evidence" pnpm exec kb ingest
|
|
441
|
+
```
|
|
442
|
+
|
|
443
|
+
Images, scans, audio/video files, old proprietary Office binaries such as `.doc`, and other formats
|
|
444
|
+
that are not listed should be OCRed, transcribed, converted, or exported to text/PDF/HTML first.
|
|
445
|
+
Mimir intentionally avoids pretending that every binary format can be indexed safely without
|
|
446
|
+
extraction logic.
|
|
193
447
|
|
|
194
448
|
## Config
|
|
195
449
|
|
|
@@ -200,13 +454,23 @@ state.
|
|
|
200
454
|
"rawDir": "private",
|
|
201
455
|
"storageDir": ".kb/storage",
|
|
202
456
|
"sourcesFile": ".kb/sources.txt",
|
|
457
|
+
"accessLogPath": ".kb/access.log",
|
|
458
|
+
"embeddingModelPath": ".mimir/models",
|
|
203
459
|
"tableName": "chunks",
|
|
204
|
-
"
|
|
205
|
-
"
|
|
206
|
-
"
|
|
460
|
+
"embeddingProvider": "local-hash",
|
|
461
|
+
"embeddingModel": "mixedbread-ai/mxbai-embed-xsmall-v1",
|
|
462
|
+
"transformersAllowRemoteModels": false,
|
|
463
|
+
"redaction": {
|
|
464
|
+
"enabled": true,
|
|
465
|
+
"builtIn": true,
|
|
466
|
+
"patterns": []
|
|
467
|
+
},
|
|
468
|
+
"accessLog": true,
|
|
469
|
+
"mcpMaxTopK": 10,
|
|
207
470
|
"topK": 5,
|
|
208
471
|
"chunkSize": 1200,
|
|
209
|
-
"chunkOverlap": 150
|
|
472
|
+
"chunkOverlap": 150,
|
|
473
|
+
"includeExtensions": []
|
|
210
474
|
}
|
|
211
475
|
```
|
|
212
476
|
|
|
@@ -215,12 +479,19 @@ Environment overrides:
|
|
|
215
479
|
- `KB_RAW_DIR`
|
|
216
480
|
- `KB_STORAGE_DIR`
|
|
217
481
|
- `KB_SOURCES_FILE`
|
|
218
|
-
- `
|
|
219
|
-
- `
|
|
220
|
-
- `
|
|
482
|
+
- `KB_ACCESS_LOG_PATH`
|
|
483
|
+
- `KB_EMBEDDING_PROVIDER`
|
|
484
|
+
- `KB_EMBEDDING_MODEL`
|
|
485
|
+
- `KB_EMBEDDING_MODEL_PATH`
|
|
486
|
+
- `KB_TRANSFORMERS_ALLOW_REMOTE_MODELS`
|
|
487
|
+
- `KB_REDACTION_ENABLED`
|
|
488
|
+
- `KB_REDACTION_BUILT_IN`
|
|
489
|
+
- `KB_ACCESS_LOG`
|
|
490
|
+
- `KB_MCP_MAX_TOP_K`
|
|
221
491
|
- `KB_TOP_K`
|
|
222
492
|
- `KB_CHUNK_SIZE`
|
|
223
493
|
- `KB_CHUNK_OVERLAP`
|
|
494
|
+
- `KB_INCLUDE_EXTENSIONS`
|
|
224
495
|
|
|
225
496
|
## Library API
|
|
226
497
|
|
|
@@ -234,7 +505,11 @@ const answer = await ask("What documents support the project timeline?")
|
|
|
234
505
|
|
|
235
506
|
## Privacy
|
|
236
507
|
|
|
237
|
-
-
|
|
508
|
+
- Mimir core does not generate answers or call a chat model.
|
|
509
|
+
- `local-hash` can run ingestion, search, and cited retrieval without a model runtime.
|
|
510
|
+
- Transformers.js remote model loading is disabled by default.
|
|
511
|
+
- Built-in redaction runs before indexing by default.
|
|
512
|
+
- Access logs store query hashes, not raw queries.
|
|
238
513
|
- The vector index is stored locally.
|
|
239
514
|
- Raw private documents should stay in the target repository's ignored `private/` folder.
|
|
240
515
|
- Do not put secrets or scans inside this package repository.
|