@jcode.labs/mimir 0.3.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/README.md +284 -40
  2. package/dist/chunking.d.ts.map +1 -1
  3. package/dist/chunking.js +6 -3
  4. package/dist/chunking.js.map +1 -1
  5. package/dist/cli.js +121 -9
  6. package/dist/cli.js.map +1 -1
  7. package/dist/config.d.ts.map +1 -1
  8. package/dist/config.js +50 -36
  9. package/dist/config.js.map +1 -1
  10. package/dist/defaults.d.ts +11 -0
  11. package/dist/defaults.d.ts.map +1 -0
  12. package/dist/defaults.js +31 -0
  13. package/dist/defaults.js.map +1 -0
  14. package/dist/embeddings.d.ts.map +1 -1
  15. package/dist/embeddings.js +85 -11
  16. package/dist/embeddings.js.map +1 -1
  17. package/dist/files.d.ts +2 -1
  18. package/dist/files.d.ts.map +1 -1
  19. package/dist/files.js +39 -2
  20. package/dist/files.js.map +1 -1
  21. package/dist/gitignore.d.ts +1 -1
  22. package/dist/gitignore.d.ts.map +1 -1
  23. package/dist/gitignore.js +8 -7
  24. package/dist/gitignore.js.map +1 -1
  25. package/dist/ingest.d.ts.map +1 -1
  26. package/dist/ingest.js +2 -1
  27. package/dist/ingest.js.map +1 -1
  28. package/dist/init.d.ts.map +1 -1
  29. package/dist/init.js +4 -24
  30. package/dist/init.js.map +1 -1
  31. package/dist/mcp.d.ts.map +1 -1
  32. package/dist/mcp.js +14 -13
  33. package/dist/mcp.js.map +1 -1
  34. package/dist/parsing.d.ts.map +1 -1
  35. package/dist/parsing.js +138 -0
  36. package/dist/parsing.js.map +1 -1
  37. package/dist/query.d.ts.map +1 -1
  38. package/dist/query.js +14 -22
  39. package/dist/query.js.map +1 -1
  40. package/dist/security.js +16 -18
  41. package/dist/security.js.map +1 -1
  42. package/dist/skill.d.ts +2 -1
  43. package/dist/skill.d.ts.map +1 -1
  44. package/dist/skill.js +24 -9
  45. package/dist/skill.js.map +1 -1
  46. package/dist/store.d.ts.map +1 -1
  47. package/dist/store.js +2 -1
  48. package/dist/store.js.map +1 -1
  49. package/dist/types.d.ts +12 -14
  50. package/dist/types.d.ts.map +1 -1
  51. package/dist/version.d.ts +1 -1
  52. package/dist/version.js +1 -1
  53. package/examples/sovereign-rag-demo/.kb/config.json +22 -0
  54. package/examples/sovereign-rag-demo/.kb/sources.txt +2 -0
  55. package/examples/sovereign-rag-demo/README.md +80 -0
  56. package/examples/sovereign-rag-demo/raw/dataset-inventory.csv +5 -0
  57. package/examples/sovereign-rag-demo/raw/incident-timeline.jsonl +4 -0
  58. package/examples/sovereign-rag-demo/raw/operations-brief.md +16 -0
  59. package/examples/sovereign-rag-demo/raw/review-notes.evidence +11 -0
  60. package/examples/sovereign-rag-demo/raw/security-policy.yaml +14 -0
  61. package/package.json +23 -29
  62. package/skills/mimir/SKILL.md +66 -5
  63. package/skills/mimir-audio-summary/SKILL.md +140 -0
  64. package/skills/mimir-audio-summary/forge-voice.sh +150 -0
  65. package/skills/mimir-audio-summary/split-lines.py +13 -0
  66. package/skills/mimir-audio-summary/xtts-voice.py +46 -0
  67. package/CHANGELOG.md +0 -28
  68. package/SECURITY-HARDENING.md +0 -156
  69. package/SECURITY.md +0 -21
  70. package/dist/network.d.ts +0 -4
  71. package/dist/network.d.ts.map +0 -1
  72. package/dist/network.js +0 -59
  73. package/dist/network.js.map +0 -1
@@ -5,7 +5,9 @@ description: Use this skill whenever a repository uses or should use Mimir, loca
5
5
 
6
6
  # Mimir
7
7
 
8
- Mimir is a local-first knowledge base for project documents. It indexes files from the current repository, stores vectors locally, and exposes both a CLI and an MCP server.
8
+ Mimir is a sovereign local RAG knowledge base for confidential project documents and datasets. It
9
+ indexes files from the current repository, stores vectors locally, and exposes both a CLI and an MCP
10
+ server.
9
11
 
10
12
  Use this skill to help an AI agent work with a Mimir-enabled repository without leaking private documents or relying on stale memory.
11
13
 
@@ -55,6 +57,34 @@ npm install --save-dev @jcode.labs/mimir
55
57
  npx kb init
56
58
  ```
57
59
 
60
+ ## Provider Modes
61
+
62
+ Default retrieval mode:
63
+
64
+ ```json
65
+ {
66
+ "embeddingProvider": "local-hash"
67
+ }
68
+ ```
69
+
70
+ This supports ingestion, search, MCP retrieval, and `kb ask` with cited passages without a model
71
+ server. It is lexical/hash retrieval, not model-semantic search. Do not present it as equivalent to
72
+ semantic embeddings.
73
+
74
+ Optional semantic embedding mode:
75
+
76
+ ```json
77
+ {
78
+ "embeddingProvider": "transformers",
79
+ "embeddingModel": "mixedbread-ai/mxbai-embed-xsmall-v1",
80
+ "embeddingModelPath": ".mimir/models",
81
+ "transformersAllowRemoteModels": false
82
+ }
83
+ ```
84
+
85
+ This uses Transformers.js for embeddings only. Keep `transformersAllowRemoteModels` false for
86
+ air-gapped or confidential work and preload model files under `embeddingModelPath`.
87
+
58
88
  ## Ingestion Workflow
59
89
 
60
90
  After documents are added or changed:
@@ -77,13 +107,32 @@ Use search when you need exact source passages:
77
107
  pnpm exec kb search "your query"
78
108
  ```
79
109
 
80
- Use ask when you need a synthesized answer with citations:
110
+ Use ask when you need cited context for the current agent or an external LLM:
81
111
 
82
112
  ```bash
83
113
  pnpm exec kb ask "your question"
84
114
  ```
85
115
 
86
- Ground answers in returned sources. If search results are weak, say that the current index does not prove the point and ask for the missing document.
116
+ Ground answers in returned sources. If search results are weak, say that the current index does not
117
+ prove the point and ask for the missing document. `kb ask` returns cited passages rather than LLM
118
+ synthesis. Use those passages as context for the current agent, or tell the user that generative
119
+ synthesis needs a trusted external LLM or model runtime.
120
+
121
+ ## Deep Research Workflow
122
+
123
+ For broad summaries, audits, planning, or institutional dossiers, do not rely on one query. Build a
124
+ small retrieval plan first:
125
+
126
+ - check `kb audit` and `kb security-audit`;
127
+ - query the main topic;
128
+ - query names, dates, amounts, obligations, risks, decisions, and missing evidence separately;
129
+ - compare the strongest passages across files;
130
+ - ask a synthesis question only after search has found enough grounded context;
131
+ - cite source paths and chunk numbers in the answer when useful;
132
+ - explicitly say when the index does not prove a claim.
133
+
134
+ For sensitive work, prefer the smallest useful `topK`; raise it only when the first results are too
135
+ thin. Do not dump large raw passages into the chat unless the user explicitly asks for extracts.
87
136
 
88
137
  ## MCP Usage
89
138
 
@@ -105,15 +154,27 @@ Available MCP tools:
105
154
 
106
155
  - `mimir_status`: show config and chunk count.
107
156
  - `mimir_search`: retrieve source passages.
108
- - `mimir_ask`: synthesize an answer with local citations.
157
+ - `mimir_ask`: return cited retrieval context.
109
158
  - `mimir_audit`: compare source files with the current index.
110
- - `mimir_security_audit`: inspect local privacy, network, redaction, MCP, and gitignore posture.
159
+ - `mimir_security_audit`: inspect local privacy, provider, redaction, MCP, and gitignore posture.
111
160
 
112
161
  Prefer MCP tools over shell commands when the agent runtime provides them. Use shell commands when MCP is unavailable.
113
162
 
114
163
  MCP is read-focused and intentionally does not expose index deletion. Use `pnpm exec kb
115
164
  destroy-index --yes` from the shell when the user explicitly wants to remove the generated index.
116
165
 
166
+ ## Optional Audio Summaries
167
+
168
+ If the user asks for a listenable or TTS summary, load the optional
169
+ `.mimir/skills/mimir-audio-summary/` skill installed by `pnpm exec kb install-skill`.
170
+
171
+ That skill should:
172
+
173
+ - gather evidence through Mimir first;
174
+ - write narration text only to a temp file outside the repository;
175
+ - render generated audio under `.mimir/audio/` by default;
176
+ - prefer offline TTS engines for confidential content.
177
+
117
178
  ## Installing This Skill Into A Repository
118
179
 
119
180
  Run:
@@ -0,0 +1,140 @@
1
+ ---
2
+ name: mimir-audio-summary
3
+ description: >-
4
+ Create an optional spoken audio summary from a Mimir local knowledge base. Use when the user asks
5
+ for an audio, TTS, spoken brief, briefing, narration, or listenable summary based on private
6
+ repository documents indexed by Mimir. The skill is confidentiality-first: gather facts through
7
+ Mimir, write only a temporary narration text file outside the repository, and render the final
8
+ audio under ignored local Mimir state unless the user explicitly chooses another output path.
9
+ ---
10
+
11
+ # Mimir Audio Summary
12
+
13
+ Use this skill to turn a confidential local Mimir knowledge base into an optional audio summary.
14
+ The knowledge base stays local; the final audio is a generated artifact and must not be committed.
15
+
16
+ ## Confidentiality Rules
17
+
18
+ - Treat the source documents, retrieved passages, generated narration, and final audio as sensitive.
19
+ - Do not use online TTS for confidential content unless the user explicitly allows it.
20
+ - Prefer `pnpm exec kb audio` or `pnpm exec mimir-tts render` for plug-and-play output.
21
+ - Use `--engine edge` only when online TTS is acceptable and global Voice Forge quality is required.
22
+ - Use `--engine transformers --offline` when model files are already present and remote model
23
+ loading is not allowed.
24
+ - Write the narration text to a temp file outside the repository, such as `/tmp/MIMIR-SUMMARY-topic.txt`.
25
+ - Render audio under `.mimir/audio/` by default. This directory is ignored by Git when Mimir is installed.
26
+ - Never stage or commit generated audio, temporary text, WAV, AIFF, or intermediate files.
27
+
28
+ ## 1. Verify The Knowledge Base
29
+
30
+ From the repository root, run:
31
+
32
+ ```bash
33
+ pnpm exec kb status
34
+ pnpm exec kb audit
35
+ pnpm exec kb security-audit
36
+ ```
37
+
38
+ If the audit reports missing or stale files, run:
39
+
40
+ ```bash
41
+ pnpm exec kb ingest
42
+ pnpm exec kb audit
43
+ ```
44
+
45
+ Do not create an audio summary from stale or incomplete evidence unless the user explicitly accepts
46
+ that limitation.
47
+
48
+ ## 2. Search Deeply Before Writing
49
+
50
+ Use Mimir search or MCP tools to gather evidence before drafting the narration.
51
+
52
+ For a broad summary, run multiple searches:
53
+
54
+ ```bash
55
+ pnpm exec kb search "<main topic>" --top-k 8
56
+ pnpm exec kb search "<people, dates, money, obligations, risks, or decisions>" --top-k 8
57
+ pnpm exec kb ask "<specific synthesis question>" --top-k 8
58
+ ```
59
+
60
+ When MCP is available, prefer `mimir_search`, `mimir_ask`, `mimir_audit`, and
61
+ `mimir_security_audit` over shell commands.
62
+
63
+ Keep citations in your working notes, but do not read long raw passages aloud. The audio should be a
64
+ clear synthesis, not a dump of source text.
65
+
66
+ ## 3. Write For Listening
67
+
68
+ Write one flowing narration in the user's working language. Do not use markdown, headings, bullets,
69
+ tables, SSML, XML tags, or stage directions in the spoken text.
70
+
71
+ Good audio structure:
72
+
73
+ 1. Start with the purpose of the summary and the two-to-four ideas to retain.
74
+ 2. Explain the current evidence in plain language.
75
+ 3. Separate proven facts from uncertainty.
76
+ 4. Highlight decisions, risks, deadlines, and missing documents.
77
+ 5. End with a concise recap and two or three self-check questions.
78
+
79
+ Use short sentences and natural punctuation. Spell acronyms and symbols in a way a TTS engine can
80
+ pronounce.
81
+
82
+ ## 4. Render The Audio
83
+
84
+ Create the output directory and write the narration to a temp file outside the repo:
85
+
86
+ ```bash
87
+ mkdir -p .mimir/audio
88
+ ```
89
+
90
+ For global Voice Forge quality on non-confidential text, render with Edge MP3:
91
+
92
+ ```bash
93
+ pnpm exec kb audio /tmp/MIMIR-SUMMARY-<subject-kebab>.txt \
94
+ --engine edge \
95
+ --out .mimir/audio/MIMIR-SUMMARY-<subject-kebab>.mp3
96
+ ```
97
+
98
+ The Edge path uses the online Edge TTS service through the `edge-tts` CLI. Use it only when sending
99
+ the narration text to that service is acceptable.
100
+
101
+ For confidential or air-gapped operation, preload the model files under `.mimir/models/tts` and run:
102
+
103
+ ```bash
104
+ pnpm exec kb audio /tmp/MIMIR-SUMMARY-<subject-kebab>.txt \
105
+ --engine transformers \
106
+ --offline \
107
+ --model-path .mimir/models/tts \
108
+ --out .mimir/audio/MIMIR-SUMMARY-<subject-kebab>.wav
109
+ ```
110
+
111
+ The Transformers.js path does not require Python, ffmpeg, Piper, XTTS, or a local TTS server. The
112
+ first non-offline Transformers render can download public model files into `.mimir/models/tts`, but
113
+ the narration text is processed locally.
114
+
115
+ Use the voice-forge helper only when the user explicitly wants XTTS, macOS `say`, or Piper:
116
+
117
+ ```bash
118
+ OUT_MP3="<repo-root>/.mimir/audio/MIMIR-SUMMARY-<subject-kebab>.mp3" \
119
+ TTS_ENGINE=auto \
120
+ bash <this-skill-dir>/forge-voice.sh /tmp/MIMIR-SUMMARY-<subject-kebab>.txt
121
+ ```
122
+
123
+ Helper engine selection:
124
+
125
+ - `auto`: online Edge TTS first when `edge-tts` is installed, then XTTS, macOS `say`, and Piper. Set an explicit local engine for confidential text.
126
+ - `edge`: online Edge TTS with the global Voice Forge default voice.
127
+ - `xtts`: local Coqui XTTS-v2.
128
+ - `say`: local macOS speech engine, converted to MP3 with `ffmpeg`.
129
+ - `piper`: local neural TTS, converted to MP3 with `ffmpeg`.
130
+
131
+ Voice can be selected with `TTS_VOICE`. Speed for Edge can be selected with `TTS_RATE`.
132
+
133
+ ## 5. Report The Result
134
+
135
+ After rendering, report:
136
+
137
+ - the audio path;
138
+ - which renderer/model was used or requested;
139
+ - whether remote model downloads or online TTS were allowed;
140
+ - any evidence limitation, such as stale index, missing documents, or weak search results.
@@ -0,0 +1,150 @@
1
+ #!/usr/bin/env bash
2
+ # Render a Mimir audio-summary text file to MP3. The text is a throwaway intermediate written
3
+ # outside the repository by the skill. The final audio should normally be written under .mimir/audio.
4
+ #
5
+ # Engine via TTS_ENGINE (auto|edge|xtts|say|piper); default "auto" matches the global Voice Forge
6
+ # quality path:
7
+ # edge - edge-tts neural voices, single request (online). Default; clean on normal text.
8
+ # Set TTS_SEGMENT=1 to render sentence-by-sentence and work around long-text truncation.
9
+ # xtts - local Coqui XTTS-v2 when installed.
10
+ # say - macOS built-in, offline, clean but robotic.
11
+ # piper - local neural TTS.
12
+ #
13
+ # Usage: forge-voice.sh <text-file> [voice]
14
+ # Env: OUT_MP3 explicit mp3 output path
15
+ # TTS_ENGINE force an engine (default: auto)
16
+ # TTS_VOICE voice/speaker (engine-specific); the [voice] arg overrides it
17
+ # TTS_RATE edge-tts speed delta, default +0%
18
+ # TTS_SEGMENT=1 edge: render sentence-by-sentence
19
+ # XTTS_SPEAKER xtts preset speaker (default Ana Florence)
20
+ # PIPER_MODEL piper onnx model path
21
+ # KEEP_TEXT=1 keep the source text file; otherwise it is deleted on exit, even after a failed render
22
+ set -euo pipefail
23
+
24
+ TXT="${1:?usage: forge-voice.sh <text-file> [voice]}"
25
+ [ -f "$TXT" ] || { echo "error: file not found: $TXT" >&2; exit 1; }
26
+ VOICE="${2:-${TTS_VOICE:-}}"
27
+ ENGINE="${TTS_ENGINE:-auto}"
28
+ OUT_FINAL="${OUT_MP3:-${TXT%.txt}.mp3}"
29
+ OUTBASE="${OUT_FINAL%.mp3}"
30
+
31
+ cleanup() {
32
+ [ "${KEEP_TEXT:-0}" = "1" ] || rm -f "$TXT"
33
+ rm -f "${OUTBASE}.wav" "${OUTBASE}.aiff"
34
+ }
35
+ trap cleanup EXIT
36
+
37
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
38
+ XTTS_PY="${XTTS_PY:-$HOME/.local/share/voice-forge/xtts/bin/python}"
39
+ XTTS_HELPER="$SCRIPT_DIR/xtts-voice.py"
40
+
41
+ xtts_ready() { [ -x "$XTTS_PY" ] && [ -f "$XTTS_HELPER" ]; }
42
+
43
+ finish() {
44
+ echo "$1"
45
+ exit 0
46
+ }
47
+
48
+ to_mp3() {
49
+ local src="$1" out="${OUTBASE}.mp3"
50
+ if ! command -v ffmpeg >/dev/null 2>&1; then
51
+ rm -f "$src"
52
+ echo "error: ffmpeg is required to convert local TTS output to mp3" >&2
53
+ exit 1
54
+ fi
55
+ if ffmpeg -y -loglevel error -i "$src" -ac 1 -c:a libmp3lame -q:a 4 "$out"; then
56
+ rm -f "$src"
57
+ finish "$out"
58
+ fi
59
+ rm -f "$src" "$out"
60
+ echo "error: failed to convert local TTS output to mp3" >&2
61
+ exit 1
62
+ }
63
+
64
+ if [ "$ENGINE" = "edge" ] || [ "$ENGINE" = "auto" ]; then
65
+ if command -v edge-tts >/dev/null 2>&1; then
66
+ OUT="${OUTBASE}.mp3"
67
+ SPLIT="$SCRIPT_DIR/split-lines.py"
68
+ voice="${VOICE:-fr-FR-DeniseNeural}"
69
+ rate="${TTS_RATE:-+0%}"
70
+ if [ "${TTS_SEGMENT:-0}" = "1" ] && [ -f "$SPLIT" ] \
71
+ && command -v ffmpeg >/dev/null 2>&1 && command -v python3 >/dev/null 2>&1; then
72
+ TMP="$(mktemp -d)"
73
+ ffmpeg -y -loglevel error -f lavfi -i anullsrc=r=24000:cl=mono -t 0.28 \
74
+ -c:a libmp3lame -q:a 4 "$TMP/sil.mp3"
75
+ i=0
76
+ : > "$TMP/list.txt"
77
+ while IFS= read -r line; do
78
+ [ -z "$line" ] && continue
79
+ i=$((i + 1))
80
+ if edge-tts --text "$line" --voice "$voice" --rate="$rate" \
81
+ --write-media "$TMP/seg_$i.mp3" >/dev/null 2>&1; then
82
+ printf "file '%s'\n" "$TMP/seg_$i.mp3" >> "$TMP/list.txt"
83
+ printf "file '%s'\n" "$TMP/sil.mp3" >> "$TMP/list.txt"
84
+ else
85
+ echo "warn: edge-tts failed on a sentence, skipping it" >&2
86
+ fi
87
+ done < <(python3 "$SPLIT" "$TXT")
88
+ ffmpeg -y -loglevel error -f concat -safe 0 -i "$TMP/list.txt" \
89
+ -ac 1 -c:a libmp3lame -q:a 4 "$OUT"
90
+ rm -rf "$TMP"
91
+ finish "$OUT"
92
+ fi
93
+ edge-tts --file "$TXT" --voice "$voice" --rate="$rate" --write-media "$OUT" >/dev/null
94
+ finish "$OUT"
95
+ fi
96
+ if [ "$ENGINE" = "edge" ]; then
97
+ echo "error: TTS_ENGINE=edge but edge-tts not installed (pipx install edge-tts)" >&2
98
+ exit 1
99
+ fi
100
+ fi
101
+
102
+ if [ "$ENGINE" = "xtts" ] || { [ "$ENGINE" = "auto" ] && xtts_ready; }; then
103
+ if ! xtts_ready; then
104
+ echo "error: TTS_ENGINE=xtts but venv/helper missing ($XTTS_PY)" >&2
105
+ exit 1
106
+ fi
107
+ WAV="${OUTBASE}.wav"
108
+ if [ -n "$VOICE" ]; then
109
+ COQUI_TOS_AGREED=1 "$XTTS_PY" "$XTTS_HELPER" "$TXT" "$WAV" "$VOICE" >&2
110
+ else
111
+ COQUI_TOS_AGREED=1 "$XTTS_PY" "$XTTS_HELPER" "$TXT" "$WAV" >&2
112
+ fi
113
+ to_mp3 "$WAV"
114
+ fi
115
+
116
+ if [ "$ENGINE" = "say" ] || [ "$ENGINE" = "auto" ]; then
117
+ if command -v say >/dev/null 2>&1; then
118
+ AIFF="${OUTBASE}.aiff"
119
+ say -v "${VOICE:-Jacques}" -f "$TXT" -o "$AIFF" 2>/dev/null || say -f "$TXT" -o "$AIFF"
120
+ to_mp3 "$AIFF"
121
+ fi
122
+ if [ "$ENGINE" = "say" ]; then
123
+ echo "error: TTS_ENGINE=say but 'say' not available" >&2
124
+ exit 1
125
+ fi
126
+ fi
127
+
128
+ if [ "$ENGINE" = "piper" ] || [ "$ENGINE" = "auto" ]; then
129
+ if command -v piper >/dev/null 2>&1; then
130
+ WAV="${OUTBASE}.wav"
131
+ piper -m "${PIPER_MODEL:-fr_FR-siwis-medium.onnx}" -f "$WAV" < "$TXT"
132
+ to_mp3 "$WAV"
133
+ fi
134
+ if [ "$ENGINE" = "piper" ]; then
135
+ echo "error: TTS_ENGINE=piper but piper not installed (pip install piper-tts)" >&2
136
+ exit 1
137
+ fi
138
+ fi
139
+
140
+ cat >&2 <<'EOF'
141
+ error: no TTS engine available. Install one:
142
+ edge-tts (cleanest, online): pipx install edge-tts
143
+ XTTS-v2 (local):
144
+ uv venv --python 3.11 ~/.local/share/voice-forge/xtts
145
+ uv pip install --python ~/.local/share/voice-forge/xtts/bin/python \
146
+ coqui-tts 'transformers>=4.57,<5' 'torch==2.8.*' 'torchaudio==2.8.*'
147
+ piper (local): pip install piper-tts
148
+ ffmpeg: required for local engine MP3 conversion
149
+ EOF
150
+ exit 127
@@ -0,0 +1,13 @@
1
+ """Print one sentence per line from a text file.
2
+
3
+ Used to render long narrations sentence-by-sentence when an engine benefits from segmentation.
4
+ """
5
+ import re
6
+ import sys
7
+
8
+ raw = open(sys.argv[1], encoding="utf-8").read()
9
+ text = re.sub(r"\s+", " ", raw).strip()
10
+ for part in re.split(r"(?<=[.!?…])\s+", text):
11
+ part = part.strip()
12
+ if part:
13
+ print(part)
@@ -0,0 +1,46 @@
1
+ #!/usr/bin/env python3
2
+ """Render a text file to WAV with Coqui XTTS-v2.
3
+
4
+ Run with the dedicated venv interpreter:
5
+ ~/.local/share/voice-forge/xtts/bin/python xtts-voice.py <text.txt> <out.wav> [speaker]
6
+
7
+ Env: XTTS_SPEAKER (preset name), XTTS_LANG (default "fr").
8
+ """
9
+ import os
10
+ import sys
11
+
12
+ os.environ.setdefault("COQUI_TOS_AGREED", "1")
13
+
14
+
15
+ def main() -> None:
16
+ if len(sys.argv) < 3:
17
+ sys.exit("usage: xtts-voice.py <text.txt> <out.wav> [speaker]")
18
+ text_file, out_wav = sys.argv[1], sys.argv[2]
19
+ speaker = sys.argv[3] if len(sys.argv) > 3 else os.environ.get("XTTS_SPEAKER", "Ana Florence")
20
+ language = os.environ.get("XTTS_LANG", "fr")
21
+
22
+ with open(text_file, encoding="utf-8") as handle:
23
+ text = handle.read().strip()
24
+ if not text:
25
+ sys.exit("error: empty text file")
26
+
27
+ from TTS.api import TTS
28
+
29
+ tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
30
+ speakers = list(getattr(tts, "speakers", None) or [])
31
+ if speakers and speaker not in speakers:
32
+ sys.stderr.write(f"speaker '{speaker}' not found; falling back to '{speakers[0]}'\n")
33
+ speaker = speakers[0]
34
+
35
+ tts.tts_to_file(
36
+ text=text,
37
+ speaker=speaker,
38
+ language=language,
39
+ file_path=out_wav,
40
+ split_sentences=True,
41
+ )
42
+ print(out_wav)
43
+
44
+
45
+ if __name__ == "__main__":
46
+ main()
package/CHANGELOG.md DELETED
@@ -1,28 +0,0 @@
1
- # Changelog
2
-
3
- ## 0.3.0 - 2026-06-28
4
-
5
- - Add confidentiality hardening defaults: local-only Ollama network policy, built-in
6
- redaction before indexing, metadata-only access logs, and bounded MCP retrieval.
7
- - Add `kb security-audit` for zero-telemetry, network, redaction, gitignore, storage, and
8
- MCP posture checks.
9
- - Add `kb destroy-index --yes` to remove generated vector indexes.
10
- - Add release verification artifacts: npm tarball, SHA256 checksums, SBOM, and manifest.
11
- - Document air-gapped operation, threat model, MCP hardening, and secure deletion limits.
12
-
13
- ## 0.2.1 - 2026-06-28
14
-
15
- - Add GitHub Sponsors funding metadata and document suggested sponsor tiers.
16
- - Add maintainer positioning for Jean-Baptiste Thery and JCode Labs in the README.
17
- - Make `kb init` and `kb install-skill` automatically keep `.kb/` and `.mimir/`
18
- ignored by Git.
19
-
20
- ## 0.2.0 - 2026-06-28
21
-
22
- - Rename public product branding to Mimir while keeping the JCode Labs npm scope.
23
- - Add the bundled portable `mimir` agent skill.
24
- - Add the MCP stdio server with `mimir_status`, `mimir_search`, `mimir_ask`, and
25
- `mimir_audit`.
26
- - Add production smoke coverage for the built CLI and MCP server.
27
- - Add Biome, commitlint, publint, CodeQL, Dependabot grouping, protected npm publishing,
28
- and open-source contribution/security documentation.
@@ -1,156 +0,0 @@
1
- # Mimir Security Hardening
2
-
3
- Mimir is a local-first knowledge base for private project documents. It is built to minimize
4
- data movement, but it is not a certified high-assurance system.
5
-
6
- ## Current Guarantees
7
-
8
- - Zero telemetry: Mimir does not send usage analytics or document content to JCode Labs.
9
- - Local-only network policy by default: document text can only be sent to loopback Ollama hosts
10
- unless the repository explicitly opts in to broader network access.
11
- - Redaction before indexing: built-in DLP patterns redact common secrets and identifiers before
12
- chunks are embedded and stored.
13
- - Metadata-only access logs: access logs contain action metadata and query hashes, not raw
14
- queries or retrieved text.
15
- - Generated local state is ignored by Git: `.kb/`, `.mimir/`, and `private/**` are ignored by
16
- default.
17
- - MCP is read-focused: destructive tools are not exposed over MCP, and MCP retrieval is capped by
18
- `mcpMaxTopK`.
19
- - npm releases are published with provenance from the protected GitHub Actions workflow.
20
- - Release artifacts include a package tarball, SHA256 checksums, SBOM, and manifest.
21
-
22
- ## Threat Model
23
-
24
- Mimir protects against accidental repository leaks, accidental remote LLM usage, accidental secret
25
- indexing, and weak release traceability.
26
-
27
- Mimir does not protect against a compromised local machine, malicious dependencies already present
28
- in the runtime, a user with filesystem access to the same checkout, or forensic recovery from an
29
- unencrypted disk.
30
-
31
- ## At-Rest Encryption
32
-
33
- Native encrypted LanceDB storage is not implemented yet. For sensitive environments, put the
34
- repository and `.kb/` on an encrypted volume:
35
-
36
- - macOS: FileVault or an encrypted APFS volume.
37
- - Linux: LUKS, fscrypt, or an encrypted VM disk.
38
- - Containers/VMs: mount `.kb/` on an encrypted host volume.
39
-
40
- `kb destroy-index --yes` removes generated index files, but secure deletion on SSDs and copy-on-write
41
- filesystems cannot be guaranteed without encrypted storage and key destruction.
42
-
43
- ## Air-Gapped Operation
44
-
45
- Prepare artifacts on an internet-connected build machine:
46
-
47
- ```bash
48
- pnpm install --frozen-lockfile
49
- pnpm build
50
- pnpm release:artifacts
51
- ```
52
-
53
- Move the generated tarball from `release-artifacts/` into the offline environment and install it:
54
-
55
- ```bash
56
- pnpm add -D ./jcode.labs-mimir-<version>.tgz
57
- pnpm exec kb init
58
- pnpm exec kb ingest
59
- ```
60
-
61
- Ollama and the required models must also be preloaded inside the offline environment.
62
-
63
- ## Zero Network Posture
64
-
65
- Default config:
66
-
67
- ```json
68
- {
69
- "ollamaHost": "http://localhost:11434",
70
- "networkPolicy": "local-only"
71
- }
72
- ```
73
-
74
- Allowed policies:
75
-
76
- - `local-only`: only loopback hosts such as `localhost` and `127.0.0.1`.
77
- - `allow-private`: loopback and private LAN hosts.
78
- - `allow-any`: any host. Use only when the remote endpoint is explicitly trusted.
79
-
80
- Run:
81
-
82
- ```bash
83
- pnpm exec kb security-audit --strict
84
- ```
85
-
86
- ## DLP Redaction
87
-
88
- Built-in redaction is enabled by default for common secret and identifier shapes: private keys,
89
- JWTs, API tokens, emails, IBANs, and card-like numbers.
90
-
91
- Custom patterns can be added in `.kb/config.json`:
92
-
93
- ```json
94
- {
95
- "redaction": {
96
- "enabled": true,
97
- "builtIn": true,
98
- "patterns": [
99
- {
100
- "name": "internal_case_id",
101
- "pattern": "CASE-[0-9]+",
102
- "replacement": "[CASE]"
103
- }
104
- ]
105
- }
106
- }
107
- ```
108
-
109
- Redaction changes the indexed text, not the raw files under `private/`.
110
-
111
- ## MCP Hardening
112
-
113
- MCP gives an agent access to retrieved private context. Use it only for agents running under the
114
- same trust boundary as the repository.
115
-
116
- Mimir MCP defaults:
117
-
118
- - read-focused tools only;
119
- - no index deletion tool exposed over MCP;
120
- - bounded retrieval through `mcpMaxTopK`;
121
- - metadata-only access logging.
122
-
123
- For team use, prefer one checkout per user or per role. Mimir does not implement RBAC.
124
-
125
- ## Release Verification
126
-
127
- The protected npm workflow runs validation, generates release artifacts, and publishes with
128
- provenance:
129
-
130
- ```bash
131
- npm publish --access public --provenance
132
- ```
133
-
134
- Release artifacts include:
135
-
136
- - npm tarball;
137
- - `SHA256SUMS`;
138
- - CycloneDX SBOM;
139
- - `release-manifest.json`.
140
-
141
- Verify checksums offline with:
142
-
143
- ```bash
144
- sha256sum -c SHA256SUMS
145
- ```
146
-
147
- On macOS:
148
-
149
- ```bash
150
- shasum -a 256 -c SHA256SUMS
151
- ```
152
-
153
- ## External Audit Status
154
-
155
- No external security audit has been completed yet. Treat Mimir as useful hardening for private
156
- developer workflows, not as military-grade certified software.
package/SECURITY.md DELETED
@@ -1,21 +0,0 @@
1
- # Security Policy
2
-
3
- ## Supported Versions
4
-
5
- Only the latest published version of `@jcode.labs/mimir` receives security fixes.
6
-
7
- ## Reporting A Vulnerability
8
-
9
- Please report vulnerabilities privately by email:
10
-
11
- ```plain text
12
- contact@jcode.works
13
- ```
14
-
15
- Do not open public issues for vulnerabilities, leaked secrets, credential exposure,
16
- or private document disclosure.
17
-
18
- ## Data Boundary
19
-
20
- Mimir is designed to index local project documents. Raw project documents,
21
- `.kb/storage/`, environment files, and credentials must remain outside commits.
package/dist/network.d.ts DELETED
@@ -1,4 +0,0 @@
1
- import type { Config, HostClassification } from "./types.js";
2
- export declare function assertNetworkPolicy(config: Config): void;
3
- export declare function classifyHost(input: string): HostClassification;
4
- //# sourceMappingURL=network.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"network.d.ts","sourceRoot":"","sources":["../src/network.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,MAAM,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAA;AAE5D,wBAAgB,mBAAmB,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI,CAsBxD;AAED,wBAAgB,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,kBAAkB,CAkB9D"}