@jcode.labs/mimir 0.2.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +47 -0
- package/CONTRIBUTING.md +28 -0
- package/README.md +307 -32
- package/SECURITY-HARDENING.md +194 -0
- package/SECURITY.md +21 -0
- package/dist/access-log.d.ts +10 -0
- package/dist/access-log.d.ts.map +1 -0
- package/dist/access-log.js +29 -0
- package/dist/access-log.js.map +1 -0
- package/dist/chunking.d.ts.map +1 -1
- package/dist/chunking.js +6 -3
- package/dist/chunking.js.map +1 -1
- package/dist/cli.js +151 -5
- package/dist/cli.js.map +1 -1
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +83 -20
- package/dist/config.js.map +1 -1
- package/dist/defaults.d.ts +11 -0
- package/dist/defaults.d.ts.map +1 -0
- package/dist/defaults.js +31 -0
- package/dist/defaults.js.map +1 -0
- package/dist/destroy.d.ts +3 -0
- package/dist/destroy.d.ts.map +1 -0
- package/dist/destroy.js +16 -0
- package/dist/destroy.js.map +1 -0
- package/dist/embeddings.d.ts.map +1 -1
- package/dist/embeddings.js +85 -9
- package/dist/embeddings.js.map +1 -1
- package/dist/files.d.ts +2 -1
- package/dist/files.d.ts.map +1 -1
- package/dist/files.js +40 -3
- package/dist/files.js.map +1 -1
- package/dist/gitignore.d.ts +1 -1
- package/dist/gitignore.d.ts.map +1 -1
- package/dist/gitignore.js +8 -7
- package/dist/gitignore.js.map +1 -1
- package/dist/index.d.ts +4 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +3 -0
- package/dist/index.js.map +1 -1
- package/dist/ingest.d.ts.map +1 -1
- package/dist/ingest.js +14 -2
- package/dist/ingest.js.map +1 -1
- package/dist/init.d.ts.map +1 -1
- package/dist/init.js +4 -15
- package/dist/init.js.map +1 -1
- package/dist/mcp.d.ts.map +1 -1
- package/dist/mcp.js +27 -15
- package/dist/mcp.js.map +1 -1
- package/dist/parsing.d.ts.map +1 -1
- package/dist/parsing.js +138 -0
- package/dist/parsing.js.map +1 -1
- package/dist/query.d.ts.map +1 -1
- package/dist/query.js +28 -20
- package/dist/query.js.map +1 -1
- package/dist/redaction.d.ts +7 -0
- package/dist/redaction.d.ts.map +1 -0
- package/dist/redaction.js +63 -0
- package/dist/redaction.js.map +1 -0
- package/dist/security.d.ts +3 -0
- package/dist/security.d.ts.map +1 -0
- package/dist/security.js +84 -0
- package/dist/security.js.map +1 -0
- package/dist/skill.d.ts +2 -1
- package/dist/skill.d.ts.map +1 -1
- package/dist/skill.js +24 -9
- package/dist/skill.js.map +1 -1
- package/dist/store.d.ts.map +1 -1
- package/dist/store.js +2 -1
- package/dist/store.js.map +1 -1
- package/dist/types.d.ts +68 -3
- package/dist/types.d.ts.map +1 -1
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/examples/sovereign-rag-demo/.kb/config.json +22 -0
- package/examples/sovereign-rag-demo/.kb/sources.txt +2 -0
- package/examples/sovereign-rag-demo/README.md +80 -0
- package/examples/sovereign-rag-demo/raw/dataset-inventory.csv +5 -0
- package/examples/sovereign-rag-demo/raw/incident-timeline.jsonl +4 -0
- package/examples/sovereign-rag-demo/raw/operations-brief.md +16 -0
- package/examples/sovereign-rag-demo/raw/review-notes.evidence +11 -0
- package/examples/sovereign-rag-demo/raw/security-policy.yaml +14 -0
- package/package.json +28 -25
- package/skills/mimir/SKILL.md +77 -6
- package/skills/mimir-audio-summary/SKILL.md +134 -0
- package/skills/mimir-audio-summary/forge-voice.sh +153 -0
- package/skills/mimir-audio-summary/split-lines.py +13 -0
- package/skills/mimir-audio-summary/xtts-voice.py +46 -0
package/skills/mimir/SKILL.md
CHANGED
|
@@ -5,7 +5,9 @@ description: Use this skill whenever a repository uses or should use Mimir, loca
|
|
|
5
5
|
|
|
6
6
|
# Mimir
|
|
7
7
|
|
|
8
|
-
Mimir is a local
|
|
8
|
+
Mimir is a sovereign local RAG knowledge base for confidential project documents and datasets. It
|
|
9
|
+
indexes files from the current repository, stores vectors locally, and exposes both a CLI and an MCP
|
|
10
|
+
server.
|
|
9
11
|
|
|
10
12
|
Use this skill to help an AI agent work with a Mimir-enabled repository without leaking private documents or relying on stale memory.
|
|
11
13
|
|
|
@@ -20,12 +22,15 @@ private/ # raw documents to ingest
|
|
|
20
22
|
.kb/config.json # local Mimir config
|
|
21
23
|
.kb/sources.txt # optional extra source paths
|
|
22
24
|
.kb/storage/ # generated local index
|
|
25
|
+
.kb/access.log # metadata-only access log
|
|
23
26
|
```
|
|
24
27
|
|
|
25
28
|
## Data Safety
|
|
26
29
|
|
|
27
30
|
- Do not commit raw documents, secrets, tax IDs, scans, bank documents, tokens, or generated vector stores.
|
|
28
|
-
- Keep `private
|
|
31
|
+
- Keep `private/**`, `.kb/`, and `.mimir/` ignored by Git.
|
|
32
|
+
- Treat `kb search`, `kb ask`, and MCP results as sensitive because they can contain private
|
|
33
|
+
source passages even when redaction is enabled.
|
|
29
34
|
- Prefer summaries and citations over dumping long private passages into the chat.
|
|
30
35
|
- If the user asks for a high-stakes answer, identify which facts came from Mimir and which still require professional or official verification.
|
|
31
36
|
|
|
@@ -35,6 +40,7 @@ From the repository root:
|
|
|
35
40
|
|
|
36
41
|
```bash
|
|
37
42
|
pnpm exec kb status
|
|
43
|
+
pnpm exec kb security-audit
|
|
38
44
|
```
|
|
39
45
|
|
|
40
46
|
If Mimir is not installed:
|
|
@@ -51,6 +57,34 @@ npm install --save-dev @jcode.labs/mimir
|
|
|
51
57
|
npx kb init
|
|
52
58
|
```
|
|
53
59
|
|
|
60
|
+
## Provider Modes
|
|
61
|
+
|
|
62
|
+
Default retrieval mode:
|
|
63
|
+
|
|
64
|
+
```json
|
|
65
|
+
{
|
|
66
|
+
"embeddingProvider": "local-hash"
|
|
67
|
+
}
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
This supports ingestion, search, MCP retrieval, and `kb ask` with cited passages without a model
|
|
71
|
+
server. It is lexical/hash retrieval, not model-semantic search. Do not present it as equivalent to
|
|
72
|
+
semantic embeddings.
|
|
73
|
+
|
|
74
|
+
Optional semantic embedding mode:
|
|
75
|
+
|
|
76
|
+
```json
|
|
77
|
+
{
|
|
78
|
+
"embeddingProvider": "transformers",
|
|
79
|
+
"embeddingModel": "mixedbread-ai/mxbai-embed-xsmall-v1",
|
|
80
|
+
"embeddingModelPath": ".mimir/models",
|
|
81
|
+
"transformersAllowRemoteModels": false
|
|
82
|
+
}
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
This uses Transformers.js for embeddings only. Keep `transformersAllowRemoteModels` false for
|
|
86
|
+
air-gapped or confidential work and preload model files under `embeddingModelPath`.
|
|
87
|
+
|
|
54
88
|
## Ingestion Workflow
|
|
55
89
|
|
|
56
90
|
After documents are added or changed:
|
|
@@ -58,10 +92,12 @@ After documents are added or changed:
|
|
|
58
92
|
```bash
|
|
59
93
|
pnpm exec kb ingest
|
|
60
94
|
pnpm exec kb audit
|
|
95
|
+
pnpm exec kb security-audit
|
|
61
96
|
pnpm exec kb status
|
|
62
97
|
```
|
|
63
98
|
|
|
64
|
-
The audit must show no missing or stale supported files before relying on the index.
|
|
99
|
+
The audit must show no missing or stale supported files before relying on the index. The security
|
|
100
|
+
audit should not show warnings before relying on Mimir for sensitive work.
|
|
65
101
|
|
|
66
102
|
## Query Workflow
|
|
67
103
|
|
|
@@ -71,13 +107,32 @@ Use search when you need exact source passages:
|
|
|
71
107
|
pnpm exec kb search "your query"
|
|
72
108
|
```
|
|
73
109
|
|
|
74
|
-
Use ask when you need
|
|
110
|
+
Use ask when you need cited context for the current agent or an external LLM:
|
|
75
111
|
|
|
76
112
|
```bash
|
|
77
113
|
pnpm exec kb ask "your question"
|
|
78
114
|
```
|
|
79
115
|
|
|
80
|
-
Ground answers in returned sources. If search results are weak, say that the current index does not
|
|
116
|
+
Ground answers in returned sources. If search results are weak, say that the current index does not
|
|
117
|
+
prove the point and ask for the missing document. `kb ask` returns cited passages rather than LLM
|
|
118
|
+
synthesis. Use those passages as context for the current agent, or tell the user that generative
|
|
119
|
+
synthesis needs a trusted external LLM or model runtime.
|
|
120
|
+
|
|
121
|
+
## Deep Research Workflow
|
|
122
|
+
|
|
123
|
+
For broad summaries, audits, planning, or institutional dossiers, do not rely on one query. Build a
|
|
124
|
+
small retrieval plan first:
|
|
125
|
+
|
|
126
|
+
- check `kb audit` and `kb security-audit`;
|
|
127
|
+
- query the main topic;
|
|
128
|
+
- query names, dates, amounts, obligations, risks, decisions, and missing evidence separately;
|
|
129
|
+
- compare the strongest passages across files;
|
|
130
|
+
- ask a synthesis question only after search has found enough grounded context;
|
|
131
|
+
- cite source paths and chunk numbers in the answer when useful;
|
|
132
|
+
- explicitly say when the index does not prove a claim.
|
|
133
|
+
|
|
134
|
+
For sensitive work, prefer the smallest useful `topK`; raise it only when the first results are too
|
|
135
|
+
thin. Do not dump large raw passages into the chat unless the user explicitly asks for extracts.
|
|
81
136
|
|
|
82
137
|
## MCP Usage
|
|
83
138
|
|
|
@@ -99,11 +154,27 @@ Available MCP tools:
|
|
|
99
154
|
|
|
100
155
|
- `mimir_status`: show config and chunk count.
|
|
101
156
|
- `mimir_search`: retrieve source passages.
|
|
102
|
-
- `mimir_ask`:
|
|
157
|
+
- `mimir_ask`: return cited retrieval context.
|
|
103
158
|
- `mimir_audit`: compare source files with the current index.
|
|
159
|
+
- `mimir_security_audit`: inspect local privacy, provider, redaction, MCP, and gitignore posture.
|
|
104
160
|
|
|
105
161
|
Prefer MCP tools over shell commands when the agent runtime provides them. Use shell commands when MCP is unavailable.
|
|
106
162
|
|
|
163
|
+
MCP is read-focused and intentionally does not expose index deletion. Use `pnpm exec kb
|
|
164
|
+
destroy-index --yes` from the shell when the user explicitly wants to remove the generated index.
|
|
165
|
+
|
|
166
|
+
## Optional Audio Summaries
|
|
167
|
+
|
|
168
|
+
If the user asks for a listenable or TTS summary, load the optional
|
|
169
|
+
`.mimir/skills/mimir-audio-summary/` skill installed by `pnpm exec kb install-skill`.
|
|
170
|
+
|
|
171
|
+
That skill should:
|
|
172
|
+
|
|
173
|
+
- gather evidence through Mimir first;
|
|
174
|
+
- write narration text only to a temp file outside the repository;
|
|
175
|
+
- render generated audio under `.mimir/audio/` by default;
|
|
176
|
+
- prefer offline TTS engines for confidential content.
|
|
177
|
+
|
|
107
178
|
## Installing This Skill Into A Repository
|
|
108
179
|
|
|
109
180
|
Run:
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: mimir-audio-summary
|
|
3
|
+
description: >-
|
|
4
|
+
Create an optional spoken audio summary from a Mimir local knowledge base. Use when the user asks
|
|
5
|
+
for an audio, TTS, spoken brief, briefing, narration, or listenable summary based on private
|
|
6
|
+
repository documents indexed by Mimir. The skill is confidentiality-first: gather facts through
|
|
7
|
+
Mimir, write only a temporary narration text file outside the repository, and render the final
|
|
8
|
+
audio under ignored local Mimir state unless the user explicitly chooses another output path.
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# Mimir Audio Summary
|
|
12
|
+
|
|
13
|
+
Use this skill to turn a confidential local Mimir knowledge base into an optional audio summary.
|
|
14
|
+
The knowledge base stays local; the final audio is a generated artifact and must not be committed.
|
|
15
|
+
|
|
16
|
+
## Confidentiality Rules
|
|
17
|
+
|
|
18
|
+
- Treat the source documents, retrieved passages, generated narration, and final audio as sensitive.
|
|
19
|
+
- Do not use online TTS for confidential content unless the user explicitly allows it.
|
|
20
|
+
- Prefer `pnpm exec kb audio` or `pnpm exec mimir-tts render` for plug-and-play local WAV output.
|
|
21
|
+
- Use `--offline` when model files are already present and remote model loading is not allowed.
|
|
22
|
+
- Write the narration text to a temp file outside the repository, such as `/tmp/MIMIR-SUMMARY-topic.txt`.
|
|
23
|
+
- Render audio under `.mimir/audio/` by default. This directory is ignored by Git when Mimir is installed.
|
|
24
|
+
- Never stage or commit generated audio, temporary text, WAV, AIFF, or intermediate files.
|
|
25
|
+
|
|
26
|
+
## 1. Verify The Knowledge Base
|
|
27
|
+
|
|
28
|
+
From the repository root, run:
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
pnpm exec kb status
|
|
32
|
+
pnpm exec kb audit
|
|
33
|
+
pnpm exec kb security-audit
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
If the audit reports missing or stale files, run:
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
pnpm exec kb ingest
|
|
40
|
+
pnpm exec kb audit
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Do not create an audio summary from stale or incomplete evidence unless the user explicitly accepts
|
|
44
|
+
that limitation.
|
|
45
|
+
|
|
46
|
+
## 2. Search Deeply Before Writing
|
|
47
|
+
|
|
48
|
+
Use Mimir search or MCP tools to gather evidence before drafting the narration.
|
|
49
|
+
|
|
50
|
+
For a broad summary, run multiple searches:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
pnpm exec kb search "<main topic>" --top-k 8
|
|
54
|
+
pnpm exec kb search "<people, dates, money, obligations, risks, or decisions>" --top-k 8
|
|
55
|
+
pnpm exec kb ask "<specific synthesis question>" --top-k 8
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
When MCP is available, prefer `mimir_search`, `mimir_ask`, `mimir_audit`, and
|
|
59
|
+
`mimir_security_audit` over shell commands.
|
|
60
|
+
|
|
61
|
+
Keep citations in your working notes, but do not read long raw passages aloud. The audio should be a
|
|
62
|
+
clear synthesis, not a dump of source text.
|
|
63
|
+
|
|
64
|
+
## 3. Write For Listening
|
|
65
|
+
|
|
66
|
+
Write one flowing narration in the user's working language. Do not use markdown, headings, bullets,
|
|
67
|
+
tables, SSML, XML tags, or stage directions in the spoken text.
|
|
68
|
+
|
|
69
|
+
Good audio structure:
|
|
70
|
+
|
|
71
|
+
1. Start with the purpose of the summary and the two-to-four ideas to retain.
|
|
72
|
+
2. Explain the current evidence in plain language.
|
|
73
|
+
3. Separate proven facts from uncertainty.
|
|
74
|
+
4. Highlight decisions, risks, deadlines, and missing documents.
|
|
75
|
+
5. End with a concise recap and two or three self-check questions.
|
|
76
|
+
|
|
77
|
+
Use short sentences and natural punctuation. Spell acronyms and symbols in a way a TTS engine can
|
|
78
|
+
pronounce.
|
|
79
|
+
|
|
80
|
+
## 4. Render The Audio
|
|
81
|
+
|
|
82
|
+
Create the output directory and write the narration to a temp file outside the repo:
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
mkdir -p .mimir/audio
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Then render with the default Mimir TTS path:
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
pnpm exec kb audio /tmp/MIMIR-SUMMARY-<subject-kebab>.txt \
|
|
92
|
+
--out .mimir/audio/MIMIR-SUMMARY-<subject-kebab>.wav
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
For air-gapped operation, preload the model files under `.mimir/models/tts` and run:
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
pnpm exec kb audio /tmp/MIMIR-SUMMARY-<subject-kebab>.txt \
|
|
99
|
+
--out .mimir/audio/MIMIR-SUMMARY-<subject-kebab>.wav \
|
|
100
|
+
--offline
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
The default renderer uses `@jcode.labs/mimir-tts` and Transformers.js. It does not require Python,
|
|
104
|
+
ffmpeg, Piper, XTTS, or a local TTS server. The first non-offline render can download public model
|
|
105
|
+
files into `.mimir/models/tts`, but the narration text is processed locally.
|
|
106
|
+
|
|
107
|
+
Use the legacy voice-forge helper only when the user explicitly wants MP3 output or an engine not
|
|
108
|
+
covered by Mimir TTS:
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
OUT_MP3="<repo-root>/.mimir/audio/MIMIR-SUMMARY-<subject-kebab>.mp3" \
|
|
112
|
+
TTS_ENGINE=auto \
|
|
113
|
+
bash <this-skill-dir>/forge-voice.sh /tmp/MIMIR-SUMMARY-<subject-kebab>.txt
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
Engine selection:
|
|
117
|
+
|
|
118
|
+
- `auto`: confidentiality-first order: XTTS, macOS `say`, Piper, then online Edge only when
|
|
119
|
+
`MIMIR_ALLOW_ONLINE_TTS=1`.
|
|
120
|
+
- `xtts`: local Coqui XTTS-v2.
|
|
121
|
+
- `say`: local macOS speech engine, converted to MP3 with `ffmpeg`.
|
|
122
|
+
- `piper`: local neural TTS, converted to MP3 with `ffmpeg`.
|
|
123
|
+
- `edge`: online Edge TTS. Use only with explicit user approval for non-sensitive text.
|
|
124
|
+
|
|
125
|
+
Voice can be selected with `TTS_VOICE`. Speed for Edge can be selected with `TTS_RATE`.
|
|
126
|
+
|
|
127
|
+
## 5. Report The Result
|
|
128
|
+
|
|
129
|
+
After rendering, report:
|
|
130
|
+
|
|
131
|
+
- the audio path;
|
|
132
|
+
- which renderer/model was used or requested;
|
|
133
|
+
- whether remote model downloads or online TTS were allowed;
|
|
134
|
+
- any evidence limitation, such as stale index, missing documents, or weak search results.
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Render a Mimir audio-summary text file to MP3. The text is a throwaway intermediate written
|
|
3
|
+
# outside the repository by the skill. The final audio should normally be written under .mimir/audio.
|
|
4
|
+
#
|
|
5
|
+
# Engine via TTS_ENGINE (auto|xtts|say|piper|edge); default "auto" is confidentiality-first:
|
|
6
|
+
# xtts - local Coqui XTTS-v2 when installed.
|
|
7
|
+
# say - macOS built-in, offline, clean but robotic.
|
|
8
|
+
# piper - local neural TTS.
|
|
9
|
+
# edge - online Edge TTS only when TTS_ENGINE=edge or MIMIR_ALLOW_ONLINE_TTS=1.
|
|
10
|
+
#
|
|
11
|
+
# Usage: forge-voice.sh <text-file> [voice]
|
|
12
|
+
# Env: OUT_MP3 explicit mp3 output path
|
|
13
|
+
# TTS_ENGINE force an engine (default: auto)
|
|
14
|
+
# TTS_VOICE voice/speaker (engine-specific); the [voice] arg overrides it
|
|
15
|
+
# TTS_RATE edge-tts speed delta, default +0%
|
|
16
|
+
# TTS_SEGMENT=1 edge: render sentence-by-sentence
|
|
17
|
+
# XTTS_SPEAKER xtts preset speaker (default Ana Florence)
|
|
18
|
+
# PIPER_MODEL piper onnx model path
|
|
19
|
+
# MIMIR_ALLOW_ONLINE_TTS=1 allow edge in auto mode
|
|
20
|
+
# KEEP_TEXT=1 keep the source text file; otherwise it is deleted on every exit, including failed renders
|
|
21
|
+
set -euo pipefail

# Required first argument: the narration text file. An unset or empty value
# aborts with the usage string.
TXT="${1:?usage: forge-voice.sh <text-file> [voice]}"
if [ ! -f "$TXT" ]; then
  echo "error: file not found: $TXT" >&2
  exit 1
fi

# Voice precedence: non-empty second argument, then TTS_VOICE, then empty
# (each engine applies its own default later).
VOICE="${TTS_VOICE:-}"
if [ -n "${2:-}" ]; then
  VOICE="$2"
fi

# Engine selector; "auto" when TTS_ENGINE is unset or empty.
ENGINE="${TTS_ENGINE:-auto}"

# Final MP3 path: OUT_MP3 when given, otherwise the text path with a trailing
# ".txt" swapped for ".mp3".
DEFAULT_MP3="${TXT%.txt}.mp3"
OUT_FINAL="${OUT_MP3:-$DEFAULT_MP3}"

# Output path without a trailing ".mp3"; intermediate .wav/.aiff files and the
# final .mp3 are all derived from this stem.
OUTBASE="${OUT_FINAL%.mp3}"
|
|
29
|
+
|
|
30
|
+
# EXIT-trap handler: removes the narration text (unless KEEP_TEXT=1) and any
# intermediate WAV/AIFF files derived from OUTBASE.
cleanup() {
  if [ "${KEEP_TEXT:-0}" != "1" ]; then
    rm -f "$TXT"
  fi
  rm -f "${OUTBASE}.wav" "${OUTBASE}.aiff"
}
|
|
34
|
+
trap cleanup EXIT
|
|
35
|
+
|
|
36
|
+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
37
|
+
XTTS_PY="${XTTS_PY:-$HOME/.local/share/voice-forge/xtts/bin/python}"
|
|
38
|
+
XTTS_HELPER="$SCRIPT_DIR/xtts-voice.py"
|
|
39
|
+
|
|
40
|
+
# Succeeds only when the XTTS interpreter is executable and the helper script
# exists as a regular file.
xtts_ready() {
  [ -x "$XTTS_PY" ] || return 1
  [ -f "$XTTS_HELPER" ]
}
|
|
41
|
+
|
|
42
|
+
# Print the path of the rendered file on stdout and terminate successfully.
#   $1 - path of the finished audio file
finish() {
  local rendered="$1"
  echo "$rendered"
  exit 0
}
|
|
46
|
+
|
|
47
|
+
# Convert an intermediate audio file to "${OUTBASE}.mp3" with ffmpeg, then
# report it through finish. The intermediate file is removed on every path;
# a partial MP3 is removed when the conversion fails.
#   $1 - intermediate audio file produced by a local engine
to_mp3() {
  local intermediate="$1"
  local target="${OUTBASE}.mp3"

  command -v ffmpeg >/dev/null 2>&1 || {
    rm -f "$intermediate"
    echo "error: ffmpeg is required to convert local TTS output to mp3" >&2
    exit 1
  }

  if ! ffmpeg -y -loglevel error -i "$intermediate" -ac 1 -c:a libmp3lame -q:a 4 "$target"; then
    rm -f "$intermediate" "$target"
    echo "error: failed to convert local TTS output to mp3" >&2
    exit 1
  fi

  rm -f "$intermediate"
  finish "$target"
}
|
|
62
|
+
|
|
63
|
+
# XTTS engine: always attempted when forced, attempted in auto mode only when
# the venv interpreter and helper script are present.
use_xtts=0
case "$ENGINE" in
  xtts)
    use_xtts=1
    ;;
  auto)
    if xtts_ready; then
      use_xtts=1
    fi
    ;;
esac

if [ "$use_xtts" = "1" ]; then
  # Only reachable with a missing setup when TTS_ENGINE=xtts was forced.
  xtts_ready || {
    echo "error: TTS_ENGINE=xtts but venv/helper missing ($XTTS_PY)" >&2
    exit 1
  }
  WAV="${OUTBASE}.wav"
  # The speaker argument is passed only when a voice was requested.
  xtts_args=("$XTTS_HELPER" "$TXT" "$WAV")
  if [ -n "$VOICE" ]; then
    xtts_args+=("$VOICE")
  fi
  # Helper output goes to stderr so stdout carries only the final path.
  COQUI_TOS_AGREED=1 "$XTTS_PY" "${xtts_args[@]}" >&2
  to_mp3 "$WAV"
fi
|
|
76
|
+
|
|
77
|
+
# macOS `say` engine: used when forced or in auto mode, if the command exists.
case "$ENGINE" in
  say | auto)
    if command -v say >/dev/null 2>&1; then
      AIFF="${OUTBASE}.aiff"
      # Try the requested (or default "Jacques") voice first; if that fails,
      # retry with the system default voice.
      if ! say -v "${VOICE:-Jacques}" -f "$TXT" -o "$AIFF" 2>/dev/null; then
        say -f "$TXT" -o "$AIFF"
      fi
      to_mp3 "$AIFF"
    fi
    # Forced engine that is not installed is an error; auto falls through.
    if [ "$ENGINE" = "say" ]; then
      echo "error: TTS_ENGINE=say but 'say' not available" >&2
      exit 1
    fi
    ;;
esac
|
|
88
|
+
|
|
89
|
+
# Piper engine: used when forced or in auto mode, if the command exists.
case "$ENGINE" in
  piper | auto)
    if command -v piper >/dev/null 2>&1; then
      WAV="${OUTBASE}.wav"
      # Piper reads the narration from stdin and writes a WAV file.
      piper -m "${PIPER_MODEL:-fr_FR-siwis-medium.onnx}" -f "$WAV" < "$TXT"
      to_mp3 "$WAV"
    fi
    # Forced engine that is not installed is an error; auto falls through.
    if [ "$ENGINE" = "piper" ]; then
      echo "error: TTS_ENGINE=piper but piper not installed (pip install piper-tts)" >&2
      exit 1
    fi
    ;;
esac
|
|
100
|
+
|
|
101
|
+
# Online Edge TTS: used only when forced (TTS_ENGINE=edge) or when auto mode is
# explicitly allowed to go online (MIMIR_ALLOW_ONLINE_TTS=1).
if [ "$ENGINE" = "edge" ] || { [ "$ENGINE" = "auto" ] && [ "${MIMIR_ALLOW_ONLINE_TTS:-0}" = "1" ]; }; then
  if command -v edge-tts >/dev/null 2>&1; then
    OUT="${OUTBASE}.mp3"
    SPLIT="$SCRIPT_DIR/split-lines.py"
    voice="${VOICE:-fr-FR-DeniseNeural}"
    rate="${TTS_RATE:-+0%}"

    # Sentence-by-sentence rendering needs the splitter, ffmpeg and python3;
    # when any is missing, fall back to the single-shot render below.
    if [ "${TTS_SEGMENT:-0}" = "1" ] && [ -f "$SPLIT" ] \
      && command -v ffmpeg >/dev/null 2>&1 && command -v python3 >/dev/null 2>&1; then
      TMP="$(mktemp -d)"

      # Every failure path below removes $TMP explicitly: it holds rendered
      # narration segments, and `set -e` would otherwise exit before the
      # final `rm -rf`.
      if ! ffmpeg -y -loglevel error -f lavfi -i anullsrc=r=24000:cl=mono -t 0.28 \
        -c:a libmp3lame -q:a 4 "$TMP/sil.mp3"; then
        rm -rf "$TMP"
        echo "error: failed to generate the inter-sentence silence clip" >&2
        exit 1
      fi

      i=0
      rendered=0
      : > "$TMP/list.txt"
      while IFS= read -r line; do
        [ -z "$line" ] && continue
        i=$((i + 1))
        # --text="..." keeps a sentence that starts with "-" from being
        # parsed as an option (same form already used for --rate).
        if edge-tts --text="$line" --voice "$voice" --rate="$rate" \
          --write-media "$TMP/seg_$i.mp3" >/dev/null 2>&1; then
          printf "file '%s'\n" "$TMP/seg_$i.mp3" >> "$TMP/list.txt"
          printf "file '%s'\n" "$TMP/sil.mp3" >> "$TMP/list.txt"
          rendered=$((rendered + 1))
        else
          echo "warn: edge-tts failed on a sentence, skipping it" >&2
        fi
      done < <(python3 "$SPLIT" "$TXT")

      # An empty concat list would only produce a cryptic ffmpeg failure.
      if [ "$rendered" -eq 0 ]; then
        rm -rf "$TMP"
        echo "error: edge-tts produced no audio segments" >&2
        exit 1
      fi

      if ! ffmpeg -y -loglevel error -f concat -safe 0 -i "$TMP/list.txt" \
        -ac 1 -c:a libmp3lame -q:a 4 "$OUT"; then
        rm -rf "$TMP"
        rm -f "$OUT"
        echo "error: failed to concatenate edge-tts segments" >&2
        exit 1
      fi
      rm -rf "$TMP"
      finish "$OUT"
    fi

    # Single-shot render; drop a partial output file on failure, as the
    # local-engine conversion path does.
    if ! edge-tts --file "$TXT" --voice "$voice" --rate="$rate" --write-media "$OUT" >/dev/null; then
      rm -f "$OUT"
      echo "error: edge-tts failed to render the narration" >&2
      exit 1
    fi
    finish "$OUT"
  fi
  # Forced engine that is not installed is an error; auto falls through.
  if [ "$ENGINE" = "edge" ]; then
    echo "error: TTS_ENGINE=edge but edge-tts not installed (pipx install edge-tts)" >&2
    exit 1
  fi
fi
|
|
138
|
+
|
|
139
|
+
# Reached only when no engine branch above rendered audio. An unrecognized
# TTS_ENGINE value also lands here, so report that specifically instead of
# printing install instructions for engines that may already be present.
# The exit status stays 127 in both cases.
case "$ENGINE" in
  auto | xtts | say | piper | edge) ;;
  *)
    echo "error: unknown TTS_ENGINE '$ENGINE' (expected auto, xtts, say, piper, or edge)" >&2
    exit 127
    ;;
esac

cat >&2 <<'EOF'
error: no offline TTS engine available. Install one:
XTTS-v2 (local):
uv venv --python 3.11 ~/.local/share/voice-forge/xtts
uv pip install --python ~/.local/share/voice-forge/xtts/bin/python \
coqui-tts 'transformers>=4.57,<5' 'torch==2.8.*' 'torchaudio==2.8.*'
piper (local): pip install piper-tts
ffmpeg: required for local engine MP3 conversion

Online fallback:
pipx install edge-tts
MIMIR_ALLOW_ONLINE_TTS=1 TTS_ENGINE=auto ...
or explicitly set TTS_ENGINE=edge for non-sensitive text.
EOF
exit 127
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Print one sentence per line from a text file.

Used to render long narrations sentence-by-sentence when an engine benefits from segmentation.
"""
import re
import sys


def split_sentences(raw: str) -> list:
    """Return the sentences found in *raw*, in order.

    All whitespace runs (including newlines) are collapsed to single spaces,
    then the text is split after each ``.``, ``!``, ``?`` or ``…`` that is
    followed by whitespace. Empty pieces are dropped, so empty or
    whitespace-only input yields an empty list.
    """
    text = re.sub(r"\s+", " ", raw).strip()
    pieces = (piece.strip() for piece in re.split(r"(?<=[.!?…])\s+", text))
    return [piece for piece in pieces if piece]


def main() -> None:
    """Read the file named by the first CLI argument and print its sentences."""
    if len(sys.argv) < 2:
        sys.exit("usage: split-lines.py <text-file>")
    # Context manager so the handle is closed deterministically.
    with open(sys.argv[1], encoding="utf-8") as handle:
        raw = handle.read()
    for sentence in split_sentences(raw):
        print(sentence)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Render a text file to WAV with Coqui XTTS-v2.
|
|
3
|
+
|
|
4
|
+
Run with the dedicated venv interpreter:
|
|
5
|
+
~/.local/share/voice-forge/xtts/bin/python xtts-voice.py <text.txt> <out.wav> [speaker]
|
|
6
|
+
|
|
7
|
+
Env: XTTS_SPEAKER (preset name), XTTS_LANG (default "fr").
|
|
8
|
+
"""
|
|
9
|
+
import os
|
|
10
|
+
import sys
|
|
11
|
+
|
|
12
|
+
os.environ.setdefault("COQUI_TOS_AGREED", "1")
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def main() -> None:
    """Render the text file named on the command line to a WAV file.

    Command line: ``<text.txt> <out.wav> [speaker]``. The speaker comes from
    the third argument, else ``XTTS_SPEAKER``, else ``"Ana Florence"``; the
    language comes from ``XTTS_LANG`` (default ``"fr"``). Exits with an error
    message when arguments are missing, the text file cannot be read, or the
    text is empty. On success the output path is printed to stdout.
    """
    if len(sys.argv) < 3:
        sys.exit("usage: xtts-voice.py <text.txt> <out.wav> [speaker]")
    text_file, out_wav = sys.argv[1], sys.argv[2]
    speaker = sys.argv[3] if len(sys.argv) > 3 else os.environ.get("XTTS_SPEAKER", "Ana Florence")
    language = os.environ.get("XTTS_LANG", "fr")

    # Report an unreadable or non-UTF-8 input with the same "error: ..." exit
    # style as the other failures instead of a raw traceback.
    try:
        with open(text_file, encoding="utf-8") as handle:
            text = handle.read().strip()
    except (OSError, UnicodeDecodeError) as err:
        sys.exit(f"error: cannot read text file {text_file}: {err}")
    if not text:
        sys.exit("error: empty text file")

    # Imported here, after validation, so argument and input errors are
    # reported without loading the TTS package.
    from TTS.api import TTS

    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
    speakers = list(getattr(tts, "speakers", None) or [])
    # When the model exposes a speaker list and the requested one is absent,
    # warn on stderr and use the first available speaker.
    if speakers and speaker not in speakers:
        sys.stderr.write(f"speaker '{speaker}' not found; falling back to '{speakers[0]}'\n")
        speaker = speakers[0]

    tts.tts_to_file(
        text=text,
        speaker=speaker,
        language=language,
        file_path=out_wav,
        split_sentences=True,
    )
    print(out_wav)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
if __name__ == "__main__":
|
|
46
|
+
main()
|