mimic-tts 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mimic_tts-0.0.1/.gitignore +32 -0
- mimic_tts-0.0.1/PKG-INFO +24 -0
- mimic_tts-0.0.1/README.md +9 -0
- mimic_tts-0.0.1/mimic/__init__.py +7 -0
- mimic_tts-0.0.1/mimic/_base.py +66 -0
- mimic_tts-0.0.1/mimic/_version.py +1 -0
- mimic_tts-0.0.1/mimic/async_client.py +147 -0
- mimic_tts-0.0.1/mimic/cli.py +157 -0
- mimic_tts-0.0.1/mimic/client.py +148 -0
- mimic_tts-0.0.1/mimic/config.py +71 -0
- mimic_tts-0.0.1/mimic/errors.py +28 -0
- mimic_tts-0.0.1/mimic/recorder.py +75 -0
- mimic_tts-0.0.1/pyproject.toml +29 -0
- mimic_tts-0.0.1/tests/test_async_client.py +105 -0
- mimic_tts-0.0.1/tests/test_base.py +90 -0
- mimic_tts-0.0.1/tests/test_cli.py +133 -0
- mimic_tts-0.0.1/tests/test_client.py +174 -0
- mimic_tts-0.0.1/tests/test_client_config.py +78 -0
- mimic_tts-0.0.1/tests/test_recorder.py +88 -0
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# Python-generated files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[oc]
|
|
4
|
+
build/
|
|
5
|
+
dist/
|
|
6
|
+
wheels/
|
|
7
|
+
*.egg-info
|
|
8
|
+
|
|
9
|
+
# Virtual environments
|
|
10
|
+
.venv
|
|
11
|
+
|
|
12
|
+
# Voice reference samples (personal data — don't commit recordings)
|
|
13
|
+
server/reference/*
|
|
14
|
+
!server/reference/.gitkeep
|
|
15
|
+
reference/*
|
|
16
|
+
!reference/.gitkeep
|
|
17
|
+
|
|
18
|
+
# Build artifacts
|
|
19
|
+
dist/
|
|
20
|
+
*.egg-info/
|
|
21
|
+
|
|
22
|
+
# Local secrets / env
|
|
23
|
+
.env
|
|
24
|
+
.env.local
|
|
25
|
+
|
|
26
|
+
# IDE / OS junk
|
|
27
|
+
.DS_Store
|
|
28
|
+
.idea/
|
|
29
|
+
.vscode/
|
|
30
|
+
|
|
31
|
+
# Audio recordings outside the reference dir
|
|
32
|
+
/*.wav
|
mimic_tts-0.0.1/PKG-INFO
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mimic-tts
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Client for mimic-tts (Qwen3-TTS voice cloning + synthesis)
|
|
5
|
+
Author: Jim Vogel
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: <3.14,>=3.12
|
|
8
|
+
Requires-Dist: httpx>=0.27
|
|
9
|
+
Requires-Dist: numpy>=1.26
|
|
10
|
+
Requires-Dist: platformdirs>=4.0
|
|
11
|
+
Requires-Dist: sounddevice>=0.4
|
|
12
|
+
Requires-Dist: soundfile>=0.12
|
|
13
|
+
Requires-Dist: typer>=0.12
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
|
|
16
|
+
# mimic-tts
|
|
17
|
+
|
|
18
|
+
Python client and CLI for the [mimic-tts](https://github.com/voglster/mimic-tts) server.
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
pip install mimic-tts
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
Full documentation: <https://github.com/voglster/mimic-tts>
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""Shared request-building and error-translation logic."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from typing import TYPE_CHECKING, Any
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
import httpx
|
|
10
|
+
|
|
11
|
+
from mimic.errors import (
|
|
12
|
+
MimicAPIError,
|
|
13
|
+
MimicAuthError,
|
|
14
|
+
MimicNotFoundError,
|
|
15
|
+
MimicValidationError,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
|
|
20
|
+
class RequestSpec:
|
|
21
|
+
method: str
|
|
22
|
+
url: str
|
|
23
|
+
headers: dict[str, str] = field(default_factory=dict)
|
|
24
|
+
data: dict[str, Any] | None = None
|
|
25
|
+
files: dict[str, Any] | None = None
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def build_request_spec(
    *,
    base_url: str,
    method: str,
    path: str,
    token: str | None,
    data: dict[str, Any] | None = None,
    files: dict[str, Any] | None = None,
) -> RequestSpec:
    """Assemble a ``RequestSpec`` for one endpoint call.

    Args:
        base_url: Server root; trailing slashes are stripped before joining.
        method: HTTP verb, stored on the spec unchanged.
        path: Endpoint path; must begin with ``/``.
        token: Bearer token. ``None`` means no ``Authorization`` header.
        data: Optional form fields, stored on the spec unchanged.
        files: Optional multipart uploads, stored on the spec unchanged.

    Raises:
        ValueError: If ``path`` does not start with ``/``.
    """
    if not path.startswith("/"):
        raise ValueError(f"path must start with '/': {path!r}")

    root = base_url.rstrip("/")
    auth_headers: dict[str, str] = (
        {} if token is None else {"Authorization": f"Bearer {token}"}
    )
    return RequestSpec(
        method=method,
        url=root + path,
        headers=auth_headers,
        data=data,
        files=files,
    )
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _extract_detail(response: httpx.Response) -> str:
|
|
47
|
+
try:
|
|
48
|
+
body = response.json()
|
|
49
|
+
if isinstance(body, dict) and "detail" in body:
|
|
50
|
+
return str(body["detail"])
|
|
51
|
+
except Exception: # noqa: S110
|
|
52
|
+
pass
|
|
53
|
+
return response.text or response.reason_phrase or ""
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def raise_for_response(response: httpx.Response) -> None:
    """Raise the matching ``Mimic*Error`` for an error response.

    Status codes below 400 return normally. Otherwise the error carries the
    status code and the text produced by ``_extract_detail``:

    * 401 -> ``MimicAuthError``
    * 404 -> ``MimicNotFoundError``
    * any other 4xx -> ``MimicValidationError``
    * everything else (5xx and above) -> ``MimicAPIError``
    """
    status = response.status_code
    if status < 400:
        return

    if status == 401:
        error_cls = MimicAuthError
    elif status == 404:
        error_cls = MimicNotFoundError
    elif status < 500:
        error_cls = MimicValidationError
    else:
        error_cls = MimicAPIError
    raise error_cls(status, _extract_detail(response))
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.0.1"
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
"""Asynchronous client for the mimic-tts server."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import TYPE_CHECKING, Any
|
|
8
|
+
|
|
9
|
+
import httpx
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from io import BufferedReader
|
|
13
|
+
|
|
14
|
+
from mimic._base import build_request_spec, raise_for_response
|
|
15
|
+
from mimic.client import _as_upload
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class AsyncClient:
    """Async client. Use as `async with AsyncClient(...) as c:`."""

    def __init__(
        self,
        server_url: str | None = None,
        token: str | None = None,
        timeout: float = 60.0,
        transport: httpx.AsyncBaseTransport | httpx.BaseTransport | None = None,
    ) -> None:
        """Create the client.

        Args:
            server_url: Server root. Falls back to ``MIMIC_SERVER_URL`` and
                then ``http://localhost:8000``.
            token: Bearer token. When ``None``, ``MIMIC_API_TOKEN`` is used
                if it is set.
            timeout: Timeout handed to ``httpx.AsyncClient``.
            transport: Optional transport handed to ``httpx.AsyncClient``.
        """
        self._base_url = server_url or os.environ.get("MIMIC_SERVER_URL") or "http://localhost:8000"
        self._token = token if token is not None else os.environ.get("MIMIC_API_TOKEN")
        self._http = httpx.AsyncClient(timeout=timeout, transport=transport)  # type: ignore[arg-type]

    async def __aenter__(self) -> AsyncClient:
        return self

    async def __aexit__(self, *exc: object) -> None:
        await self.close()

    async def close(self) -> None:
        """Close the underlying ``httpx.AsyncClient``."""
        await self._http.aclose()

    async def _send(self, method: str, path: str, **kwargs: Any) -> httpx.Response:
        """Build, send and status-check one request; return the raw response.

        Shared by ``_request_json`` and ``_request_audio`` so the request
        plumbing exists in exactly one place. ``kwargs`` are forwarded to
        ``build_request_spec`` (``data`` / ``files``).
        """
        spec = build_request_spec(
            base_url=self._base_url,
            method=method,
            path=path,
            token=self._token,
            **kwargs,
        )
        response = await self._http.request(
            spec.method,
            spec.url,
            headers=spec.headers,
            data=spec.data,
            files=spec.files,
        )
        raise_for_response(response)
        return response

    async def _request_json(self, method: str, path: str, **kwargs: Any) -> Any:
        """Send a request and return the decoded JSON body."""
        return (await self._send(method, path, **kwargs)).json()

    async def _request_audio(self, method: str, path: str, **kwargs: Any) -> bytes:
        """Send a request and return the raw response bytes."""
        return (await self._send(method, path, **kwargs)).content

    async def health(self) -> dict[str, Any]:
        """Return the JSON body of ``GET /health``."""
        return await self._request_json("GET", "/health")

    async def list_voices(self) -> list[dict[str, str]]:
        """Return the ``"voices"`` entry of ``GET /voices``."""
        return (await self._request_json("GET", "/voices"))["voices"]

    async def list_clones(self) -> list[str]:
        """Return the ``"voices"`` entry of ``GET /clone/voices``."""
        return (await self._request_json("GET", "/clone/voices"))["voices"]

    async def tts(
        self,
        text: str,
        *,
        language: str = "English",
        speaker: str = "Ryan",
        instruct: str = "",
    ) -> bytes:
        """POST the given fields to ``/tts`` and return the response bytes."""
        return await self._request_audio(
            "POST",
            "/tts",
            data={"text": text, "language": language, "speaker": speaker, "instruct": instruct},
        )

    async def tts_to_file(self, text: str, out: Path | str, **kwargs: Any) -> Path:
        """Run ``tts`` and write the returned bytes to ``out``.

        ``kwargs`` are forwarded to ``tts``. Returns ``out`` as a ``Path``.
        """
        audio = await self.tts(text, **kwargs)
        out_path = Path(out)
        out_path.write_bytes(audio)
        return out_path

    async def clone_register(
        self,
        name: str,
        audio: Path | str | bytes | BufferedReader,
        transcript: str,
    ) -> dict[str, str]:
        """POST a reference recording to ``/clone/register``.

        ``audio`` is uploaded as the ``ref_audio`` file and ``transcript`` is
        sent as ``ref_text``. Returns the decoded JSON response.
        """
        files = {"ref_audio": _as_upload(audio)}
        return await self._request_json(
            "POST",
            "/clone/register",
            data={"name": name, "ref_text": transcript},
            files=files,
        )

    async def clone_tts(
        self,
        name: str,
        text: str,
        *,
        language: str = "English",
    ) -> bytes:
        """POST to ``/clone/tts`` for clone ``name``; return the response bytes."""
        return await self._request_audio(
            "POST",
            "/clone/tts",
            data={"text": text, "language": language, "name": name},
        )

    async def clone_oneshot(
        self,
        text: str,
        audio: Path | str | bytes | BufferedReader,
        transcript: str,
        *,
        language: str = "English",
    ) -> bytes:
        """POST text plus a reference recording to ``/clone/oneshot``.

        ``audio`` is uploaded as ``ref_audio`` and ``transcript`` is sent as
        ``ref_text``. Returns the response bytes.
        """
        files = {"ref_audio": _as_upload(audio)}
        return await self._request_audio(
            "POST",
            "/clone/oneshot",
            data={"text": text, "language": language, "ref_text": transcript},
            files=files,
        )
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
"""`mimic` CLI — typer-based command-line interface."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import io
|
|
6
|
+
import sys
|
|
7
|
+
import threading
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Annotated
|
|
10
|
+
|
|
11
|
+
import typer
|
|
12
|
+
|
|
13
|
+
from mimic.client import Client
|
|
14
|
+
from mimic.config import load_config
|
|
15
|
+
from mimic.recorder import (
|
|
16
|
+
DEFAULT_SAMPLE_RATE,
|
|
17
|
+
pick_script,
|
|
18
|
+
play,
|
|
19
|
+
record_until_enter,
|
|
20
|
+
save_wav,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
# Root command group. Shell completion is disabled, and running with no
# arguments prints help.
app = typer.Typer(help="mimic-tts CLI", no_args_is_help=True, add_completion=False)

# Commands for registered clone voices, mounted as `clone`.
clone_app = typer.Typer(help="Clone voice operations", no_args_is_help=True)
app.add_typer(clone_app, name="clone")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _client() -> Client:
    """Build a ``Client`` from the resolved configuration."""
    settings = load_config()
    return Client(token=settings.token, server_url=settings.server_url)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@app.command()
def say(
    text: Annotated[str, typer.Argument(help="Text to synthesize.")],
    voice: Annotated[str | None, typer.Option(help="Speaker name.")] = None,
    out: Annotated[Path, typer.Option(help="Output wav path.")] = Path("out.wav"),
    language: Annotated[str, typer.Option()] = "English",
) -> None:
    """Synthesize speech with a built-in voice."""
    # An explicit --voice wins; otherwise use the configured default voice.
    settings = load_config()
    chosen_speaker = voice or settings.default_voice
    with _client() as client:
        client.tts_to_file(text, out, language=language, speaker=chosen_speaker)
    typer.echo(f"wrote {out}")
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@app.command()
def voices() -> None:
    """List built-in voices."""
    with _client() as client:
        # One line per voice: name padded to 12 columns, then its language.
        for entry in client.list_voices():
            typer.echo(f"{entry['name']:12s} {entry['language']}")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@app.command()
def clones() -> None:
    """List registered clone voices."""
    with _client() as client:
        # One clone name per output line.
        for clone_name in client.list_clones():
            typer.echo(clone_name)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@app.command()
def health() -> None:
    """Show server health and currently loaded models."""
    with _client() as client:
        # The health payload is echoed as returned by the client.
        typer.echo(client.health())
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@app.command(name="config")
def show_config() -> None:
    """Print the resolved client configuration."""
    settings = load_config()
    # Never print the token itself, only whether one is configured.
    token_state = "<set>" if settings.token else "<none>"
    typer.echo(f"server_url {settings.server_url}")
    typer.echo(f"token {token_state}")
    typer.echo(f"default_voice {settings.default_voice}")
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
@app.command()
def record(
    name: Annotated[str, typer.Argument(help="Name to register the clone under.")],
    audio: Annotated[Path | None, typer.Option(help="Skip the recorder; use this file.")] = None,
    text: Annotated[str | None, typer.Option(help="Transcript for --audio.")] = None,
) -> None:
    """Record a reference voice and register it on the server."""
    # Without --audio, run the guided recorder.
    if audio is None:
        _interactive_record_and_register(name)
        return

    # A pre-recorded file needs its transcript supplied alongside it.
    if text is None:
        typer.echo("--text is required when --audio is provided", err=True)
        raise typer.Exit(2)

    with _client() as client:
        registered = client.clone_register(name, audio, text)
        typer.echo(f"registered '{registered['name']}'")
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
@clone_app.command(name="say")
def clone_say(
    name: Annotated[str, typer.Argument(help="Registered clone name.")],
    text: Annotated[str, typer.Argument()],
    out: Annotated[Path, typer.Option(help="Output wav path.")] = Path("out.wav"),
    language: Annotated[str, typer.Option()] = "English",
) -> None:
    """Synthesize speech using a registered clone voice."""
    with _client() as client:
        wav_bytes = client.clone_tts(name, text, language=language)
    # The file is written only after the client has been closed.
    out.write_bytes(wav_bytes)
    typer.echo(f"wrote {out}")
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _interactive_record_and_register(name: str) -> None:
    """Drive the guided recorder. Kept thin; primitives live in `mimic.recorder`.

    Each take shows a script, records until Enter is pressed, and plays the
    result back. The user then keeps it, discards it, or retries. A kept take
    is encoded with ``save_wav`` and uploaded via ``Client.clone_register``
    under ``name``.

    Raises:
        typer.Exit: With code 0 when the user discards the take.
    """
    # Retry is a loop rather than recursion, so repeated retries cannot grow
    # the call stack. Each retry picks a script again, as before.
    while True:
        script = pick_script()
        typer.echo(f"\nRead this script when ready:\n\n {script}\n")
        typer.prompt("Press Enter to start recording", default="", show_default=False)

        typer.echo("Recording… press Enter to stop.")
        stop = threading.Event()

        # Bind this take's event as a default so the thread never observes a
        # later iteration's `stop`.
        def _wait_for_enter(done: threading.Event = stop) -> None:
            sys.stdin.readline()
            done.set()

        waiter = threading.Thread(target=_wait_for_enter, daemon=True)
        waiter.start()

        result = record_until_enter(
            sample_rate=DEFAULT_SAMPLE_RATE,
            channels=1,
            max_seconds=30.0,
            stop_event=stop,
        )

        # NOTE(review): presumably record_until_enter can return without Enter
        # (it is given max_seconds); confirm against mimic.recorder. In that
        # case the waiter is still blocked reading stdin and would swallow the
        # answer to the next prompt, so drain it explicitly first.
        if waiter.is_alive():
            typer.echo("Recording stopped. Press Enter to continue.")
            waiter.join()

        typer.echo("Playing back…")
        play(result.audio, result.sample_rate)

        keep = typer.prompt("Keep this take? [y/N/r=retry]", default="N").strip().lower()
        if keep != "r":
            break

    if not keep.startswith("y"):
        typer.echo("discarded.")
        raise typer.Exit(0)

    transcript = typer.prompt("Transcript", default=script)

    # Encode the take as WAV in memory; the client accepts raw bytes.
    buf = io.BytesIO()
    save_wav(buf, result.audio, sample_rate=result.sample_rate)

    with _client() as c:
        out = c.clone_register(name, buf.getvalue(), transcript)
        typer.echo(f"registered '{out['name']}'")
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""Synchronous client for the mimic-tts server."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import TYPE_CHECKING, Any
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from io import BufferedReader
|
|
11
|
+
|
|
12
|
+
import httpx
|
|
13
|
+
|
|
14
|
+
from mimic._base import build_request_spec, raise_for_response
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Client:
    """Sync client. Use as a context manager to ensure the transport closes."""

    def __init__(
        self,
        server_url: str | None = None,
        token: str | None = None,
        timeout: float = 60.0,
        transport: httpx.BaseTransport | None = None,
    ) -> None:
        """Create the client.

        Args:
            server_url: Server root. Falls back to ``MIMIC_SERVER_URL`` and
                then ``http://localhost:8000``.
            token: Bearer token. When ``None``, ``MIMIC_API_TOKEN`` is used
                if it is set.
            timeout: Timeout handed to ``httpx.Client``.
            transport: Optional transport handed to ``httpx.Client``.
        """
        self._base_url = server_url or os.environ.get("MIMIC_SERVER_URL") or "http://localhost:8000"
        self._token = token if token is not None else os.environ.get("MIMIC_API_TOKEN")
        self._http = httpx.Client(timeout=timeout, transport=transport)

    def __enter__(self) -> Client:
        return self

    def __exit__(self, *exc: object) -> None:
        self.close()

    def close(self) -> None:
        """Close the underlying ``httpx.Client``."""
        self._http.close()

    def _send(self, method: str, path: str, **kwargs: Any) -> httpx.Response:
        """Build, send and status-check one request; return the raw response.

        Shared by ``_request_json`` and ``_request_audio`` so the request
        plumbing exists in exactly one place. ``kwargs`` are forwarded to
        ``build_request_spec`` (``data`` / ``files``).
        """
        spec = build_request_spec(
            base_url=self._base_url,
            method=method,
            path=path,
            token=self._token,
            **kwargs,
        )
        response = self._http.request(
            spec.method, spec.url, headers=spec.headers, data=spec.data, files=spec.files
        )
        raise_for_response(response)
        return response

    def _request_json(self, method: str, path: str, **kwargs: Any) -> Any:
        """Send a request and return the decoded JSON body."""
        return self._send(method, path, **kwargs).json()

    def _request_audio(self, method: str, path: str, **kwargs: Any) -> bytes:
        """Send a request and return the raw response bytes."""
        return self._send(method, path, **kwargs).content

    def health(self) -> dict[str, Any]:
        """Return the JSON body of ``GET /health``."""
        return self._request_json("GET", "/health")

    def list_voices(self) -> list[dict[str, str]]:
        """Return the ``"voices"`` entry of ``GET /voices``."""
        return self._request_json("GET", "/voices")["voices"]

    def list_clones(self) -> list[str]:
        """Return the ``"voices"`` entry of ``GET /clone/voices``."""
        return self._request_json("GET", "/clone/voices")["voices"]

    def tts(
        self,
        text: str,
        *,
        language: str = "English",
        speaker: str = "Ryan",
        instruct: str = "",
    ) -> bytes:
        """POST the given fields to ``/tts`` and return the response bytes."""
        return self._request_audio(
            "POST",
            "/tts",
            data={"text": text, "language": language, "speaker": speaker, "instruct": instruct},
        )

    def tts_to_file(self, text: str, out: Path | str, **kwargs: Any) -> Path:
        """Run ``tts`` and write the returned bytes to ``out``.

        ``kwargs`` are forwarded to ``tts``. Returns ``out`` as a ``Path``.
        """
        audio = self.tts(text, **kwargs)
        out_path = Path(out)
        out_path.write_bytes(audio)
        return out_path

    def clone_register(
        self,
        name: str,
        audio: Path | str | bytes | BufferedReader,
        transcript: str,
    ) -> dict[str, str]:
        """POST a reference recording to ``/clone/register``.

        ``audio`` is uploaded as the ``ref_audio`` file and ``transcript`` is
        sent as ``ref_text``. Returns the decoded JSON response.
        """
        files = {"ref_audio": _as_upload(audio)}
        return self._request_json(
            "POST",
            "/clone/register",
            data={"name": name, "ref_text": transcript},
            files=files,
        )

    def clone_tts(
        self,
        name: str,
        text: str,
        *,
        language: str = "English",
    ) -> bytes:
        """POST to ``/clone/tts`` for clone ``name``; return the response bytes."""
        return self._request_audio(
            "POST",
            "/clone/tts",
            data={"text": text, "language": language, "name": name},
        )

    def clone_oneshot(
        self,
        text: str,
        audio: Path | str | bytes | BufferedReader,
        transcript: str,
        *,
        language: str = "English",
    ) -> bytes:
        """POST text plus a reference recording to ``/clone/oneshot``.

        ``audio`` is uploaded as ``ref_audio`` and ``transcript`` is sent as
        ``ref_text``. Returns the response bytes.
        """
        files = {"ref_audio": _as_upload(audio)}
        return self._request_audio(
            "POST",
            "/clone/oneshot",
            data={"text": text, "language": language, "ref_text": transcript},
            files=files,
        )
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def _as_upload(audio: Path | str | bytes | BufferedReader) -> tuple[str, Any, str]:
|
|
142
|
+
"""Normalize audio inputs to a (filename, fileobj-or-bytes, content-type) tuple."""
|
|
143
|
+
if isinstance(audio, (str, Path)):
|
|
144
|
+
path = Path(audio)
|
|
145
|
+
return (path.name, path.read_bytes(), "audio/wav")
|
|
146
|
+
if isinstance(audio, bytes):
|
|
147
|
+
return ("ref.wav", audio, "audio/wav")
|
|
148
|
+
return ("ref.wav", audio, "audio/wav")
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""Client configuration: kwarg → env → TOML → defaults."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import tomllib
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from platformdirs import user_config_path
|
|
11
|
+
|
|
12
|
+
# Values used when neither keyword arguments, environment variables nor the
# config file provide a setting.
DEFAULT_SERVER_URL = "http://localhost:8000"
DEFAULT_VOICE = "Ryan"

# Only these top-level config.toml keys are read; any other key is dropped.
_KNOWN_TOML_KEYS = frozenset(("server_url", "token", "default_voice"))
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
|
|
19
|
+
class ClientConfig:
|
|
20
|
+
server_url: str
|
|
21
|
+
token: str | None
|
|
22
|
+
default_voice: str
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _config_dir() -> Path:
|
|
26
|
+
override = os.environ.get("MIMIC_CONFIG_DIR")
|
|
27
|
+
if override:
|
|
28
|
+
return Path(override)
|
|
29
|
+
return user_config_path("mimic", appauthor=False)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _read_toml(config_dir: Path) -> dict[str, object]:
    """Load the recognised settings from ``config_dir / "config.toml"``.

    Args:
        config_dir: Directory expected to contain ``config.toml``.

    Returns:
        The top-level entries whose keys are in ``_KNOWN_TOML_KEYS``, or an
        empty dict when the file does not exist.

    Raises:
        ValueError: If the file exists but is not valid UTF-8 TOML.
    """
    path = config_dir / "config.toml"
    try:
        # Open directly instead of checking exists() first, so a file that
        # disappears between the check and the open cannot cause a crash.
        with path.open("rb") as f:
            data = tomllib.load(f)
    except (FileNotFoundError, NotADirectoryError):
        # Same cases exists() reported as "missing": no file, or a path
        # component that is not a directory.
        return {}
    except (tomllib.TOMLDecodeError, UnicodeDecodeError) as e:
        # tomllib decodes the bytes as UTF-8 before parsing; report both
        # failure modes with the offending path.
        raise ValueError(f"invalid TOML at {path}: {e}") from e
    return {k: v for k, v in data.items() if k in _KNOWN_TOML_KEYS}
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def load_config(
    *,
    server_url: str | None = None,
    token: str | None = None,
    default_voice: str | None = None,
    config_dir: Path | None = None,
) -> ClientConfig:
    """Resolve config: kwarg → env → TOML → defaults.

    ``server_url`` and ``token`` consult ``MIMIC_SERVER_URL`` and
    ``MIMIC_API_TOKEN``; ``default_voice`` has no environment layer and goes
    straight from the keyword argument to the TOML file. ``config_dir``
    overrides where ``config.toml`` is looked up.
    """
    file_data = _read_toml(config_dir or _config_dir())
    env = os.environ

    url = (
        server_url
        or env.get("MIMIC_SERVER_URL")
        or file_data.get("server_url")
        or DEFAULT_SERVER_URL
    )

    # An explicitly passed token (even an empty one) is kept as given; only
    # None falls through to the environment and the file.
    if token is not None:
        chosen_token = token
    else:
        chosen_token = env.get("MIMIC_API_TOKEN") or file_data.get("token") or None

    voice = default_voice or file_data.get("default_voice") or DEFAULT_VOICE

    return ClientConfig(
        server_url=str(url),
        token=None if chosen_token is None else str(chosen_token),
        default_voice=str(voice),
    )
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""Exception hierarchy for mimic-tts client errors."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class MimicError(Exception):
    """Base class for all mimic-tts client errors."""


class MimicAPIError(MimicError):
    """Server returned a non-2xx response.

    Attributes:
        status_code: HTTP status code passed to the constructor.
        message: Error text passed to the constructor.
    """

    def __init__(self, status_code: int, message: str) -> None:
        super().__init__(f"HTTP {status_code}: {message}")
        self.status_code = status_code
        self.message = message

    def __reduce__(self) -> tuple[object, ...]:
        # The default reduction would rebuild the error from ``self.args``,
        # the single formatted string, which does not match the two-argument
        # constructor. Rebuild from the original arguments so pickle and copy
        # work for this class and its subclasses.
        return (type(self), (self.status_code, self.message), self.__dict__.copy())


class MimicAuthError(MimicAPIError):
    """401: missing or invalid bearer token."""


class MimicNotFoundError(MimicAPIError):
    """404: requested resource (e.g. clone voice) does not exist."""


class MimicValidationError(MimicAPIError):
    """4xx other than 401/404: request was rejected as invalid."""
|