mimic-tts 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,32 @@
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
11
+
12
+ # Voice reference samples (personal data — don't commit recordings)
13
+ server/reference/*
14
+ !server/reference/.gitkeep
15
+ reference/*
16
+ !reference/.gitkeep
17
+
18
+ # Build artifacts
19
+ dist/
20
+ *.egg-info/
21
+
22
+ # Local secrets / env
23
+ .env
24
+ .env.local
25
+
26
+ # IDE / OS junk
27
+ .DS_Store
28
+ .idea/
29
+ .vscode/
30
+
31
+ # Audio recordings outside the reference dir
32
+ /*.wav
@@ -0,0 +1,24 @@
1
+ Metadata-Version: 2.4
2
+ Name: mimic-tts
3
+ Version: 0.0.1
4
+ Summary: Client for mimic-tts (Qwen3-TTS voice cloning + synthesis)
5
+ Author: Jim Vogel
6
+ License: MIT
7
+ Requires-Python: <3.14,>=3.12
8
+ Requires-Dist: httpx>=0.27
9
+ Requires-Dist: numpy>=1.26
10
+ Requires-Dist: platformdirs>=4.0
11
+ Requires-Dist: sounddevice>=0.4
12
+ Requires-Dist: soundfile>=0.12
13
+ Requires-Dist: typer>=0.12
14
+ Description-Content-Type: text/markdown
15
+
16
+ # mimic-tts
17
+
18
+ Python client and CLI for the [mimic-tts](https://github.com/voglster/mimic-tts) server.
19
+
20
+ ```bash
21
+ pip install mimic-tts
22
+ ```
23
+
24
+ Full documentation: <https://github.com/voglster/mimic-tts>
@@ -0,0 +1,9 @@
1
+ # mimic-tts
2
+
3
+ Python client and CLI for the [mimic-tts](https://github.com/voglster/mimic-tts) server.
4
+
5
+ ```bash
6
+ pip install mimic-tts
7
+ ```
8
+
9
+ Full documentation: <https://github.com/voglster/mimic-tts>
@@ -0,0 +1,7 @@
1
+ """mimic-tts — Python client for the mimic-tts server."""
2
+
3
+ from mimic._version import __version__
4
+ from mimic.async_client import AsyncClient
5
+ from mimic.client import Client
6
+
7
+ __all__ = ["AsyncClient", "Client", "__version__"]
@@ -0,0 +1,66 @@
1
+ """Shared request-building and error-translation logic."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import TYPE_CHECKING, Any
7
+
8
+ if TYPE_CHECKING:
9
+ import httpx
10
+
11
+ from mimic.errors import (
12
+ MimicAPIError,
13
+ MimicAuthError,
14
+ MimicNotFoundError,
15
+ MimicValidationError,
16
+ )
17
+
18
+
19
@dataclass
class RequestSpec:
    """Transport-agnostic description of a single HTTP request.

    Produced by `build_request_spec`; the fields map one-to-one onto the
    arguments of an httpx ``request(...)`` call.
    """

    method: str  # HTTP verb, e.g. "GET" or "POST"
    url: str  # absolute URL: base URL joined with the request path
    headers: dict[str, str] = field(default_factory=dict)
    data: dict[str, Any] | None = None  # form fields, if any
    files: dict[str, Any] | None = None  # multipart uploads, if any


def build_request_spec(
    *,
    base_url: str,
    method: str,
    path: str,
    token: str | None,
    data: dict[str, Any] | None = None,
    files: dict[str, Any] | None = None,
) -> RequestSpec:
    """Assemble the URL, auth header and payload for one request.

    Args:
        base_url: Server root; any trailing slashes are stripped before joining.
        method: HTTP verb, passed through unchanged.
        path: Request path; must start with ``/``.
        token: Bearer token. ``None`` or an empty string means "no token",
            in which case no ``Authorization`` header is added.
        data: Optional form fields.
        files: Optional multipart uploads.

    Returns:
        A `RequestSpec` ready to hand to an HTTP client.

    Raises:
        ValueError: If ``path`` does not start with ``/``.
    """
    if not path.startswith("/"):
        raise ValueError(f"path must start with '/': {path!r}")

    headers: dict[str, str] = {}
    # Truthiness, not `is not None`: an empty token (for example an exported
    # but blank MIMIC_API_TOKEN) must not produce a bare "Bearer " header.
    if token:
        headers["Authorization"] = f"Bearer {token}"

    return RequestSpec(
        method=method,
        url=base_url.rstrip("/") + path,
        headers=headers,
        data=data,
        files=files,
    )
44
+
45
+
46
def _extract_detail(response: httpx.Response) -> str:
    """Return a human-readable error detail for *response*.

    Prefers the ``detail`` field when the body is a JSON object containing
    one. Otherwise falls back to the raw body text, then the HTTP reason
    phrase, then the empty string.
    """
    try:
        body = response.json()
    except ValueError:
        # Not JSON (or not decodable): json.JSONDecodeError and
        # UnicodeDecodeError are both ValueError subclasses. Fall through to
        # the plain-text fallbacks instead of hiding unrelated failures.
        body = None

    if isinstance(body, dict) and "detail" in body:
        return str(body["detail"])
    return response.text or response.reason_phrase or ""
54
+
55
+
56
def raise_for_response(response: httpx.Response) -> None:
    """Translate an HTTP error response into the matching client exception.

    Status codes below 400 return normally. Otherwise the error detail is
    extracted from the body and raised as:

    * 401 -> `MimicAuthError`
    * 404 -> `MimicNotFoundError`
    * any other 4xx -> `MimicValidationError`
    * everything else (5xx and above) -> `MimicAPIError`
    """
    status = response.status_code
    if status < 400:
        return

    detail = _extract_detail(response)
    if status == 401:
        error_cls = MimicAuthError
    elif status == 404:
        error_cls = MimicNotFoundError
    elif status < 500:
        # status >= 400 is already established by the guard above.
        error_cls = MimicValidationError
    else:
        error_cls = MimicAPIError
    raise error_cls(status, detail)
@@ -0,0 +1 @@
1
+ __version__ = "0.0.1"
@@ -0,0 +1,147 @@
1
+ """Asynchronous client for the mimic-tts server."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from pathlib import Path
7
+ from typing import TYPE_CHECKING, Any
8
+
9
+ import httpx
10
+
11
+ if TYPE_CHECKING:
12
+ from io import BufferedReader
13
+
14
+ from mimic._base import build_request_spec, raise_for_response
15
+ from mimic.client import _as_upload
16
+
17
+
18
class AsyncClient:
    """Async client. Use as `async with AsyncClient(...) as c:`.

    Connection settings resolve in this order: explicit argument, then the
    ``MIMIC_SERVER_URL`` / ``MIMIC_API_TOKEN`` environment variables, then the
    built-in default (``http://localhost:8000``, no token).

    Every request method raises `MimicAPIError` (or one of its subclasses)
    when the server answers with a status code of 400 or above.
    """

    def __init__(
        self,
        server_url: str | None = None,
        token: str | None = None,
        timeout: float = 60.0,
        transport: httpx.AsyncBaseTransport | httpx.BaseTransport | None = None,
    ) -> None:
        """Create the client and its underlying `httpx.AsyncClient`.

        Args:
            server_url: Server root URL; falls back to ``MIMIC_SERVER_URL``,
                then ``http://localhost:8000``.
            token: Bearer token; ``None`` falls back to ``MIMIC_API_TOKEN``.
            timeout: Timeout in seconds handed to httpx.
            transport: Optional custom httpx transport.
        """
        self._base_url = server_url or os.environ.get("MIMIC_SERVER_URL") or "http://localhost:8000"
        self._token = token if token is not None else os.environ.get("MIMIC_API_TOKEN")
        self._http = httpx.AsyncClient(timeout=timeout, transport=transport)  # type: ignore[arg-type]

    async def __aenter__(self) -> AsyncClient:
        return self

    async def __aexit__(self, *exc: object) -> None:
        await self.close()

    async def close(self) -> None:
        """Close the underlying HTTP client."""
        await self._http.aclose()

    async def _send(self, method: str, path: str, **kwargs: Any) -> httpx.Response:
        """Issue one request and return the response, raising on HTTP errors.

        ``kwargs`` (``data`` / ``files``) are forwarded to
        `build_request_spec`. This is the single request pipeline shared by
        `_request_json` and `_request_audio`.
        """
        spec = build_request_spec(
            base_url=self._base_url,
            method=method,
            path=path,
            token=self._token,
            **kwargs,
        )
        response = await self._http.request(
            spec.method,
            spec.url,
            headers=spec.headers,
            data=spec.data,
            files=spec.files,
        )
        raise_for_response(response)
        return response

    async def _request_json(self, method: str, path: str, **kwargs: Any) -> Any:
        """Send a request and return the decoded JSON body."""
        return (await self._send(method, path, **kwargs)).json()

    async def _request_audio(self, method: str, path: str, **kwargs: Any) -> bytes:
        """Send a request and return the raw response body bytes."""
        return (await self._send(method, path, **kwargs)).content

    async def health(self) -> dict[str, Any]:
        """Return the JSON body of ``GET /health``."""
        return await self._request_json("GET", "/health")

    async def list_voices(self) -> list[dict[str, str]]:
        """Return the ``voices`` list from ``GET /voices``."""
        return (await self._request_json("GET", "/voices"))["voices"]

    async def list_clones(self) -> list[str]:
        """Return the ``voices`` list from ``GET /clone/voices``."""
        return (await self._request_json("GET", "/clone/voices"))["voices"]

    async def tts(
        self,
        text: str,
        *,
        language: str = "English",
        speaker: str = "Ryan",
        instruct: str = "",
    ) -> bytes:
        """POST the text and voice settings to ``/tts``; return the audio bytes."""
        return await self._request_audio(
            "POST",
            "/tts",
            data={"text": text, "language": language, "speaker": speaker, "instruct": instruct},
        )

    async def tts_to_file(self, text: str, out: Path | str, **kwargs: Any) -> Path:
        """Synthesize *text* via `tts` and write the audio to *out*.

        ``kwargs`` are forwarded to `tts`. Returns *out* as a `Path`.
        """
        audio = await self.tts(text, **kwargs)
        out_path = Path(out)
        out_path.write_bytes(audio)
        return out_path

    async def clone_register(
        self,
        name: str,
        audio: Path | str | bytes | BufferedReader,
        transcript: str,
    ) -> dict[str, str]:
        """Upload reference audio and its transcript to ``/clone/register``.

        Args:
            name: Name to register the clone voice under.
            audio: Reference audio as a file path, raw bytes, or an open
                binary file object.
            transcript: Text spoken in the reference audio (sent as ``ref_text``).

        Returns:
            The server's JSON response.
        """
        files = {"ref_audio": _as_upload(audio)}
        return await self._request_json(
            "POST",
            "/clone/register",
            data={"name": name, "ref_text": transcript},
            files=files,
        )

    async def clone_tts(
        self,
        name: str,
        text: str,
        *,
        language: str = "English",
    ) -> bytes:
        """POST to ``/clone/tts`` for the clone voice *name*; return the audio bytes."""
        return await self._request_audio(
            "POST",
            "/clone/tts",
            data={"text": text, "language": language, "name": name},
        )

    async def clone_oneshot(
        self,
        text: str,
        audio: Path | str | bytes | BufferedReader,
        transcript: str,
        *,
        language: str = "English",
    ) -> bytes:
        """POST text plus reference audio to ``/clone/oneshot``; return the audio bytes.

        Unlike `clone_tts`, the reference audio and its transcript are sent
        with the request instead of naming a registered clone.
        """
        files = {"ref_audio": _as_upload(audio)}
        return await self._request_audio(
            "POST",
            "/clone/oneshot",
            data={"text": text, "language": language, "ref_text": transcript},
            files=files,
        )
@@ -0,0 +1,157 @@
1
+ """`mimic` CLI — typer-based command-line interface."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import io
6
+ import sys
7
+ import threading
8
+ from pathlib import Path
9
+ from typing import Annotated
10
+
11
+ import typer
12
+
13
+ from mimic.client import Client
14
+ from mimic.config import load_config
15
+ from mimic.recorder import (
16
+ DEFAULT_SAMPLE_RATE,
17
+ pick_script,
18
+ play,
19
+ record_until_enter,
20
+ save_wav,
21
+ )
22
+
23
+ app = typer.Typer(no_args_is_help=True, add_completion=False, help="mimic-tts CLI")
24
+ clone_app = typer.Typer(no_args_is_help=True, help="Clone voice operations")
25
+ app.add_typer(clone_app, name="clone")
26
+
27
+
28
def _client() -> Client:
    """Build a `Client` from the resolved configuration (server URL and token)."""
    resolved = load_config()
    client = Client(server_url=resolved.server_url, token=resolved.token)
    return client
31
+
32
+
33
@app.command()
def say(
    text: Annotated[str, typer.Argument(help="Text to synthesize.")],
    voice: Annotated[str | None, typer.Option(help="Speaker name.")] = None,
    out: Annotated[Path, typer.Option(help="Output wav path.")] = Path("out.wav"),
    language: Annotated[str, typer.Option()] = "English",
) -> None:
    """Synthesize speech with a built-in voice."""
    config = load_config()
    # An explicit --voice wins; otherwise use the configured default voice.
    chosen_voice = voice if voice else config.default_voice
    with _client() as client:
        client.tts_to_file(text, out, speaker=chosen_voice, language=language)
    typer.echo(f"wrote {out}")
46
+
47
+
48
@app.command()
def voices() -> None:
    """List built-in voices."""
    with _client() as client:
        # One line per voice: name padded to 12 columns, then its language.
        for entry in client.list_voices():
            voice_name = entry["name"]
            voice_language = entry["language"]
            typer.echo(f"{voice_name:12s} {voice_language}")
54
+
55
+
56
@app.command()
def clones() -> None:
    """List registered clone voices."""
    with _client() as client:
        # Print each registered clone name on its own line.
        for clone_name in client.list_clones():
            typer.echo(clone_name)
62
+
63
+
64
@app.command()
def health() -> None:
    """Show server health and currently loaded models."""
    with _client() as client:
        status = client.health()
    # Echo the raw health payload once the client has been closed.
    typer.echo(status)
70
+
71
+
72
@app.command(name="config")
def show_config() -> None:
    """Print the resolved client configuration."""
    cfg = load_config()
    # Never print the token itself, only whether one is configured.
    token_state = "<set>" if cfg.token else "<none>"
    rows = (
        f"server_url {cfg.server_url}",
        f"token {token_state}",
        f"default_voice {cfg.default_voice}",
    )
    for row in rows:
        typer.echo(row)
79
+
80
+
81
@app.command()
def record(
    name: Annotated[str, typer.Argument(help="Name to register the clone under.")],
    audio: Annotated[Path | None, typer.Option(help="Skip the recorder; use this file.")] = None,
    text: Annotated[str | None, typer.Option(help="Transcript for --audio.")] = None,
) -> None:
    """Record a reference voice and register it on the server."""
    # No file supplied: run the guided microphone recorder instead.
    if audio is None:
        _interactive_record_and_register(name)
        return

    # A pre-recorded file needs its transcript supplied alongside it.
    if text is None:
        typer.echo("--text is required when --audio is provided", err=True)
        raise typer.Exit(2)

    with _client() as client:
        registration = client.clone_register(name, audio, text)
    typer.echo(f"registered '{registration['name']}'")
96
+
97
+ _interactive_record_and_register(name)
98
+
99
+
100
@clone_app.command(name="say")
def clone_say(
    name: Annotated[str, typer.Argument(help="Registered clone name.")],
    text: Annotated[str, typer.Argument()],
    out: Annotated[Path, typer.Option(help="Output wav path.")] = Path("out.wav"),
    language: Annotated[str, typer.Option()] = "English",
) -> None:
    """Synthesize speech using a registered clone voice."""
    with _client() as client:
        synthesized = client.clone_tts(name, text, language=language)
    # Write the audio only after the client has been closed.
    out.write_bytes(synthesized)
    typer.echo(f"wrote {out}")
112
+
113
+
114
def _interactive_record_and_register(name: str) -> None:
    """Drive the guided recorder. Kept thin; primitives live in `mimic.recorder`.

    Flow: show a script, record until Enter is pressed, play the take back,
    then ask whether to keep it, discard it, or retry. A kept take is
    encoded with `save_wav` and registered on the server under *name*.

    Raises:
        typer.Exit: With code 0 when the user discards the take.
    """
    # Loop instead of recursing so repeated retries do not grow the call stack.
    while True:
        script = pick_script()
        typer.echo(f"\nRead this script when ready:\n\n {script}\n")
        typer.prompt("Press Enter to start recording", default="", show_default=False)

        typer.echo("Recording… press Enter to stop.")
        stop = threading.Event()

        # Bind this iteration's event as a default so the thread never sees a
        # later iteration's `stop`.
        def _wait_for_enter(done: threading.Event = stop) -> None:
            sys.stdin.readline()
            done.set()

        waiter = threading.Thread(target=_wait_for_enter, daemon=True)
        waiter.start()

        result = record_until_enter(
            sample_rate=DEFAULT_SAMPLE_RATE,
            channels=1,
            max_seconds=30.0,
            stop_event=stop,
        )

        # If recording ended without Enter (presumably the max_seconds cap was
        # hit; confirm against mimic.recorder), the waiter is still blocked on
        # stdin and would swallow the answer to the next prompt. Have the user
        # release it before prompting again.
        if not stop.is_set():
            typer.echo("Recording stopped; press Enter to continue.")
        waiter.join()

        typer.echo("Playing back…")
        play(result.audio, result.sample_rate)

        keep = typer.prompt("Keep this take? [y/N/r=retry]", default="N").strip().lower()
        if keep != "r":
            break

    if not keep.startswith("y"):
        typer.echo("discarded.")
        raise typer.Exit(0)

    transcript = typer.prompt("Transcript", default=script)

    # Encode the take in memory; getvalue() returns the whole buffer
    # regardless of the current stream position.
    buf = io.BytesIO()
    save_wav(buf, result.audio, sample_rate=result.sample_rate)

    with _client() as c:
        out = c.clone_register(name, buf.getvalue(), transcript)
    typer.echo(f"registered '{out['name']}'")
@@ -0,0 +1,148 @@
1
+ """Synchronous client for the mimic-tts server."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from pathlib import Path
7
+ from typing import TYPE_CHECKING, Any
8
+
9
+ if TYPE_CHECKING:
10
+ from io import BufferedReader
11
+
12
+ import httpx
13
+
14
+ from mimic._base import build_request_spec, raise_for_response
15
+
16
+
17
class Client:
    """Sync client. Use as a context manager to ensure the transport closes.

    Connection settings resolve in this order: explicit argument, then the
    ``MIMIC_SERVER_URL`` / ``MIMIC_API_TOKEN`` environment variables, then the
    built-in default (``http://localhost:8000``, no token).

    Every request method raises `MimicAPIError` (or one of its subclasses)
    when the server answers with a status code of 400 or above.
    """

    def __init__(
        self,
        server_url: str | None = None,
        token: str | None = None,
        timeout: float = 60.0,
        transport: httpx.BaseTransport | None = None,
    ) -> None:
        """Create the client and its underlying `httpx.Client`.

        Args:
            server_url: Server root URL; falls back to ``MIMIC_SERVER_URL``,
                then ``http://localhost:8000``.
            token: Bearer token; ``None`` falls back to ``MIMIC_API_TOKEN``.
            timeout: Timeout in seconds handed to httpx.
            transport: Optional custom httpx transport.
        """
        self._base_url = server_url or os.environ.get("MIMIC_SERVER_URL") or "http://localhost:8000"
        self._token = token if token is not None else os.environ.get("MIMIC_API_TOKEN")
        self._http = httpx.Client(timeout=timeout, transport=transport)

    def __enter__(self) -> Client:
        return self

    def __exit__(self, *exc: object) -> None:
        self.close()

    def close(self) -> None:
        """Close the underlying HTTP client."""
        self._http.close()

    def _send(self, method: str, path: str, **kwargs: Any) -> httpx.Response:
        """Issue one request and return the response, raising on HTTP errors.

        ``kwargs`` (``data`` / ``files``) are forwarded to
        `build_request_spec`. This is the single request pipeline shared by
        `_request_json` and `_request_audio`.
        """
        spec = build_request_spec(
            base_url=self._base_url,
            method=method,
            path=path,
            token=self._token,
            **kwargs,
        )
        response = self._http.request(
            spec.method,
            spec.url,
            headers=spec.headers,
            data=spec.data,
            files=spec.files,
        )
        raise_for_response(response)
        return response

    def _request_json(self, method: str, path: str, **kwargs: Any) -> Any:
        """Send a request and return the decoded JSON body."""
        return self._send(method, path, **kwargs).json()

    def _request_audio(self, method: str, path: str, **kwargs: Any) -> bytes:
        """Send a request and return the raw response body bytes."""
        return self._send(method, path, **kwargs).content

    def health(self) -> dict[str, Any]:
        """Return the JSON body of ``GET /health``."""
        return self._request_json("GET", "/health")

    def list_voices(self) -> list[dict[str, str]]:
        """Return the ``voices`` list from ``GET /voices``."""
        return self._request_json("GET", "/voices")["voices"]

    def list_clones(self) -> list[str]:
        """Return the ``voices`` list from ``GET /clone/voices``."""
        return self._request_json("GET", "/clone/voices")["voices"]

    def tts(
        self,
        text: str,
        *,
        language: str = "English",
        speaker: str = "Ryan",
        instruct: str = "",
    ) -> bytes:
        """POST the text and voice settings to ``/tts``; return the audio bytes."""
        return self._request_audio(
            "POST",
            "/tts",
            data={"text": text, "language": language, "speaker": speaker, "instruct": instruct},
        )

    def tts_to_file(self, text: str, out: Path | str, **kwargs: Any) -> Path:
        """Synthesize *text* via `tts` and write the audio to *out*.

        ``kwargs`` are forwarded to `tts`. Returns *out* as a `Path`.
        """
        audio = self.tts(text, **kwargs)
        out_path = Path(out)
        out_path.write_bytes(audio)
        return out_path

    def clone_register(
        self,
        name: str,
        audio: Path | str | bytes | BufferedReader,
        transcript: str,
    ) -> dict[str, str]:
        """Upload reference audio and its transcript to ``/clone/register``.

        Args:
            name: Name to register the clone voice under.
            audio: Reference audio as a file path, raw bytes, or an open
                binary file object.
            transcript: Text spoken in the reference audio (sent as ``ref_text``).

        Returns:
            The server's JSON response.
        """
        files = {"ref_audio": _as_upload(audio)}
        return self._request_json(
            "POST",
            "/clone/register",
            data={"name": name, "ref_text": transcript},
            files=files,
        )

    def clone_tts(
        self,
        name: str,
        text: str,
        *,
        language: str = "English",
    ) -> bytes:
        """POST to ``/clone/tts`` for the clone voice *name*; return the audio bytes."""
        return self._request_audio(
            "POST",
            "/clone/tts",
            data={"text": text, "language": language, "name": name},
        )

    def clone_oneshot(
        self,
        text: str,
        audio: Path | str | bytes | BufferedReader,
        transcript: str,
        *,
        language: str = "English",
    ) -> bytes:
        """POST text plus reference audio to ``/clone/oneshot``; return the audio bytes.

        Unlike `clone_tts`, the reference audio and its transcript are sent
        with the request instead of naming a registered clone.
        """
        files = {"ref_audio": _as_upload(audio)}
        return self._request_audio(
            "POST",
            "/clone/oneshot",
            data={"text": text, "language": language, "ref_text": transcript},
            files=files,
        )
139
+
140
+
141
def _as_upload(audio: Path | str | bytes | BufferedReader) -> tuple[str, Any, str]:
    """Normalize audio inputs to a (filename, fileobj-or-bytes, content-type) tuple.

    * A ``str`` or any ``os.PathLike`` is treated as a file path: the file is
      read eagerly and its base name is used as the upload filename.
    * Raw bytes and open file objects are passed through unchanged under the
      placeholder filename ``ref.wav``.

    The content type is always ``audio/wav``.
    """
    if isinstance(audio, (str, os.PathLike)):
        path = Path(audio)
        return (path.name, path.read_bytes(), "audio/wav")
    # Bytes and file objects need no conversion; one branch covers both.
    return ("ref.wav", audio, "audio/wav")
@@ -0,0 +1,71 @@
1
+ """Client configuration: kwarg → env → TOML → defaults (default_voice has no env override)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ import tomllib
7
+ from dataclasses import dataclass
8
+ from pathlib import Path
9
+
10
+ from platformdirs import user_config_path
11
+
12
+ DEFAULT_SERVER_URL = "http://localhost:8000"
13
+ DEFAULT_VOICE = "Ryan"
14
+
15
+ _KNOWN_TOML_KEYS = frozenset({"server_url", "token", "default_voice"})
16
+
17
+
18
@dataclass
class ClientConfig:
    """Fully resolved client settings, as produced by `load_config`."""

    # Root URL of the mimic-tts server.
    server_url: str
    # Bearer token, or None when no token is configured.
    token: str | None
    # Speaker used when the caller does not name one.
    default_voice: str
23
+
24
+
25
def _config_dir() -> Path:
    """Return the directory that holds ``config.toml``.

    A non-empty ``MIMIC_CONFIG_DIR`` environment variable takes precedence;
    otherwise the platform's per-user config directory for ``mimic`` is used.
    """
    if override := os.environ.get("MIMIC_CONFIG_DIR"):
        return Path(override)
    return user_config_path("mimic", appauthor=False)
30
+
31
+
32
+ def _read_toml(config_dir: Path) -> dict[str, object]:
33
+ path = config_dir / "config.toml"
34
+ if not path.exists():
35
+ return {}
36
+ try:
37
+ with path.open("rb") as f:
38
+ data = tomllib.load(f)
39
+ except tomllib.TOMLDecodeError as e:
40
+ raise ValueError(f"invalid TOML at {path}: {e}") from e
41
+ return {k: v for k, v in data.items() if k in _KNOWN_TOML_KEYS}
42
+
43
+
44
def load_config(
    *,
    server_url: str | None = None,
    token: str | None = None,
    default_voice: str | None = None,
    config_dir: Path | None = None,
) -> ClientConfig:
    """Resolve the client configuration from its layered sources.

    Precedence per field:

    * ``server_url``: kwarg, ``MIMIC_SERVER_URL``, TOML, ``DEFAULT_SERVER_URL``.
    * ``token``: kwarg (any non-None value, even empty), ``MIMIC_API_TOKEN``,
      TOML, otherwise ``None``.
    * ``default_voice``: kwarg, TOML, ``DEFAULT_VOICE`` (no environment
      variable is consulted).

    ``config_dir`` overrides where ``config.toml`` is looked up.
    """
    file_data = _read_toml(config_dir or _config_dir())
    env = os.environ

    url = (
        server_url
        or env.get("MIMIC_SERVER_URL")
        or file_data.get("server_url")
        or DEFAULT_SERVER_URL
    )

    if token is not None:
        chosen_token = token
    else:
        chosen_token = env.get("MIMIC_API_TOKEN") or file_data.get("token") or None

    voice = default_voice or file_data.get("default_voice") or DEFAULT_VOICE

    # TOML values may not be strings; coerce everything that is set.
    token_text = None if chosen_token is None else str(chosen_token)
    return ClientConfig(
        server_url=str(url),
        token=token_text,
        default_voice=str(voice),
    )
@@ -0,0 +1,28 @@
1
+ """Exception hierarchy for mimic-tts client errors."""
2
+
3
+ from __future__ import annotations
4
+
5
+
6
class MimicError(Exception):
    """Base class for all mimic-tts client errors."""


class MimicAPIError(MimicError):
    """Server returned a non-2xx response.

    Attributes:
        status_code: HTTP status code of the response.
        message: Error detail extracted from the response.
    """

    def __init__(self, status_code: int, message: str) -> None:
        super().__init__(f"HTTP {status_code}: {message}")
        self.status_code = status_code
        self.message = message

    def __reduce__(self) -> tuple[object, ...]:
        # The default exception reduction rebuilds the object as
        # ``cls(*self.args)``. ``args`` holds only the formatted string, so
        # pickle and copy would fail with a TypeError. Rebuild from the two
        # real constructor arguments instead.
        return (type(self), (self.status_code, self.message), self.__dict__.copy())


class MimicAuthError(MimicAPIError):
    """401: missing or invalid bearer token."""


class MimicNotFoundError(MimicAPIError):
    """404: requested resource (e.g. clone voice) does not exist."""


class MimicValidationError(MimicAPIError):
    """4xx other than 401/404: request was rejected as invalid."""