abstractvoice 0.5.2__py3-none-any.whl → 0.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractvoice/__init__.py +2 -5
- abstractvoice/__main__.py +82 -3
- abstractvoice/adapters/__init__.py +12 -0
- abstractvoice/adapters/base.py +207 -0
- abstractvoice/adapters/stt_faster_whisper.py +401 -0
- abstractvoice/adapters/tts_piper.py +480 -0
- abstractvoice/aec/__init__.py +10 -0
- abstractvoice/aec/webrtc_apm.py +56 -0
- abstractvoice/artifacts.py +173 -0
- abstractvoice/audio/__init__.py +7 -0
- abstractvoice/audio/recorder.py +46 -0
- abstractvoice/audio/resample.py +25 -0
- abstractvoice/cloning/__init__.py +7 -0
- abstractvoice/cloning/engine_chroma.py +738 -0
- abstractvoice/cloning/engine_f5.py +546 -0
- abstractvoice/cloning/manager.py +349 -0
- abstractvoice/cloning/store.py +362 -0
- abstractvoice/compute/__init__.py +6 -0
- abstractvoice/compute/device.py +73 -0
- abstractvoice/config/__init__.py +2 -0
- abstractvoice/config/voice_catalog.py +19 -0
- abstractvoice/dependency_check.py +0 -1
- abstractvoice/examples/cli_repl.py +2408 -243
- abstractvoice/examples/voice_cli.py +64 -63
- abstractvoice/integrations/__init__.py +2 -0
- abstractvoice/integrations/abstractcore.py +116 -0
- abstractvoice/integrations/abstractcore_plugin.py +253 -0
- abstractvoice/prefetch.py +82 -0
- abstractvoice/recognition.py +424 -42
- abstractvoice/stop_phrase.py +103 -0
- abstractvoice/text_sanitize.py +33 -0
- abstractvoice/tts/__init__.py +3 -3
- abstractvoice/tts/adapter_tts_engine.py +210 -0
- abstractvoice/tts/tts_engine.py +257 -1208
- abstractvoice/vm/__init__.py +2 -0
- abstractvoice/vm/common.py +21 -0
- abstractvoice/vm/core.py +139 -0
- abstractvoice/vm/manager.py +108 -0
- abstractvoice/vm/stt_mixin.py +158 -0
- abstractvoice/vm/tts_mixin.py +550 -0
- abstractvoice/voice_manager.py +6 -1061
- abstractvoice-0.6.2.dist-info/METADATA +213 -0
- abstractvoice-0.6.2.dist-info/RECORD +53 -0
- {abstractvoice-0.5.2.dist-info → abstractvoice-0.6.2.dist-info}/WHEEL +1 -1
- abstractvoice-0.6.2.dist-info/entry_points.txt +6 -0
- abstractvoice/instant_setup.py +0 -83
- abstractvoice/simple_model_manager.py +0 -539
- abstractvoice-0.5.2.dist-info/METADATA +0 -1458
- abstractvoice-0.5.2.dist-info/RECORD +0 -23
- abstractvoice-0.5.2.dist-info/entry_points.txt +0 -2
- {abstractvoice-0.5.2.dist-info → abstractvoice-0.6.2.dist-info}/licenses/LICENSE +0 -0
- {abstractvoice-0.5.2.dist-info → abstractvoice-0.6.2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import json
|
|
5
|
+
import mimetypes
|
|
6
|
+
import re
|
|
7
|
+
from dataclasses import asdict, is_dataclass
|
|
8
|
+
from datetime import datetime, timezone
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any, Dict, Optional, Protocol, Union
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Matches a whole string of 1 to 128 characters drawn from ASCII letters,
# digits, underscore and hyphen (presumably the accepted artifact id format).
_ARTIFACT_ID_RE = re.compile(r"^[a-zA-Z0-9_-]{1,128}$")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _utc_now_iso() -> str:
    """Return the current time in UTC as an ISO 8601 string.

    The underlying datetime is timezone-aware, so the text carries an
    explicit UTC offset.
    """
    current = datetime.now(timezone.utc)
    return current.isoformat()
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def sha256_hex(content: bytes) -> str:
    """Return the SHA-256 digest of ``content`` as a lowercase hex string."""
    hasher = hashlib.sha256(content)
    return hasher.hexdigest()
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def compute_artifact_id(content: bytes) -> str:
    """Derive a content-based id: the first 32 hex characters of the SHA-256 digest."""
    full_digest = sha256_hex(content)
    return full_digest[:32]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def is_artifact_ref(value: Any) -> bool:
    """Tell whether ``value`` is an artifact reference.

    A reference is a dict whose ``"$artifact"`` entry is a non-empty string.
    """
    if not isinstance(value, dict):
        return False
    marker = value.get("$artifact")
    if not isinstance(marker, str):
        return False
    return bool(marker)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def get_artifact_id(ref: Dict[str, Any]) -> str:
    """Return the ``"$artifact"`` entry of ``ref`` as a string.

    Raises:
        KeyError: if ``ref`` has no ``"$artifact"`` key.
    """
    raw_id = ref["$artifact"]
    return str(raw_id)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def make_media_ref(
    artifact_id: str,
    *,
    content_type: Optional[str] = None,
    filename: Optional[str] = None,
    sha256: Optional[str] = None,
    size_bytes: Optional[int] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Build an artifact reference dict for a stored media item.

    The result always contains ``"$artifact"`` (the id as a string). Each
    optional field is added only when a usable value was supplied:

    - ``content_type``, ``filename``, ``sha256``: included when truthy,
      coerced to ``str``.
    - ``size_bytes``: included when not ``None`` (so ``0`` is kept),
      coerced to ``int``.
    - ``metadata``: included when it is a non-empty dict; the same dict
      object is stored, not a copy.
    """
    ref: Dict[str, Any] = {"$artifact": str(artifact_id)}

    # Optional string fields, in the order they appear in the output.
    text_fields = (
        ("content_type", content_type),
        ("filename", filename),
        ("sha256", sha256),
    )
    for key, value in text_fields:
        if value:
            ref[key] = str(value)

    if size_bytes is not None:
        ref["size_bytes"] = int(size_bytes)

    if isinstance(metadata, dict) and metadata:
        ref["metadata"] = metadata

    return ref
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class MediaStore(Protocol):
    """Structural interface for an object that can store and load media bytes."""

    def store_bytes(
        self,
        content: bytes,
        *,
        content_type: str,
        filename: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        tags: Optional[Dict[str, str]] = None,
        run_id: Optional[str] = None,
        artifact_id: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Store ``content`` and return a dict describing the stored item."""

    def load_bytes(self, artifact_id: str) -> bytes:
        """Return the bytes stored under ``artifact_id``."""

    def get_metadata(self, artifact_id: str) -> Optional[Dict[str, Any]]:
        """Return metadata for ``artifact_id`` as a dict, or ``None``."""
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class RuntimeArtifactStoreAdapter:
    """Duck-typed adapter for AbstractRuntime's ArtifactStore (no hard dependency).

    The wrapped object must expose callable ``store(...)`` and ``load(...)``
    attributes for the corresponding methods to work; ``get_metadata(...)``
    is optional. Nothing is checked at construction time: each method looks
    up what it needs when it is called.
    """

    def __init__(self, artifact_store: Any):
        self._store = artifact_store

    @staticmethod
    def _accepts_keyword(fn: Any, name: str) -> Optional[bool]:
        """Report whether ``fn`` can be called with keyword argument ``name``.

        Returns ``True``/``False`` when the signature can be inspected, and
        ``None`` when it cannot (e.g. some builtins or exotic callables).
        """
        # Function-scope import so this class does not depend on changes to
        # the module's import block.
        import inspect

        try:
            params = inspect.signature(fn).parameters
        except (TypeError, ValueError):
            return None
        if any(p.kind is inspect.Parameter.VAR_KEYWORD for p in params.values()):
            return True
        param = params.get(name)
        if param is None:
            return False
        return param.kind in (
            inspect.Parameter.POSITIONAL_OR_KEYWORD,
            inspect.Parameter.KEYWORD_ONLY,
        )

    def store_bytes(
        self,
        content: bytes,
        *,
        content_type: str,
        filename: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        tags: Optional[Dict[str, str]] = None,
        run_id: Optional[str] = None,
        artifact_id: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Store ``content`` in the wrapped store and return an artifact ref.

        ``filename`` and the content's SHA-256 are added to the tags unless
        the caller already supplied tags with those keys. ``metadata`` is not
        forwarded to the store; it is only echoed in the returned ref.
        ``artifact_id`` is forwarded only when the store's ``store`` callable
        accepts it, and is silently dropped otherwise.

        Raises:
            TypeError: if the store has no callable ``store`` or its result
                carries no non-blank string ``artifact_id``.
        """
        store_fn = getattr(self._store, "store", None)
        if not callable(store_fn):
            raise TypeError("Provided artifact_store does not have a callable .store(...)")

        content_b = bytes(content)
        content_type = str(content_type or "application/octet-stream")
        sha = sha256_hex(content_b)

        merged_tags: Dict[str, str] = {}
        if isinstance(tags, dict):
            merged_tags.update({str(k): str(v) for k, v in tags.items()})
        # Caller-provided tags win over the derived ones.
        if filename and "filename" not in merged_tags:
            merged_tags["filename"] = str(filename)
        if "sha256" not in merged_tags:
            merged_tags["sha256"] = sha

        base_kwargs: Dict[str, Any] = {
            "content_type": content_type,
            "run_id": str(run_id) if run_id else None,
            "tags": merged_tags or None,
        }
        requested_id = str(artifact_id) if artifact_id else None

        # Decide up front whether the store understands `artifact_id`.
        # Blindly retrying on TypeError would also swallow TypeErrors raised
        # inside the store itself and run the store operation twice.
        supports_id = self._accepts_keyword(store_fn, "artifact_id")
        if supports_id is True:
            meta = store_fn(content_b, **base_kwargs, artifact_id=requested_id)
        elif supports_id is False:
            meta = store_fn(content_b, **base_kwargs)
        else:
            # Signature unknown: keep the best-effort probe as a last resort.
            try:
                meta = store_fn(content_b, **base_kwargs, artifact_id=requested_id)
            except TypeError:
                meta = store_fn(content_b, **base_kwargs)

        # The store may answer with a dict or with an object exposing
        # an `artifact_id` attribute.
        if isinstance(meta, dict):
            artifact_id_out = meta.get("artifact_id")
        else:
            artifact_id_out = getattr(meta, "artifact_id", None)
        if not isinstance(artifact_id_out, str) or not artifact_id_out.strip():
            raise TypeError("artifact_store.store(...) did not return a usable artifact_id")

        return make_media_ref(
            str(artifact_id_out),
            content_type=content_type,
            filename=str(filename) if filename else None,
            sha256=sha,
            size_bytes=len(content_b),
            metadata=metadata if isinstance(metadata, dict) else None,
        )

    def load_bytes(self, artifact_id: str) -> bytes:
        """Load an artifact's content from the wrapped store.

        The store's ``load`` may return raw ``bytes``/``bytearray`` or an
        object with a ``content`` attribute.

        Raises:
            TypeError: if the store has no callable ``load`` or returns a
                value of an unsupported kind.
            FileNotFoundError: if the store returns ``None``.
        """
        load_fn = getattr(self._store, "load", None)
        if not callable(load_fn):
            raise TypeError("Provided artifact_store does not have a callable .load(...)")
        artifact = load_fn(str(artifact_id))
        if artifact is None:
            raise FileNotFoundError(f"Artifact not found: {artifact_id}")
        if isinstance(artifact, (bytes, bytearray)):
            return bytes(artifact)
        if hasattr(artifact, "content"):
            return bytes(getattr(artifact, "content"))
        raise TypeError("artifact_store.load(...) returned an unsupported value")

    def get_metadata(self, artifact_id: str) -> Optional[Dict[str, Any]]:
        """Return the store's metadata for ``artifact_id`` as a dict.

        Returns ``None`` when the store has no callable ``get_metadata``,
        when it returns ``None``, or when its result cannot be converted.
        Conversion is tried in order: dict as-is, a ``to_dict()`` method,
        then ``dataclasses.asdict`` for dataclass instances.
        """
        meta_fn = getattr(self._store, "get_metadata", None)
        if not callable(meta_fn):
            return None
        meta = meta_fn(str(artifact_id))
        if meta is None:
            return None
        if isinstance(meta, dict):
            return meta
        to_dict = getattr(meta, "to_dict", None)
        if callable(to_dict):
            out = to_dict()
            return out if isinstance(out, dict) else None
        # is_dataclass() is also true for dataclass *types*, on which asdict()
        # raises TypeError; only instances can be converted.
        if is_dataclass(meta) and not isinstance(meta, type):
            return asdict(meta)
        return None
|
|
173
|
+
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
import soundfile as sf
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def record_wav(
    output_path: str | Path,
    *,
    seconds: float = 6.0,
    sample_rate: int = 24000,
    channels: int = 1,
) -> str:
    """Record microphone audio to a WAV file (best-effort).

    This is intended for interactive REPL usage (e.g., `/clone-my-voice`).

    Args:
        output_path: Destination file; missing parent directories are created.
        seconds: Recording duration; must yield at least one frame.
        sample_rate: Capture and file sample rate in Hz.
        channels: Number of input channels to capture. Multi-channel
            captures are averaged down to a single channel before writing.

    Returns:
        The output path as a string.

    Raises:
        ValueError: if ``sample_rate * seconds`` is not a positive frame count.
        ImportError: if ``sounddevice`` cannot be imported.
    """
    # Validate before any side effect: invalid arguments must neither create
    # directories nor require the optional audio backend.
    frames = int(sample_rate * float(seconds))
    if frames <= 0:
        raise ValueError("seconds must be > 0")

    try:
        import sounddevice as sd
    except Exception as e:
        # Deliberately broad: any import-time failure of the audio backend
        # is reported as an ImportError with install guidance.
        raise ImportError(
            "Microphone recording requires sounddevice.\n"
            "Install with: pip install abstractvoice\n"
            f"Original error: {e}"
        ) from e

    out = Path(output_path)
    out.parent.mkdir(parents=True, exist_ok=True)

    audio = sd.rec(frames, samplerate=sample_rate, channels=channels, dtype="float32")
    sd.wait()

    # downmix to mono if needed
    if channels > 1:
        audio = np.mean(audio, axis=1, keepdims=True)

    sf.write(str(out), audio, sample_rate, subtype="PCM_16")
    return str(out)
|
|
46
|
+
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def linear_resample_mono(audio: np.ndarray, src_sr: int, dst_sr: int) -> np.ndarray:
    """Resample a mono signal by linear interpolation.

    A deliberately small implementation that avoids heavy DSP dependencies.
    Source and target samples are laid out on the same ``[0, 1]`` axis, so
    the first and last samples map onto each other.

    ``audio`` is returned unchanged when either rate is not positive, when
    the rates are equal, or when ``audio`` is ``None`` or shorter than two
    samples. Otherwise the result is a new ``float32`` array.
    """
    src_sr, dst_sr = int(src_sr), int(dst_sr)

    # Nothing to do (or nothing sensible to do): hand the input back as-is.
    if src_sr <= 0 or dst_sr <= 0 or src_sr == dst_sr or audio is None or len(audio) < 2:
        return audio

    n_in = len(audio)
    scale = float(dst_sr) / float(src_sr)
    n_out = max(1, int(round(n_in * scale)))

    src_positions = np.linspace(0.0, 1.0, num=n_in, endpoint=True)
    dst_positions = np.linspace(0.0, 1.0, num=n_out, endpoint=True)
    resampled = np.interp(dst_positions, src_positions, audio)
    return resampled.astype(np.float32)
|
|
25
|
+
|