abstractvoice 0.5.2__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51):
  1. abstractvoice/__init__.py +2 -5
  2. abstractvoice/__main__.py +82 -3
  3. abstractvoice/adapters/__init__.py +12 -0
  4. abstractvoice/adapters/base.py +207 -0
  5. abstractvoice/adapters/stt_faster_whisper.py +401 -0
  6. abstractvoice/adapters/tts_piper.py +480 -0
  7. abstractvoice/aec/__init__.py +10 -0
  8. abstractvoice/aec/webrtc_apm.py +56 -0
  9. abstractvoice/artifacts.py +173 -0
  10. abstractvoice/audio/__init__.py +7 -0
  11. abstractvoice/audio/recorder.py +46 -0
  12. abstractvoice/audio/resample.py +25 -0
  13. abstractvoice/cloning/__init__.py +7 -0
  14. abstractvoice/cloning/engine_chroma.py +738 -0
  15. abstractvoice/cloning/engine_f5.py +546 -0
  16. abstractvoice/cloning/manager.py +349 -0
  17. abstractvoice/cloning/store.py +362 -0
  18. abstractvoice/compute/__init__.py +6 -0
  19. abstractvoice/compute/device.py +73 -0
  20. abstractvoice/config/__init__.py +2 -0
  21. abstractvoice/config/voice_catalog.py +19 -0
  22. abstractvoice/dependency_check.py +0 -1
  23. abstractvoice/examples/cli_repl.py +2403 -243
  24. abstractvoice/examples/voice_cli.py +64 -63
  25. abstractvoice/integrations/__init__.py +2 -0
  26. abstractvoice/integrations/abstractcore.py +116 -0
  27. abstractvoice/integrations/abstractcore_plugin.py +253 -0
  28. abstractvoice/prefetch.py +82 -0
  29. abstractvoice/recognition.py +424 -42
  30. abstractvoice/stop_phrase.py +103 -0
  31. abstractvoice/tts/__init__.py +3 -3
  32. abstractvoice/tts/adapter_tts_engine.py +210 -0
  33. abstractvoice/tts/tts_engine.py +257 -1208
  34. abstractvoice/vm/__init__.py +2 -0
  35. abstractvoice/vm/common.py +21 -0
  36. abstractvoice/vm/core.py +139 -0
  37. abstractvoice/vm/manager.py +108 -0
  38. abstractvoice/vm/stt_mixin.py +158 -0
  39. abstractvoice/vm/tts_mixin.py +550 -0
  40. abstractvoice/voice_manager.py +6 -1061
  41. abstractvoice-0.6.1.dist-info/METADATA +213 -0
  42. abstractvoice-0.6.1.dist-info/RECORD +52 -0
  43. {abstractvoice-0.5.2.dist-info → abstractvoice-0.6.1.dist-info}/WHEEL +1 -1
  44. abstractvoice-0.6.1.dist-info/entry_points.txt +6 -0
  45. abstractvoice/instant_setup.py +0 -83
  46. abstractvoice/simple_model_manager.py +0 -539
  47. abstractvoice-0.5.2.dist-info/METADATA +0 -1458
  48. abstractvoice-0.5.2.dist-info/RECORD +0 -23
  49. abstractvoice-0.5.2.dist-info/entry_points.txt +0 -2
  50. {abstractvoice-0.5.2.dist-info → abstractvoice-0.6.1.dist-info}/licenses/LICENSE +0 -0
  51. {abstractvoice-0.5.2.dist-info → abstractvoice-0.6.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,173 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ import json
5
+ import mimetypes
6
+ import re
7
+ from dataclasses import asdict, is_dataclass
8
+ from datetime import datetime, timezone
9
+ from pathlib import Path
10
+ from typing import Any, Dict, Optional, Protocol, Union
11
+
12
+
13
+ _ARTIFACT_ID_RE = re.compile(r"^[a-zA-Z0-9_-]{1,128}$")
14
+
15
+
16
def _utc_now_iso() -> str:
    """Return the current time in UTC as a timezone-aware ISO 8601 string."""
    now = datetime.now(tz=timezone.utc)
    return now.isoformat()
18
+
19
+
20
def sha256_hex(content: bytes) -> str:
    """Return the SHA-256 digest of ``content`` as a lowercase hex string."""
    hasher = hashlib.sha256()
    hasher.update(content)
    return hasher.hexdigest()
22
+
23
+
24
def compute_artifact_id(content: bytes) -> str:
    """Derive a content-addressed id: the first 32 hex chars of the SHA-256 digest."""
    digest = hashlib.sha256(content).hexdigest()
    return digest[:32]
26
+
27
+
28
def is_artifact_ref(value: Any) -> bool:
    """Tell whether ``value`` is a dict carrying a non-empty string under ``"$artifact"``."""
    if not isinstance(value, dict):
        return False
    marker = value.get("$artifact")
    if not isinstance(marker, str):
        return False
    return bool(marker)
30
+
31
+
32
def get_artifact_id(ref: Dict[str, Any]) -> str:
    """Extract the artifact id stored under ``"$artifact"`` and coerce it to ``str``.

    Raises ``KeyError`` when the key is absent.
    """
    raw_id = ref["$artifact"]
    return str(raw_id)
34
+
35
+
36
def make_media_ref(
    artifact_id: str,
    *,
    content_type: Optional[str] = None,
    filename: Optional[str] = None,
    sha256: Optional[str] = None,
    size_bytes: Optional[int] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Build an artifact reference dict keyed by ``"$artifact"``.

    Optional fields are included only when they carry a value: the string
    fields when truthy, ``size_bytes`` whenever it is not ``None`` (so ``0`` is
    kept), and ``metadata`` when it is a non-empty dict. The metadata dict is
    stored as-is (not copied).
    """
    ref: Dict[str, Any] = {"$artifact": str(artifact_id)}

    # Optional string fields, in the order they appear in the resulting dict.
    optional_strings = (
        ("content_type", content_type),
        ("filename", filename),
        ("sha256", sha256),
    )
    for key, value in optional_strings:
        if value:
            ref[key] = str(value)

    if size_bytes is not None:
        ref["size_bytes"] = int(size_bytes)

    if isinstance(metadata, dict) and metadata:
        ref["metadata"] = metadata

    return ref
57
+
58
+
59
class MediaStore(Protocol):
    """Structural interface for a store that persists and retrieves media bytes."""

    def store_bytes(
        self,
        content: bytes,
        *,
        content_type: str,
        filename: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        tags: Optional[Dict[str, str]] = None,
        run_id: Optional[str] = None,
        artifact_id: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Persist ``content`` and return a dict describing the stored artifact."""

    def load_bytes(self, artifact_id: str) -> bytes:
        """Return the raw bytes stored under ``artifact_id``."""

    def get_metadata(self, artifact_id: str) -> Optional[Dict[str, Any]]:
        """Return the metadata dict for ``artifact_id``, or ``None``."""
75
+
76
+
77
class RuntimeArtifactStoreAdapter:
    """Duck-typed adapter for AbstractRuntime's ArtifactStore (no hard dependency).

    The wrapped object is only required to expose the methods actually used:
    ``store(...)`` for :meth:`store_bytes`, ``load(...)`` for :meth:`load_bytes`
    and, optionally, ``get_metadata(...)`` for :meth:`get_metadata`.
    """

    def __init__(self, artifact_store: Any):
        self._store = artifact_store

    @staticmethod
    def _accepts_keyword(fn: Any, name: str) -> Optional[bool]:
        """Report whether ``fn`` can be called with keyword argument ``name``.

        Returns ``True``/``False`` when the signature can be inspected and
        ``None`` when it cannot (e.g. some builtins/C callables).
        """
        import inspect  # local import: keeps the module's import block untouched

        try:
            params = inspect.signature(fn).parameters
        except (TypeError, ValueError):
            return None
        if any(p.kind is inspect.Parameter.VAR_KEYWORD for p in params.values()):
            return True
        param = params.get(name)
        return param is not None and param.kind in (
            inspect.Parameter.POSITIONAL_OR_KEYWORD,
            inspect.Parameter.KEYWORD_ONLY,
        )

    def store_bytes(
        self,
        content: bytes,
        *,
        content_type: str,
        filename: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        tags: Optional[Dict[str, str]] = None,
        run_id: Optional[str] = None,
        artifact_id: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Store ``content`` through the wrapped store and return a media ref.

        ``filename`` and the content's SHA-256 are added to the tags unless the
        caller already supplied those keys. ``artifact_id`` is forwarded only
        when the store's ``store(...)`` accepts it.

        Raises:
            TypeError: if the store has no callable ``store`` or its result
                does not expose a non-blank string ``artifact_id``.
        """
        store_fn = getattr(self._store, "store", None)
        if not callable(store_fn):
            raise TypeError("Provided artifact_store does not have a callable .store(...)")

        content_b = bytes(content)
        content_type = str(content_type or "application/octet-stream")
        sha = sha256_hex(content_b)

        merged_tags: Dict[str, str] = {}
        if isinstance(tags, dict):
            merged_tags.update({str(k): str(v) for k, v in tags.items()})
        if filename and "filename" not in merged_tags:
            merged_tags["filename"] = str(filename)
        merged_tags.setdefault("sha256", sha)

        base_kwargs: Dict[str, Any] = {
            "content_type": content_type,
            "run_id": str(run_id) if run_id else None,
            "tags": merged_tags or None,
        }
        with_id_kwargs = dict(base_kwargs, artifact_id=str(artifact_id) if artifact_id else None)

        # Decide up front whether to pass artifact_id. Catching TypeError and
        # retrying would also swallow a TypeError raised *inside* a store that
        # does accept artifact_id, and call store() twice.
        supports_id = self._accepts_keyword(store_fn, "artifact_id")
        if supports_id is None:
            # Signature not introspectable: keep the legacy try/fallback.
            try:
                meta = store_fn(content_b, **with_id_kwargs)
            except TypeError:
                meta = store_fn(content_b, **base_kwargs)
        elif supports_id:
            meta = store_fn(content_b, **with_id_kwargs)
        else:
            meta = store_fn(content_b, **base_kwargs)

        # The store may return either a mapping or an object with .artifact_id.
        artifact_id_out = None
        if isinstance(meta, dict):
            artifact_id_out = meta.get("artifact_id")
        elif hasattr(meta, "artifact_id"):
            artifact_id_out = getattr(meta, "artifact_id", None)
        if not isinstance(artifact_id_out, str) or not artifact_id_out.strip():
            raise TypeError("artifact_store.store(...) did not return a usable artifact_id")

        return make_media_ref(
            str(artifact_id_out),
            content_type=content_type,
            filename=str(filename) if filename else None,
            sha256=sha,
            size_bytes=len(content_b),
            metadata=metadata if isinstance(metadata, dict) else None,
        )

    def load_bytes(self, artifact_id: str) -> bytes:
        """Load an artifact's raw bytes from the wrapped store.

        Accepts a bytes-like return value or an object with a ``content``
        attribute.

        Raises:
            TypeError: if the store has no callable ``load`` or returns an
                unsupported value.
            FileNotFoundError: if the store returns ``None``.
        """
        load_fn = getattr(self._store, "load", None)
        if not callable(load_fn):
            raise TypeError("Provided artifact_store does not have a callable .load(...)")
        artifact = load_fn(str(artifact_id))
        if artifact is None:
            raise FileNotFoundError(f"Artifact not found: {artifact_id}")
        if isinstance(artifact, (bytes, bytearray, memoryview)):
            return bytes(artifact)
        if hasattr(artifact, "content"):
            return bytes(getattr(artifact, "content"))
        raise TypeError("artifact_store.load(...) returned an unsupported value")

    def get_metadata(self, artifact_id: str) -> Optional[Dict[str, Any]]:
        """Return the artifact's metadata as a dict, or ``None`` if unavailable.

        ``None`` is returned when the store has no ``get_metadata``, when it
        returns ``None``, or when the value cannot be converted to a dict
        (conversion is tried via ``to_dict()`` and then dataclass ``asdict``).
        """
        meta_fn = getattr(self._store, "get_metadata", None)
        if not callable(meta_fn):
            return None
        meta = meta_fn(str(artifact_id))
        if meta is None:
            return None
        if isinstance(meta, dict):
            return meta
        to_dict = getattr(meta, "to_dict", None)
        if callable(to_dict):
            out = to_dict()
            return out if isinstance(out, dict) else None
        # is_dataclass() is also true for dataclass *classes*, on which
        # asdict() raises TypeError; only convert instances.
        if is_dataclass(meta) and not isinstance(meta, type):
            out = asdict(meta)
            return out if isinstance(out, dict) else None
        return None
173
+
@@ -0,0 +1,7 @@
1
+ """Audio utilities (small, dependency-light)."""
2
+
3
+ from .resample import linear_resample_mono
4
+ from .recorder import record_wav
5
+
6
+ __all__ = ["linear_resample_mono", "record_wav"]
7
+
@@ -0,0 +1,46 @@
1
+ from __future__ import annotations
2
+
3
+ import time
4
+ from pathlib import Path
5
+
6
+ import numpy as np
7
+ import soundfile as sf
8
+
9
+
10
def record_wav(
    output_path: str | Path,
    *,
    seconds: float = 6.0,
    sample_rate: int = 24000,
    channels: int = 1,
) -> str:
    """Record microphone audio to a WAV file (best-effort).

    This is intended for interactive REPL usage (e.g., `/clone-my-voice`).

    Args:
        output_path: Destination file; missing parent directories are created.
        seconds: Recording duration; must yield at least one frame.
        sample_rate: Capture and output sample rate in Hz.
        channels: Number of input channels; multi-channel input is averaged
            down to a single channel before writing.

    Returns:
        The output path as a string.

    Raises:
        ValueError: if ``seconds`` does not yield a positive frame count.
        ImportError: if ``sounddevice`` cannot be imported.
    """
    # Validate first so an invalid call has no side effects (no optional
    # import, no directories created on disk).
    frames = int(sample_rate * float(seconds))
    if frames <= 0:
        raise ValueError("seconds must be > 0")

    try:
        import sounddevice as sd
    except Exception as e:
        raise ImportError(
            "Microphone recording requires sounddevice.\n"
            "Install with: pip install abstractvoice\n"
            f"Original error: {e}"
        ) from e

    # Create the destination before recording so path problems surface
    # before the user has spent time speaking.
    out = Path(output_path)
    out.parent.mkdir(parents=True, exist_ok=True)

    audio = sd.rec(frames, samplerate=sample_rate, channels=channels, dtype="float32")
    sd.wait()  # block until the recording has finished

    # downmix to mono if needed
    if channels > 1:
        audio = np.mean(audio, axis=1, keepdims=True)

    sf.write(str(out), audio, sample_rate, subtype="PCM_16")
    return str(out)
46
+
@@ -0,0 +1,25 @@
1
+ from __future__ import annotations
2
+
3
+ import numpy as np
4
+
5
+
6
def linear_resample_mono(audio: np.ndarray, src_sr: int, dst_sr: int) -> np.ndarray:
    """Resample a mono signal by linear interpolation (no extra DSP dependency).

    The input is returned untouched when either rate is non-positive, when the
    rates are equal, or when ``audio`` is ``None`` or shorter than two samples.
    Otherwise a new ``float32`` array is returned whose length is the input
    length scaled by ``dst_sr / src_sr`` (rounded, at least 1).
    """
    src_rate = int(src_sr)
    dst_rate = int(dst_sr)

    # Nothing to do (or nothing sensible to do) for these rate combinations.
    if src_rate <= 0 or dst_rate <= 0 or src_rate == dst_rate:
        return audio
    if audio is None or len(audio) < 2:
        return audio

    n_in = len(audio)
    scale = float(dst_rate) / float(src_rate)
    n_out = max(1, int(round(n_in * scale)))

    # Place both sample grids on [0, 1] so the endpoints line up exactly.
    grid_in = np.linspace(0.0, 1.0, num=n_in, endpoint=True)
    grid_out = np.linspace(0.0, 1.0, num=n_out, endpoint=True)
    resampled = np.interp(grid_out, grid_in, audio)
    return resampled.astype(np.float32)
25
+
@@ -0,0 +1,7 @@
1
+ """Optional voice cloning support (behind `abstractvoice[cloning]`)."""
2
+
3
+ from .store import VoiceCloneStore
4
+ from .manager import VoiceCloner
5
+
6
+ __all__ = ["VoiceCloneStore", "VoiceCloner"]
7
+