cartesia 0.1.1__py2.py3-none-any.whl → 1.0.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cartesia/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
- from cartesia.tts import AsyncCartesiaTTS, CartesiaTTS
1
+ from cartesia.client import Cartesia, AsyncCartesia
2
2
 
3
- __all__ = ["CartesiaTTS", "AsyncCartesiaTTS"]
3
+ __all__ = ["Cartesia", "AsyncCartesia"]
cartesia/_types.py CHANGED
@@ -1,43 +1,37 @@
1
- from enum import Enum
2
- from typing import List, Optional, TypedDict, Union
3
-
4
- try:
5
- import numpy as np
6
-
7
- _NUMPY_AVAILABLE = True
8
- except ImportError:
9
- _NUMPY_AVAILABLE = False
10
-
11
-
12
- class AudioDataReturnType(Enum):
13
- BYTES = "bytes"
14
- ARRAY = "array"
15
-
16
-
17
- class AudioOutputFormat(Enum):
18
- """Supported output formats for the audio."""
19
-
20
- FP32 = "fp32" # float32
21
- PCM = "pcm" # 16-bit signed integer PCM
22
- FP32_16000 = "fp32_16000" # float32, 16 kHz
23
- FP32_22050 = "fp32_22050" # float32, 22.05 kHz
24
- FP32_44100 = "fp32_44100" # float32, 44.1 kHz
25
- PCM_16000 = "pcm_16000" # 16-bit signed integer PCM, 16 kHz
26
- PCM_22050 = "pcm_22050" # 16-bit signed integer PCM, 22.05 kHz
27
- PCM_44100 = "pcm_44100" # 16-bit signed integer PCM, 44.1 kHz
28
- MULAW_8000 = "mulaw_8000" # 8-bit mu-law, 8 kHz
29
-
30
-
31
- class AudioOutput(TypedDict):
32
- audio: Union[bytes, "np.ndarray"]
33
- sampling_rate: int
34
-
35
-
36
- Embedding = List[float]
37
-
38
-
1
+ from typing import List, TypedDict
2
+
3
+ class OutputFormatMapping:
4
+ _format_mapping = {
5
+ "fp32": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 44100},
6
+ "pcm": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 44100},
7
+ "fp32_16000": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 16000},
8
+ "fp32_22050": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 22050},
9
+ "fp32_44100": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 44100},
10
+ "pcm_16000": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 16000},
11
+ "pcm_22050": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 22050},
12
+ "pcm_44100": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 44100},
13
+ "mulaw_8000": {"container": "raw", "encoding": "pcm_mulaw", "sample_rate": 8000},
14
+ "alaw_8000": {"container": "raw", "encoding": "pcm_alaw", "sample_rate": 8000},
15
+ }
16
+
17
+ @classmethod
18
+ def get_format(cls, format_name):
19
+ if format_name in cls._format_mapping:
20
+ return cls._format_mapping[format_name]
21
+ else:
22
+ raise ValueError(f"Unsupported format: {format_name}")
23
+
39
24
  class VoiceMetadata(TypedDict):
40
25
  id: str
41
26
  name: str
42
27
  description: str
43
- embedding: Optional[Embedding]
28
+ embedding: List[float]
29
+ is_public: bool
30
+ user_id: str
31
+ created_at: str
32
+ language: str
33
+
34
+ class OutputFormat(TypedDict):
35
+ container: str
36
+ encoding: str
37
+ sample_rate: int