cartesia 0.1.1__py2.py3-none-any.whl → 1.0.1__py2.py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
cartesia/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
- from cartesia.tts import AsyncCartesiaTTS, CartesiaTTS
1
+ from cartesia.client import Cartesia, AsyncCartesia
2
2
 
3
- __all__ = ["CartesiaTTS", "AsyncCartesiaTTS"]
3
+ __all__ = ["Cartesia", "AsyncCartesia"]
cartesia/_types.py CHANGED
@@ -1,43 +1,75 @@
1
- from enum import Enum
2
- from typing import List, Optional, TypedDict, Union
1
+ from typing import List, TypedDict
2
+ from cartesia.utils.deprecated import deprecated
3
3
 
4
- try:
5
- import numpy as np
6
4
 
7
- _NUMPY_AVAILABLE = True
8
- except ImportError:
9
- _NUMPY_AVAILABLE = False
5
+ class OutputFormatMapping:
6
+ _format_mapping = {
7
+ "raw_pcm_f32le_44100": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 44100},
8
+ "raw_pcm_s16le_44100": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 44100},
9
+ "raw_pcm_f32le_24000": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 24000},
10
+ "raw_pcm_s16le_24000": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 24000},
11
+ "raw_pcm_f32le_22050": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 22050},
12
+ "raw_pcm_s16le_22050": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 22050},
13
+ "raw_pcm_f32le_16000": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 16000},
14
+ "raw_pcm_s16le_16000": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 16000},
15
+ "raw_pcm_f32le_8000": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 8000},
16
+ "raw_pcm_s16le_8000": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 8000},
17
+ "raw_pcm_mulaw_8000": {"container": "raw", "encoding": "pcm_mulaw", "sample_rate": 8000},
18
+ "raw_pcm_alaw_8000": {"container": "raw", "encoding": "pcm_alaw", "sample_rate": 8000},
19
+ }
10
20
 
21
+ @classmethod
22
+ def get_format(cls, format_name):
23
+ if format_name in cls._format_mapping:
24
+ return cls._format_mapping[format_name]
25
+ else:
26
+ raise ValueError(f"Unsupported format: {format_name}")
11
27
 
12
- class AudioDataReturnType(Enum):
13
- BYTES = "bytes"
14
- ARRAY = "array"
15
28
 
29
+ class DeprecatedOutputFormatMapping:
30
+ """Deprecated formats as of v1.0.1. These will be removed in v1.2.0. Use :class:`OutputFormatMapping` instead."""
16
31
 
17
- class AudioOutputFormat(Enum):
18
- """Supported output formats for the audio."""
32
+ _format_mapping = {
33
+ "fp32": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 44100},
34
+ "pcm": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 44100},
35
+ "fp32_8000": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 8000},
36
+ "fp32_16000": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 16000},
37
+ "fp32_22050": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 22050},
38
+ "fp32_24000": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 24000},
39
+ "fp32_44100": {"container": "raw", "encoding": "pcm_f32le", "sample_rate": 44100},
40
+ "pcm_8000": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 8000},
41
+ "pcm_16000": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 16000},
42
+ "pcm_22050": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 22050},
43
+ "pcm_24000": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 24000},
44
+ "pcm_44100": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": 44100},
45
+ "mulaw_8000": {"container": "raw", "encoding": "pcm_mulaw", "sample_rate": 8000},
46
+ "alaw_8000": {"container": "raw", "encoding": "pcm_alaw", "sample_rate": 8000},
47
+ }
19
48
 
20
- FP32 = "fp32" # float32
21
- PCM = "pcm" # 16-bit signed integer PCM
22
- FP32_16000 = "fp32_16000" # float32, 16 kHz
23
- FP32_22050 = "fp32_22050" # float32, 22.05 kHz
24
- FP32_44100 = "fp32_44100" # float32, 44.1 kHz
25
- PCM_16000 = "pcm_16000" # 16-bit signed integer PCM, 16 kHz
26
- PCM_22050 = "pcm_22050" # 16-bit signed integer PCM, 22.05 kHz
27
- PCM_44100 = "pcm_44100" # 16-bit signed integer PCM, 44.1 kHz
28
- MULAW_8000 = "mulaw_8000" # 8-bit mu-law, 8 kHz
29
-
30
-
31
- class AudioOutput(TypedDict):
32
- audio: Union[bytes, "np.ndarray"]
33
- sampling_rate: int
34
-
35
-
36
- Embedding = List[float]
49
+ @deprecated(
50
+ vdeprecated="1.0.1",
51
+ vremove="1.2.0",
52
+ reason="Old output format names are being deprecated in favor of names aligned with the Cartesia API. Use names from `OutputFormatMapping` instead.",
53
+ )
54
+ def get_format_deprecated(self, format_name):
55
+ if format_name in self._format_mapping:
56
+ return self._format_mapping[format_name]
57
+ else:
58
+ raise ValueError(f"Unsupported format: {format_name}")
37
59
 
38
60
 
39
61
  class VoiceMetadata(TypedDict):
40
62
  id: str
41
63
  name: str
42
64
  description: str
43
- embedding: Optional[Embedding]
65
+ embedding: List[float]
66
+ is_public: bool
67
+ user_id: str
68
+ created_at: str
69
+ language: str
70
+
71
+
72
+ class OutputFormat(TypedDict):
73
+ container: str
74
+ encoding: str
75
+ sample_rate: int