cartesia 1.2.0__tar.gz → 1.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. cartesia-1.3.0/.github/workflows/ci.yaml +64 -0
  2. cartesia-1.3.0/.github/workflows/publish.yaml +58 -0
  3. cartesia-1.3.0/.gitignore +60 -0
  4. cartesia-1.3.0/Makefile +13 -0
  5. {cartesia-1.2.0 → cartesia-1.3.0}/PKG-INFO +4 -5
  6. cartesia-1.3.0/bumpversion.py +41 -0
  7. {cartesia-1.2.0 → cartesia-1.3.0}/cartesia/_async_websocket.py +56 -9
  8. {cartesia-1.2.0 → cartesia-1.3.0}/cartesia/_constants.py +1 -0
  9. {cartesia-1.2.0 → cartesia-1.3.0}/cartesia/_websocket.py +4 -1
  10. {cartesia-1.2.0 → cartesia-1.3.0}/cartesia/utils/tts.py +4 -0
  11. cartesia-1.3.0/cartesia/version.py +1 -0
  12. {cartesia-1.2.0 → cartesia-1.3.0}/pyproject.toml +6 -2
  13. cartesia-1.3.0/tests/__init__.py +0 -0
  14. cartesia-1.3.0/tests/resources/sample-speech-4s.wav +0 -0
  15. {cartesia-1.2.0 → cartesia-1.3.0}/tests/test_tts.py +77 -162
  16. cartesia-1.3.0/uv.lock +1454 -0
  17. cartesia-1.2.0/cartesia/version.py +0 -1
  18. cartesia-1.2.0/cartesia.egg-info/PKG-INFO +0 -662
  19. cartesia-1.2.0/cartesia.egg-info/SOURCES.txt +0 -29
  20. cartesia-1.2.0/cartesia.egg-info/dependency_links.txt +0 -1
  21. cartesia-1.2.0/cartesia.egg-info/requires.txt +0 -5
  22. cartesia-1.2.0/cartesia.egg-info/top_level.txt +0 -1
  23. cartesia-1.2.0/setup.cfg +0 -4
  24. {cartesia-1.2.0 → cartesia-1.3.0}/LICENSE.md +0 -0
  25. {cartesia-1.2.0 → cartesia-1.3.0}/README.md +0 -0
  26. {cartesia-1.2.0 → cartesia-1.3.0}/cartesia/__init__.py +0 -0
  27. {cartesia-1.2.0 → cartesia-1.3.0}/cartesia/_async_sse.py +0 -0
  28. {cartesia-1.2.0 → cartesia-1.3.0}/cartesia/_logger.py +0 -0
  29. {cartesia-1.2.0 → cartesia-1.3.0}/cartesia/_sse.py +0 -0
  30. {cartesia-1.2.0 → cartesia-1.3.0}/cartesia/_types.py +0 -0
  31. {cartesia-1.2.0 → cartesia-1.3.0}/cartesia/async_client.py +0 -0
  32. {cartesia-1.2.0 → cartesia-1.3.0}/cartesia/async_tts.py +0 -0
  33. {cartesia-1.2.0 → cartesia-1.3.0}/cartesia/client.py +0 -0
  34. {cartesia-1.2.0 → cartesia-1.3.0}/cartesia/resource.py +0 -0
  35. {cartesia-1.2.0 → cartesia-1.3.0}/cartesia/tts.py +0 -0
  36. {cartesia-1.2.0 → cartesia-1.3.0}/cartesia/utils/__init__.py +0 -0
  37. {cartesia-1.2.0 → cartesia-1.3.0}/cartesia/utils/deprecated.py +0 -0
  38. {cartesia-1.2.0 → cartesia-1.3.0}/cartesia/utils/retry.py +0 -0
  39. {cartesia-1.2.0 → cartesia-1.3.0}/cartesia/voices.py +0 -0
  40. {cartesia-1.2.0 → cartesia-1.3.0}/tests/test_deprecated.py +0 -0
@@ -0,0 +1,64 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ pull_request:
6
+
7
+ # Allows you to run this workflow manually from the Actions tab
8
+ workflow_dispatch:
9
+
10
+ jobs:
11
+ Linting:
12
+ runs-on: ubuntu-latest
13
+ strategy:
14
+ matrix:
15
+ python-version: ["3.10"]
16
+
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+
20
+ - name: Install uv
21
+ uses: astral-sh/setup-uv@v3
22
+ with:
23
+ enable-cache: true
24
+
25
+ - name: Set up Python
26
+ run: uv python install
27
+
28
+ - name: Install the project
29
+ run: uv sync --all-extras --dev
30
+
31
+ - name: Lint with ruff
32
+ run: |
33
+ make lint
34
+
35
+ Build:
36
+ runs-on: ubuntu-latest
37
+ # There is an issue with infinitely running tests when something fails due to failure to close the WebSocket, so we set a timeout.
38
+ timeout-minutes: 5
39
+ strategy:
40
+ matrix:
41
+ # If we test it against too many versions, we are making unnecessary
42
+ # requests to the production server.
43
+ python-version: ["3.8", "3.10"]
44
+
45
+ steps:
46
+ - uses: actions/checkout@v4
47
+
48
+ - name: Install uv
49
+ uses: astral-sh/setup-uv@v3
50
+ with:
51
+ enable-cache: true
52
+
53
+ - name: Set up Python ${{ matrix.python-version }}
54
+ run: uv python install ${{ matrix.python-version }}
55
+
56
+ - name: Install the project
57
+ run: uv sync --all-extras --dev
58
+
59
+ - name: Test
60
+ env:
61
+ CARTESIA_API_KEY: ${{ secrets.TESTING_CARTESIA_API_KEY }}
62
+ CARTESIA_TEST_DEPRECATED: "true"
63
+ run: |
64
+ make test
@@ -0,0 +1,58 @@
1
+ name: Publish
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ paths:
7
+ # if the version.py changes, we should re-publish
8
+ # this also allows us to run the workflow manually without skipping.
9
+ - "cartesia/version.py"
10
+
11
+ # Allows you to run this workflow manually from the Actions tab
12
+ workflow_dispatch:
13
+
14
+ jobs:
15
+ publish:
16
+ runs-on: ubuntu-latest
17
+ environment:
18
+ name: pypi
19
+ url: https://pypi.org/p/cartesia
20
+ # TODO: Make permissions more restrictive
21
+ permissions: write-all
22
+ env:
23
+ commitmsg: ${{ github.event.head_commit.message }}
24
+
25
+ steps:
26
+ - uses: actions/checkout@v4
27
+
28
+ - name: Install uv
29
+ uses: astral-sh/setup-uv@v3
30
+ with:
31
+ enable-cache: true
32
+
33
+ - name: Set up Python
34
+ run: uv python install
35
+
36
+ - name: Install the project
37
+ run: uv sync --all-extras --dev
38
+
39
+ - name: Build the project
40
+ run: uv build
41
+
42
+ - name: Publish to PyPI
43
+ run: uv publish
44
+
45
+ - name: Get version
46
+ run: |
47
+ export CARTESIA_PYTHON_VERSION=$(grep -o '".*"' cartesia/version.py | sed 's/"//g')
48
+ echo "CARTESIA_PYTHON_VERSION=${CARTESIA_PYTHON_VERSION}" >> $GITHUB_ENV
49
+
50
+ - name: Verify version
51
+ run: echo ${{ env.CARTESIA_PYTHON_VERSION }}
52
+
53
+ - name: Bump version and push tag
54
+ id: tag_version
55
+ uses: mathieudutour/github-tag-action@v6.1
56
+ with:
57
+ github_token: ${{ secrets.GITHUB_TOKEN }}
58
+ custom_tag: ${{ env.CARTESIA_PYTHON_VERSION }}
@@ -0,0 +1,60 @@
1
+ data.tar.gz
2
+ *.pth
3
+ *.tsf
4
+ *.ckpt
5
+ .ipynb_checkpoints
6
+ */.ipynb_checkpoints/*
7
+ *.lprof
8
+ .coverage
9
+ .DS_Store
10
+ .idea/
11
+ .vscode/
12
+ outputs/
13
+ outputs
14
+ # logs can either be a directory or symlinked to a directory
15
+ # ignore both
16
+ logs/
17
+ logs
18
+
19
+ data
20
+
21
+ # Created by https://www.gitignore.io/api/python
22
+ # Edit at https://www.gitignore.io/?templates=python
23
+
24
+ ### Python ###
25
+ # Byte-compiled / optimized / DLL files
26
+ __pycache__/
27
+ *.py[cod]
28
+ *$py.class
29
+
30
+ # C extensions
31
+ *.so
32
+
33
+ # Distribution / packaging
34
+ .Python
35
+ build/
36
+ develop-eggs/
37
+ dist/
38
+ downloads/
39
+ eggs/
40
+ .eggs/
41
+ lib/
42
+ lib64/
43
+ parts/
44
+ sdist/
45
+ var/
46
+ wheels/
47
+ pip-wheel-metadata/
48
+ share/python-wheels/
49
+ *.egg-info/
50
+ .installed.cfg
51
+ *.egg
52
+ MANIFEST
53
+
54
+ # pyenv
55
+ .python-version
56
+
57
+ # scratch
58
+ scratch/
59
+
60
+ # End of https://www.gitignore.io/api/python
@@ -0,0 +1,13 @@
1
+ format:
2
+ uvx ruff check --fix .
3
+ uvx ruff format .
4
+
5
+ lint:
6
+ uvx ruff check .
7
+ uvx ruff format --check .
8
+
9
+ test:
10
+ uv run pytest -ra tests/ -sv --cov=cartesia/ --log-cli-level=INFO
11
+
12
+ bump: # Use as `make bump version=<version>`
13
+ uv run -m bumpversion $(version)
@@ -1,15 +1,14 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.3
2
2
  Name: cartesia
3
- Version: 1.2.0
3
+ Version: 1.3.0
4
4
  Summary: The official Python library for the Cartesia API.
5
5
  Requires-Python: >=3.9
6
- Description-Content-Type: text/markdown
7
- License-File: LICENSE.md
8
6
  Requires-Dist: aiohttp>=3.10.10
9
7
  Requires-Dist: httpx>=0.27.2
10
8
  Requires-Dist: iterators>=0.2.0
11
9
  Requires-Dist: requests>=2.32.3
12
- Requires-Dist: websockets>=13.1
10
+ Requires-Dist: websockets>=10.4
11
+ Description-Content-Type: text/markdown
13
12
 
14
13
  # Cartesia Python API Library
15
14
 
@@ -0,0 +1,41 @@
1
+ """Bump the version of the package.
2
+
3
+ Usage: bumpversion.py <version>
4
+
5
+ <version> must be in the format of <major>.<minor>.<patch>[-<prelabel><preversion>]
6
+ """
7
+
8
+ import re
9
+ import tomlkit
10
+ import sys
11
+ from cartesia.version import __version__
12
+
13
+ VERSION_REGEX = r"""(?x)
14
+ (?P<major>0|[1-9]\d*)\.
15
+ (?P<minor>0|[1-9]\d*)\.
16
+ (?P<patch>0|[1-9]\d*)
17
+ (?:
18
+ - # dash separator for pre-release section
19
+ (?P<prelabel>[a-zA-Z-]+) # pre-release label
20
+ (?P<preversion>0|[1-9]\d*) # pre-release version number
21
+ )? # pre-release section is optional
22
+ """ # Source: https://github.com/callowayproject/bump-my-version
23
+
24
+
25
+ def main(version: str):
26
+ assert re.match(VERSION_REGEX, version), "Invalid version format"
27
+
28
+ with open("pyproject.toml", "r") as f:
29
+ pyproject = tomlkit.load(f)
30
+
31
+ pyproject["project"]["version"] = version
32
+
33
+ with open("pyproject.toml", "w") as f:
34
+ tomlkit.dump(pyproject, f)
35
+
36
+ with open("cartesia/version.py", "w") as f:
37
+ f.write(f'__version__ = "{version}"\n')
38
+
39
+
40
+ if __name__ == "__main__":
41
+ main(sys.argv[1])
@@ -6,7 +6,7 @@ from typing import Any, AsyncGenerator, Callable, Dict, List, Optional, Union
6
6
 
7
7
  import aiohttp
8
8
 
9
- from cartesia._constants import DEFAULT_MODEL_ID, DEFAULT_VOICE_EMBEDDING
9
+ from cartesia._constants import DEFAULT_MODEL_ID, DEFAULT_OUTPUT_FORMAT, DEFAULT_VOICE_EMBEDDING
10
10
  from cartesia._types import OutputFormat, VoiceControls
11
11
  from cartesia._websocket import _WebSocket
12
12
  from cartesia.tts import TTS
@@ -45,6 +45,7 @@ class _AsyncTTSContext:
45
45
  voice_embedding: Optional[List[float]] = None,
46
46
  context_id: Optional[str] = None,
47
47
  continue_: bool = False,
48
+ flush: bool = False,
48
49
  duration: Optional[int] = None,
49
50
  language: Optional[str] = None,
50
51
  add_timestamps: bool = False,
@@ -60,6 +61,7 @@ class _AsyncTTSContext:
60
61
  voice_embedding: The embedding of the voice to use for generating audio.
61
62
  context_id: The context ID to use for the request. If not specified, a random context ID will be generated.
62
63
  continue_: Whether to continue the audio generation from the previous transcript or not.
64
+ flush: Whether to trigger a manual flush for the current context's generation.
63
65
  duration: The duration of the audio in seconds.
64
66
  language: The language code for the audio request. This can only be used with `model_id = sonic-multilingual`.
65
67
  add_timestamps: Whether to return word-level timestamps.
@@ -71,7 +73,7 @@ class _AsyncTTSContext:
71
73
  """
72
74
  if context_id is not None and context_id != self._context_id:
73
75
  raise ValueError("Context ID does not match the context ID of the current context.")
74
- if continue_ and transcript == "":
76
+ if continue_ and transcript == "" and not flush:
75
77
  raise ValueError("Transcript cannot be empty when continue_ is True.")
76
78
 
77
79
  await self._websocket.connect()
@@ -87,6 +89,7 @@ class _AsyncTTSContext:
87
89
  context_id=self._context_id,
88
90
  add_timestamps=add_timestamps,
89
91
  continue_=continue_,
92
+ flush=flush,
90
93
  _experimental_voice_controls=_experimental_voice_controls,
91
94
  )
92
95
 
@@ -100,12 +103,49 @@ class _AsyncTTSContext:
100
103
  await self.send(
101
104
  model_id=DEFAULT_MODEL_ID,
102
105
  transcript="",
103
- output_format=TTS.get_output_format("raw_pcm_f32le_44100"),
106
+ output_format=TTS.get_output_format(DEFAULT_OUTPUT_FORMAT),
104
107
  voice_embedding=DEFAULT_VOICE_EMBEDDING, # Default voice embedding since it's a required input for now.
105
108
  context_id=self._context_id,
106
109
  continue_=False,
107
110
  )
108
111
 
112
+ async def flush(self) -> Callable[[], AsyncGenerator[Dict[str, Any], None]]:
113
+ """Trigger a manual flush for the current context's generation. This method returns a generator that yields the audio prior to the flush."""
114
+ await self.send(
115
+ model_id=DEFAULT_MODEL_ID,
116
+ transcript="",
117
+ output_format=TTS.get_output_format(DEFAULT_OUTPUT_FORMAT),
118
+ voice_embedding=DEFAULT_VOICE_EMBEDDING, # Default voice embedding since it's a required input for now.
119
+ context_id=self._context_id,
120
+ continue_=True,
121
+ flush=True,
122
+ )
123
+
124
+ # Save the old flush ID
125
+ flush_id = len(self._websocket._context_queues[self._context_id]) - 1
126
+
127
+ # Create a new Async Queue to store the responses for the new flush ID
128
+ self._websocket._context_queues[self._context_id].append(asyncio.Queue())
129
+
130
+ # Return the generator for the old flush ID
131
+ async def generator():
132
+ try:
133
+ while True:
134
+ response = await self._websocket._get_message(
135
+ self._context_id, timeout=self.timeout, flush_id=flush_id
136
+ )
137
+ if "error" in response:
138
+ raise RuntimeError(f"Error generating audio:\n{response['error']}")
139
+ if response.get("flush_done") or response["done"]:
140
+ break
141
+ yield self._websocket._convert_response(response, include_context_id=True)
142
+ except Exception as e:
143
+ if isinstance(e, asyncio.TimeoutError):
144
+ raise RuntimeError("Timeout while waiting for audio chunk")
145
+ raise RuntimeError(f"Failed to generate audio:\n{e}")
146
+
147
+ return generator
148
+
109
149
  async def receive(self) -> AsyncGenerator[Dict[str, Any], None]:
110
150
  """Receive the audio chunks from the WebSocket. This method is a generator that yields audio chunks.
111
151
 
@@ -175,7 +215,7 @@ class _AsyncWebSocket(_WebSocket):
175
215
  self.timeout = timeout
176
216
  self._get_session = get_session
177
217
  self.websocket = None
178
- self._context_queues: Dict[str, asyncio.Queue] = {}
218
+ self._context_queues: Dict[str, List[asyncio.Queue]] = {}
179
219
  self._processing_task: asyncio.Task = None
180
220
 
181
221
  def __del__(self):
@@ -213,7 +253,7 @@ class _AsyncWebSocket(_WebSocket):
213
253
  except asyncio.CancelledError:
214
254
  pass
215
255
  except TypeError as e:
216
- # Ignore the error if the task is already cancelled
256
+ # Ignore the error if the task is already canceled.
217
257
  # For some reason we are getting None responses
218
258
  # TODO: This needs to be fixed - we need to think about why we are getting None responses.
219
259
  if "Received message 256:None" not in str(e):
@@ -284,16 +324,23 @@ class _AsyncWebSocket(_WebSocket):
284
324
  response = await self.websocket.receive_json()
285
325
  if response["context_id"]:
286
326
  context_id = response["context_id"]
327
+ flush_id = response.get("flush_id", -1)
287
328
  if context_id in self._context_queues:
288
- await self._context_queues[context_id].put(response)
329
+ await self._context_queues[context_id][flush_id].put(response)
289
330
  except Exception as e:
290
331
  self._error = e
291
332
  raise e
292
333
 
293
- async def _get_message(self, context_id: str, timeout: float) -> Dict[str, Any]:
334
+ async def _get_message(
335
+ self, context_id: str, timeout: float, flush_id: Optional[int] = -1
336
+ ) -> Dict[str, Any]:
294
337
  if context_id not in self._context_queues:
295
338
  raise ValueError(f"Context ID {context_id} not found.")
296
- return await asyncio.wait_for(self._context_queues[context_id].get(), timeout=timeout)
339
+ if len(self._context_queues[context_id]) <= flush_id:
340
+ raise ValueError(f"Flush ID {flush_id} not found for context ID {context_id}.")
341
+ return await asyncio.wait_for(
342
+ self._context_queues[context_id][flush_id].get(), timeout=timeout
343
+ )
297
344
 
298
345
  def _remove_context(self, context_id: str):
299
346
  if context_id in self._context_queues:
@@ -309,5 +356,5 @@ class _AsyncWebSocket(_WebSocket):
309
356
  if context_id is None:
310
357
  context_id = str(uuid.uuid4())
311
358
  if context_id not in self._context_queues:
312
- self._context_queues[context_id] = asyncio.Queue()
359
+ self._context_queues[context_id] = [asyncio.Queue()]
313
360
  return _AsyncTTSContext(context_id, self, self.timeout)
@@ -2,6 +2,7 @@ DEFAULT_MODEL_ID = "sonic-english" # latest default model
2
2
  MULTILINGUAL_MODEL_ID = "sonic-multilingual" # latest multilingual model
3
3
  DEFAULT_BASE_URL = "api.cartesia.ai"
4
4
  DEFAULT_CARTESIA_VERSION = "2024-06-10" # latest version
5
+ DEFAULT_OUTPUT_FORMAT = "raw_pcm_f32le_44100"
5
6
  DEFAULT_TIMEOUT = 30 # seconds
6
7
  DEFAULT_NUM_CONNECTIONS = 10 # connections per client
7
8
  DEFAULT_VOICE_EMBEDDING = [1.0] * 192
@@ -239,7 +239,7 @@ class _WebSocket:
239
239
  self._contexts.clear()
240
240
 
241
241
  def _convert_response(
242
- self, response: Dict[str, any], include_context_id: bool
242
+ self, response: Dict[str, any], include_context_id: bool, include_flush_id: bool = False
243
243
  ) -> Dict[str, Any]:
244
244
  out = {}
245
245
  if response["type"] == EventType.AUDIO:
@@ -250,6 +250,9 @@ class _WebSocket:
250
250
  if include_context_id:
251
251
  out["context_id"] = response["context_id"]
252
252
 
253
+ if include_flush_id and "flush_id" in response:
254
+ out["flush_id"] = response["flush_id"]
255
+
253
256
  return out
254
257
 
255
258
  def send(
@@ -37,6 +37,7 @@ def _construct_tts_request(
37
37
  add_timestamps: bool = False,
38
38
  context_id: Optional[str] = None,
39
39
  continue_: bool = False,
40
+ flush: bool = False,
40
41
  _experimental_voice_controls: Optional[VoiceControls] = None,
41
42
  ):
42
43
  tts_request = {
@@ -71,4 +72,7 @@ def _construct_tts_request(
71
72
  if continue_:
72
73
  tts_request["continue"] = continue_
73
74
 
75
+ if flush:
76
+ tts_request["flush"] = flush
77
+
74
78
  return tts_request
@@ -0,0 +1 @@
1
+ __version__ = "1.3.0"
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "cartesia"
3
- version = "1.2.0"
3
+ version = "1.3.0"
4
4
  description = "The official Python library for the Cartesia API."
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.9"
@@ -9,9 +9,13 @@ dependencies = [
9
9
  "httpx>=0.27.2",
10
10
  "iterators>=0.2.0",
11
11
  "requests>=2.32.3",
12
- "websockets>=13.1",
12
+ "websockets>=10.4",
13
13
  ]
14
14
 
15
+ [build-system]
16
+ requires = ["hatchling"]
17
+ build-backend = "hatchling.build"
18
+
15
19
  [tool.uv]
16
20
  dev-dependencies = [
17
21
  "isort>=5.13.2",
File without changes