pipecat-ai-typecast 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,6 @@
1
+ __pycache__/
2
+
3
+ *.egg-info/
4
+ dist/
5
+ build/
6
+ .env
@@ -0,0 +1,16 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [0.0.1] - 2025-11-12
9
+
10
+ ### Added
11
+ - Initial release of Typecast TTS integration for Pipecat
12
+ - `TypecastTTSService` for high-quality text-to-speech synthesis
13
+
14
+ ### Documentation
15
+ - README with installation and usage instructions
16
+ - Foundational example demonstrating complete pipecat bot setup
@@ -0,0 +1,24 @@
1
+ BSD 2-Clause License
2
+
3
+ Copyright (c) 2024–2025, Daily
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+
8
+ 1. Redistributions of source code must retain the above copyright notice, this
9
+ list of conditions and the following disclaimer.
10
+
11
+ 2. Redistributions in binary form must reproduce the above copyright notice,
12
+ this list of conditions and the following disclaimer in the documentation
13
+ and/or other materials provided with the distribution.
14
+
15
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
19
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,152 @@
1
+ Metadata-Version: 2.4
2
+ Name: pipecat-ai-typecast
3
+ Version: 0.0.1
4
+ Summary: Typecast is an AI text-to-speech API that converts text into lifelike, expressive speech in many languages.
5
+ Author-email: Neosapience <help@typecast.ai>
6
+ Maintainer-email: Neosapience <help@typecast.ai>
7
+ License-Expression: BSD-2-Clause
8
+ Project-URL: Homepage, https://typecast.ai
9
+ Project-URL: Documentation, https://typecast.ai/docs/overview
10
+ Project-URL: Source, https://github.com/neosapience/pipecat-typecast
11
+ Keywords: pipecat,text-to-speech,ai
12
+ Classifier: Development Status :: 5 - Production/Stable
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
15
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
16
+ Requires-Python: >=3.10
17
+ Description-Content-Type: text/markdown
18
+ License-File: LICENSE
19
+ Requires-Dist: pipecat-ai>=0.0.94
20
+ Requires-Dist: aiohttp>=3.9.0
21
+ Requires-Dist: loguru>=0.7.0
22
+ Requires-Dist: pydantic>=2.0.0
23
+ Dynamic: license-file
24
+
25
+ # Pipecat Typecast TTS Integration
26
+
27
+ Add high-quality neural voices from [Typecast](https://typecast.ai/) to your Pipecat AI pipelines.
28
+
29
+ **Maintainer:** Neosapience / Typecast team (@neosapience)
30
+
31
+ ## Installation
32
+
33
+ ```bash
34
+ pip install pipecat-ai-typecast
35
+ ```
36
+
37
+ ## Prerequisites
38
+
39
+ - Typecast API key (`TYPECAST_API_KEY`)
40
+ - Optional: Voice override (`TYPECAST_VOICE_ID`) – defaults to `tc_62a8975e695ad26f7fb514d1`
41
+
42
+ ## Usage with Pipecat Pipeline
43
+
44
+ `TypecastTTSService` integrates Typecast's streaming text-to-speech into a Pipecat pipeline. It converts LLM text output into expressive speech while leveraging Pipecat's transport, STT, and turn-taking stack.
45
+
46
+ ```python
47
+ import os, aiohttp
48
+ from pipecat.pipeline.pipeline import Pipeline
49
+ from pipecat_typecast.tts import TypecastTTSService
50
+
51
+ async with aiohttp.ClientSession() as session:
52
+ llm = ...
53
+ stt = ...
54
+ tts = TypecastTTSService(
55
+ aiohttp_session=session,
56
+ api_key=os.getenv("TYPECAST_API_KEY"),
57
+ voice_id=os.getenv("TYPECAST_VOICE_ID", "tc_62a8975e695ad26f7fb514d1"),
58
+ )
59
+
60
+ pipeline = Pipeline([
61
+ transport.input(), # audio/user input
62
+ stt, # speech to text
63
+ context_aggregator.user(), # add user text to context
64
+ llm, # LLM generates response
65
+ tts, # Typecast TTS synthesis
66
+ transport.output(), # stream audio back to user
67
+ context_aggregator.assistant(), # store assistant response
68
+ ])
69
+ ```
70
+
71
+ See [`example.py`](example.py) for a complete working example including event handlers and transport setup.
72
+
73
+ ### Advanced Configuration (Emotion & Audio Controls)
74
+
75
+ `TypecastTTSService` exposes structured parameter models so you can tune emotion and audio output.
76
+
77
+ ```python
78
+ from pipecat_typecast.tts import (
79
+ TypecastTTSService,
80
+ TypecastInputParams,
81
+ PromptOptions,
82
+ OutputOptions,
83
+ )
84
+
85
+ params = TypecastInputParams(
86
+ # Language influences pronunciation model (defaults to English)
87
+ # Language.EN / Language.KO / Language.JA ...
88
+ # If omitted, Typecast auto-detect may apply (depending on voice).
89
+ prompt_options=PromptOptions(
90
+ emotion_preset="happy", # normal | happy | sad | angry | whisper (voice dependent)
91
+ emotion_intensity=1.3, # 0.0–2.0 (float)
92
+ ),
93
+ output_options=OutputOptions(
94
+ volume=110, # 0–200 (percent)
95
+ audio_pitch=2, # -12..12 (semitones)
96
+ audio_tempo=1.05, # 0.5–2.0 (playback speed)
97
+ audio_format="wav", # Only 'wav' currently supported
98
+ ),
99
+ )
100
+
101
+ tts = TypecastTTSService(
102
+ aiohttp_session=session,
103
+ api_key=os.getenv("TYPECAST_API_KEY"),
104
+ voice_id="tc_62a8975e695ad26f7fb514d1", # Replace with another voice ID as desired
105
+ model="ssfm-v21", # Default model
106
+ params=params,
107
+ )
108
+ ```
109
+
110
+ Notes:
111
+ - `emotion_preset` availability varies by voice. If unsupported, the service falls back to neutral.
112
+ - `emotion_intensity` > 1.0 increases expressiveness; extreme values can sound synthetic.
113
+ - `audio_pitch` shifts pitch in musical semitone units (use small adjustments for naturalness).
114
+ - `audio_tempo` changes speaking speed; keep within 0.85–1.15 for intelligibility.
115
+ - `seed` (set in `TypecastInputParams`) provides deterministic synthesis for identical text (when supported by model).
116
+ - Unsupported `audio_format` values yield an error frame—keep `wav`.
117
+
118
+ ## Running the Example
119
+
120
+ 1. Install dependencies:
121
+ ```bash
122
+ uv sync
123
+ ```
124
+
125
+ 2. Set up your environment (copy the template, then fill in your API keys):
126
+
127
+ ```bash
128
+ cp env.example .env
129
+ ```
130
+
131
+ 3. Run:
132
+ ```bash
133
+ uv run python example.py
134
+ ```
135
+
136
+ The bot will create a call (e.g. Daily room) and speak responses using Typecast voices.
137
+
138
+ ## Compatibility
139
+
140
+ **Requires Pipecat v0.0.94 or later**
141
+
142
+ - Python 3.10+
143
+ - Daily / Twilio / generic WebRTC transports (see `example.py`)
144
+
145
+ ## License
146
+
147
+ BSD-2-Clause - see [LICENSE](LICENSE)
148
+
149
+ ## Support
150
+
151
+ - Docs: https://typecast.ai/docs/overview (refer to the API docs for voice IDs & parameters)
152
+ - Pipecat Discord: https://discord.gg/pipecat (`#community-integrations`)
@@ -0,0 +1,128 @@
1
+ # Pipecat Typecast TTS Integration
2
+
3
+ Add high-quality neural voices from [Typecast](https://typecast.ai/) to your Pipecat AI pipelines.
4
+
5
+ **Maintainer:** Neosapience / Typecast team (@neosapience)
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ pip install pipecat-ai-typecast
11
+ ```
12
+
13
+ ## Prerequisites
14
+
15
+ - Typecast API key (`TYPECAST_API_KEY`)
16
+ - Optional: Voice override (`TYPECAST_VOICE_ID`) – defaults to `tc_62a8975e695ad26f7fb514d1`
17
+
18
+ ## Usage with Pipecat Pipeline
19
+
20
+ `TypecastTTSService` integrates Typecast's streaming text-to-speech into a Pipecat pipeline. It converts LLM text output into expressive speech while leveraging Pipecat's transport, STT, and turn-taking stack.
21
+
22
+ ```python
23
+ import os, aiohttp
24
+ from pipecat.pipeline.pipeline import Pipeline
25
+ from pipecat_typecast.tts import TypecastTTSService
26
+
27
+ async with aiohttp.ClientSession() as session:
28
+ llm = ...
29
+ stt = ...
30
+ tts = TypecastTTSService(
31
+ aiohttp_session=session,
32
+ api_key=os.getenv("TYPECAST_API_KEY"),
33
+ voice_id=os.getenv("TYPECAST_VOICE_ID", "tc_62a8975e695ad26f7fb514d1"),
34
+ )
35
+
36
+ pipeline = Pipeline([
37
+ transport.input(), # audio/user input
38
+ stt, # speech to text
39
+ context_aggregator.user(), # add user text to context
40
+ llm, # LLM generates response
41
+ tts, # Typecast TTS synthesis
42
+ transport.output(), # stream audio back to user
43
+ context_aggregator.assistant(), # store assistant response
44
+ ])
45
+ ```
46
+
47
+ See [`example.py`](example.py) for a complete working example including event handlers and transport setup.
48
+
49
+ ### Advanced Configuration (Emotion & Audio Controls)
50
+
51
+ `TypecastTTSService` exposes structured parameter models so you can tune emotion and audio output.
52
+
53
+ ```python
54
+ from pipecat_typecast.tts import (
55
+ TypecastTTSService,
56
+ TypecastInputParams,
57
+ PromptOptions,
58
+ OutputOptions,
59
+ )
60
+
61
+ params = TypecastInputParams(
62
+ # Language influences pronunciation model (defaults to English)
63
+ # Language.EN / Language.KO / Language.JA ...
64
+ # If omitted, Typecast auto-detect may apply (depending on voice).
65
+ prompt_options=PromptOptions(
66
+ emotion_preset="happy", # normal | happy | sad | angry | whisper (voice dependent)
67
+ emotion_intensity=1.3, # 0.0–2.0 (float)
68
+ ),
69
+ output_options=OutputOptions(
70
+ volume=110, # 0–200 (percent)
71
+ audio_pitch=2, # -12..12 (semitones)
72
+ audio_tempo=1.05, # 0.5–2.0 (playback speed)
73
+ audio_format="wav", # Only 'wav' currently supported
74
+ ),
75
+ )
76
+
77
+ tts = TypecastTTSService(
78
+ aiohttp_session=session,
79
+ api_key=os.getenv("TYPECAST_API_KEY"),
80
+ voice_id="tc_62a8975e695ad26f7fb514d1", # Replace with another voice ID as desired
81
+ model="ssfm-v21", # Default model
82
+ params=params,
83
+ )
84
+ ```
85
+
86
+ Notes:
87
+ - `emotion_preset` availability varies by voice. If unsupported, the service falls back to neutral.
88
+ - `emotion_intensity` > 1.0 increases expressiveness; extreme values can sound synthetic.
89
+ - `audio_pitch` shifts pitch in musical semitone units (use small adjustments for naturalness).
90
+ - `audio_tempo` changes speaking speed; keep within 0.85–1.15 for intelligibility.
91
+ - `seed` (set in `TypecastInputParams`) provides deterministic synthesis for identical text (when supported by model).
92
+ - Unsupported `audio_format` values yield an error frame—keep `wav`.
93
+
94
+ ## Running the Example
95
+
96
+ 1. Install dependencies:
97
+ ```bash
98
+ uv sync
99
+ ```
100
+
101
+ 2. Set up your environment (copy the template, then fill in your API keys):
102
+
103
+ ```bash
104
+ cp env.example .env
105
+ ```
106
+
107
+ 3. Run:
108
+ ```bash
109
+ uv run python example.py
110
+ ```
111
+
112
+ The bot will create a call (e.g. Daily room) and speak responses using Typecast voices.
113
+
114
+ ## Compatibility
115
+
116
+ **Requires Pipecat v0.0.94 or later**
117
+
118
+ - Python 3.10+
119
+ - Daily / Twilio / generic WebRTC transports (see `example.py`)
120
+
121
+ ## License
122
+
123
+ BSD-2-Clause - see [LICENSE](LICENSE)
124
+
125
+ ## Support
126
+
127
+ - Docs: https://typecast.ai/docs/overview (refer to the API docs for voice IDs & parameters)
128
+ - Pipecat Discord: https://discord.gg/pipecat (`#community-integrations`)
@@ -0,0 +1,9 @@
1
+ DEEPGRAM_API_KEY=...
2
+
3
+ OPENAI_API_KEY=...
4
+
5
+ CARTESIA_API_KEY=...
6
+
7
+ DAILY_API_KEY=...
8
+
9
+ TYPECAST_API_KEY=...
@@ -0,0 +1,144 @@
1
+ #
2
+ # Copyright (c) 2024–2025, Daily
3
+ #
4
+ # SPDX-License-Identifier: BSD-2-Clause
5
+ #
6
+
7
+
8
+ import os
9
+
10
+ import aiohttp
11
+ from dotenv import load_dotenv
12
+ from loguru import logger
13
+
14
+ from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
15
+ from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
16
+ from pipecat.audio.vad.silero import SileroVADAnalyzer
17
+ from pipecat.audio.vad.vad_analyzer import VADParams
18
+ from pipecat.frames.frames import LLMRunFrame
19
+ from pipecat.pipeline.pipeline import Pipeline
20
+ from pipecat.pipeline.runner import PipelineRunner
21
+ from pipecat.pipeline.task import PipelineParams, PipelineTask
22
+ from pipecat.processors.aggregators.llm_context import LLMContext
23
+ from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
24
+ from pipecat.runner.types import RunnerArguments
25
+ from pipecat.runner.utils import create_transport
26
+ from pipecat.services.deepgram.stt import DeepgramSTTService
27
+ from pipecat.services.openai.llm import OpenAILLMService
28
+ from pipecat.transports.base_transport import BaseTransport, TransportParams
29
+ from pipecat.transports.daily.transport import DailyParams
30
+ from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
31
+
32
+ from pipecat_typecast import TypecastTTSService
33
+
34
+ load_dotenv(override=True)
35
+
36
+
37
+ # We store functions so objects (e.g. SileroVADAnalyzer) don't get
38
+ # instantiated. The function will be called when the desired transport gets
39
+ # selected.
40
+ transport_params = {
41
+ "daily": lambda: DailyParams(
42
+ audio_in_enabled=True,
43
+ audio_out_enabled=True,
44
+ vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
45
+ turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
46
+ ),
47
+ "twilio": lambda: FastAPIWebsocketParams(
48
+ audio_in_enabled=True,
49
+ audio_out_enabled=True,
50
+ vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
51
+ turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
52
+ ),
53
+ "webrtc": lambda: TransportParams(
54
+ audio_in_enabled=True,
55
+ audio_out_enabled=True,
56
+ vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
57
+ turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
58
+ ),
59
+ }
60
+
61
+
62
+ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
63
+ logger.info("Starting bot")
64
+
65
+ async with aiohttp.ClientSession() as session:
66
+ stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
67
+
68
+ typecast_kwargs = {}
69
+ voice_id = os.getenv("TYPECAST_VOICE_ID")
70
+ if voice_id:
71
+ typecast_kwargs["voice_id"] = voice_id
72
+
73
+ tts = TypecastTTSService(
74
+ aiohttp_session=session,
75
+ api_key=os.getenv("TYPECAST_API_KEY", ""),
76
+ **typecast_kwargs,
77
+ )
78
+
79
+ llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
80
+
81
+ messages = [
82
+ {
83
+ "role": "system",
84
+ "content": (
85
+ "You are a helpful LLM in a WebRTC call. "
86
+ "Your goal is to demonstrate your capabilities succinctly. "
87
+ "Your output will be converted to audio, so avoid special characters."
88
+ ),
89
+ },
90
+ ]
91
+
92
+ context = LLMContext(messages)
93
+ context_aggregator = LLMContextAggregatorPair(context)
94
+
95
+ pipeline = Pipeline(
96
+ [
97
+ transport.input(), # Transport user input
98
+ stt,
99
+ context_aggregator.user(), # User responses
100
+ llm, # LLM
101
+ tts, # TTS
102
+ transport.output(), # Transport bot output
103
+ context_aggregator.assistant(), # Assistant spoken responses
104
+ ]
105
+ )
106
+
107
+ task = PipelineTask(
108
+ pipeline,
109
+ params=PipelineParams(
110
+ enable_metrics=True,
111
+ enable_usage_metrics=True,
112
+ ),
113
+ idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
114
+ )
115
+
116
+ @transport.event_handler("on_client_connected")
117
+ async def on_client_connected(transport, client):
118
+ logger.info("Client connected")
119
+ # Kick off the conversation.
120
+ messages.append(
121
+ {"role": "system", "content": "Please introduce yourself to the user."}
122
+ )
123
+ await task.queue_frames([LLMRunFrame()])
124
+
125
+ @transport.event_handler("on_client_disconnected")
126
+ async def on_client_disconnected(transport, client):
127
+ logger.info("Client disconnected")
128
+ await task.cancel()
129
+
130
+ runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
131
+
132
+ await runner.run(task)
133
+
134
+
135
+ async def bot(runner_args: RunnerArguments):
136
+ """Main bot entry point compatible with Pipecat Cloud."""
137
+ transport = await create_transport(runner_args, transport_params)
138
+ await run_bot(transport, runner_args)
139
+
140
+
141
+ if __name__ == "__main__":
142
+ from pipecat.runner.run import main
143
+
144
+ main()
@@ -0,0 +1,65 @@
1
+ [project]
2
+ name = "pipecat-ai-typecast"
3
+ version = "0.0.1"
4
+ description = "Typecast is an AI text-to-speech API that converts text into lifelike, expressive speech in many languages."
5
+ license = "BSD-2-Clause"
6
+ license-files = ["LICENSE"]
7
+ readme = "README.md"
8
+ requires-python = ">=3.10"
9
+ keywords = ["pipecat", "text-to-speech", "ai"]
10
+ authors = [
11
+ {name = "Neosapience", email = "help@typecast.ai"}
12
+ ]
13
+ maintainers = [
14
+ {name = "Neosapience", email = "help@typecast.ai"}
15
+ ]
16
+ classifiers = [
17
+ "Development Status :: 5 - Production/Stable",
18
+ "Intended Audience :: Developers",
19
+ "Topic :: Multimedia :: Sound/Audio :: Speech",
20
+ "Topic :: Scientific/Engineering :: Artificial Intelligence"
21
+ ]
22
+ dependencies = [
23
+ "pipecat-ai>=0.0.94",
24
+ "aiohttp>=3.9.0",
25
+ "loguru>=0.7.0",
26
+ "pydantic>=2.0.0",
27
+ ]
28
+
29
+ [dependency-groups]
30
+ dev = [
31
+ "pipecat-ai[daily,openai,deepgram,runner,webrtc,local-smart-turn-v3]>=0.0.94",
32
+ "python-dotenv>=1.0.1",
33
+ "onnxruntime>=1.23.2",
34
+ "ruff>=0.12.11",
35
+ "build>=1.0.0",
36
+ "twine>=5.0.0",
37
+ ]
38
+
39
+ [project.urls]
40
+ Homepage = "https://typecast.ai"
41
+ Documentation = "https://typecast.ai/docs/overview"
42
+ Source = "https://github.com/neosapience/pipecat-typecast"
43
+
44
+ [build-system]
45
+ requires = ["setuptools>=64", "setuptools_scm>=8"]
46
+ build-backend = "setuptools.build_meta"
47
+
48
+ [tool.setuptools.packages.find]
49
+ where = ["src"]
50
+
51
+ [tool.ruff]
52
+ line-length = 100
53
+
54
+ [tool.ruff.lint]
55
+ select = [
56
+ "D", # Docstring rules
57
+ "I", # Import rules
58
+ ]
59
+
60
+ [tool.ruff.lint.per-file-ignores]
61
+ # Skip docstring checks for non-source code
62
+ "example.py" = ["D"]
63
+
64
+ [tool.ruff.lint.pydocstyle]
65
+ convention = "google"
@@ -0,0 +1,27 @@
1
+ #!/bin/bash
2
+ # PyPI publish script
3
+ # Usage: ./scripts/publish.sh [testpypi|pypi]
4
+
5
+ set -e
6
+
7
+ REPOSITORY=${1:-pypi}
8
+
9
+ echo "Cleaning previous build artifacts..."
10
+ rm -rf dist/ build/ *.egg-info
11
+
12
+ echo "Building package..."
13
+ python -m build
14
+
15
+ echo "Build complete!"
16
+
17
+ if [ "$REPOSITORY" = "testpypi" ]; then
18
+ echo "Uploading to TestPyPI..."
19
+ twine upload --repository testpypi dist/*
20
+ echo "TestPyPI publish complete!"
21
+ echo "Test installation: pip install --index-url https://test.pypi.org/simple/ pipecat-ai-typecast"
22
+ else
23
+ echo "Uploading to PyPI..."
24
+ twine upload dist/*
25
+ echo "PyPI publish complete!"
26
+ fi
27
+
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+