pipecat-ai-typecast 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pipecat_ai_typecast-0.0.1/.gitignore +6 -0
- pipecat_ai_typecast-0.0.1/CHANGELOG.md +16 -0
- pipecat_ai_typecast-0.0.1/LICENSE +24 -0
- pipecat_ai_typecast-0.0.1/PKG-INFO +152 -0
- pipecat_ai_typecast-0.0.1/README.md +128 -0
- pipecat_ai_typecast-0.0.1/env.example +9 -0
- pipecat_ai_typecast-0.0.1/example.py +144 -0
- pipecat_ai_typecast-0.0.1/pyproject.toml +65 -0
- pipecat_ai_typecast-0.0.1/scripts/publish.sh +27 -0
- pipecat_ai_typecast-0.0.1/setup.cfg +4 -0
- pipecat_ai_typecast-0.0.1/src/pipecat_ai_typecast.egg-info/PKG-INFO +152 -0
- pipecat_ai_typecast-0.0.1/src/pipecat_ai_typecast.egg-info/SOURCES.txt +16 -0
- pipecat_ai_typecast-0.0.1/src/pipecat_ai_typecast.egg-info/dependency_links.txt +1 -0
- pipecat_ai_typecast-0.0.1/src/pipecat_ai_typecast.egg-info/requires.txt +4 -0
- pipecat_ai_typecast-0.0.1/src/pipecat_ai_typecast.egg-info/top_level.txt +1 -0
- pipecat_ai_typecast-0.0.1/src/pipecat_typecast/__init__.py +24 -0
- pipecat_ai_typecast-0.0.1/src/pipecat_typecast/tts.py +273 -0
- pipecat_ai_typecast-0.0.1/uv.lock +3982 -0
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [0.0.1] - 2025-11-12
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- Initial release of Typecast TTS integration for Pipecat
|
|
12
|
+
- `TypecastTTSService` for high-quality text-to-speech synthesis
|
|
13
|
+
|
|
14
|
+
### Documentation
|
|
15
|
+
- README with installation and usage instructions
|
|
16
|
+
- Foundational example demonstrating complete pipecat bot setup
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
BSD 2-Clause License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024–2025, Daily
|
|
4
|
+
|
|
5
|
+
Redistribution and use in source and binary forms, with or without
|
|
6
|
+
modification, are permitted provided that the following conditions are met:
|
|
7
|
+
|
|
8
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
9
|
+
list of conditions and the following disclaimer.
|
|
10
|
+
|
|
11
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
12
|
+
this list of conditions and the following disclaimer in the documentation
|
|
13
|
+
and/or other materials provided with the distribution.
|
|
14
|
+
|
|
15
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
16
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
17
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
18
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
19
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
20
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
21
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
22
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
23
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
24
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pipecat-ai-typecast
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Typecast is an AI text-to-speech API that converts text into lifelike, expressive speech in many languages.
|
|
5
|
+
Author-email: Neosapience <help@typecast.ai>
|
|
6
|
+
Maintainer-email: Neosapience <help@typecast.ai>
|
|
7
|
+
License-Expression: BSD-2-Clause
|
|
8
|
+
Project-URL: Homepage, https://typecast.ai
|
|
9
|
+
Project-URL: Documentation, https://typecast.ai/docs/overview
|
|
10
|
+
Project-URL: Source, https://github.com/neosapience/pipecat-typecast
|
|
11
|
+
Keywords: pipecat,text-to-speech,ai
|
|
12
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
|
|
15
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
16
|
+
Requires-Python: >=3.10
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
License-File: LICENSE
|
|
19
|
+
Requires-Dist: pipecat-ai>=0.0.94
|
|
20
|
+
Requires-Dist: aiohttp>=3.9.0
|
|
21
|
+
Requires-Dist: loguru>=0.7.0
|
|
22
|
+
Requires-Dist: pydantic>=2.0.0
|
|
23
|
+
Dynamic: license-file
|
|
24
|
+
|
|
25
|
+
# Pipecat Typecast TTS Integration
|
|
26
|
+
|
|
27
|
+
Add high-quality neural voices from [Typecast](https://typecast.ai/) to your Pipecat AI pipelines.
|
|
28
|
+
|
|
29
|
+
**Maintainer:** Neosapience / Typecast team (@neosapience)
|
|
30
|
+
|
|
31
|
+
## Installation
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
pip install pipecat-ai-typecast
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Prerequisites
|
|
38
|
+
|
|
39
|
+
- Typecast API key (`TYPECAST_API_KEY`)
|
|
40
|
+
- Optional: Voice override (`TYPECAST_VOICE_ID`) – defaults to `tc_62a8975e695ad26f7fb514d1`
|
|
41
|
+
|
|
42
|
+
## Usage with Pipecat Pipeline
|
|
43
|
+
|
|
44
|
+
`TypecastTTSService` integrates Typecast's streaming text-to-speech into a Pipecat pipeline. It converts LLM text output into expressive speech while leveraging Pipecat's transport, STT, and turn-taking stack.
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
import os, aiohttp
|
|
48
|
+
from pipecat.pipeline.pipeline import Pipeline
|
|
49
|
+
from pipecat_typecast.tts import TypecastTTSService
|
|
50
|
+
|
|
51
|
+
async with aiohttp.ClientSession() as session:
|
|
52
|
+
llm = ...
|
|
53
|
+
stt = ...
|
|
54
|
+
tts = TypecastTTSService(
|
|
55
|
+
aiohttp_session=session,
|
|
56
|
+
api_key=os.getenv("TYPECAST_API_KEY"),
|
|
57
|
+
voice_id=os.getenv("TYPECAST_VOICE_ID", "tc_62a8975e695ad26f7fb514d1"),
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
pipeline = Pipeline([
|
|
61
|
+
transport.input(), # audio/user input
|
|
62
|
+
stt, # speech to text
|
|
63
|
+
context_aggregator.user(), # add user text to context
|
|
64
|
+
llm, # LLM generates response
|
|
65
|
+
tts, # Typecast TTS synthesis
|
|
66
|
+
transport.output(), # stream audio back to user
|
|
67
|
+
context_aggregator.assistant(), # store assistant response
|
|
68
|
+
])
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
See [`example.py`](example.py) for a complete working example including event handlers and transport setup.
|
|
72
|
+
|
|
73
|
+
### Advanced Configuration (Emotion & Audio Controls)
|
|
74
|
+
|
|
75
|
+
`TypecastTTSService` exposes structured parameter models so you can tune emotion and audio output.
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
from pipecat_typecast.tts import (
|
|
79
|
+
TypecastTTSService,
|
|
80
|
+
TypecastInputParams,
|
|
81
|
+
PromptOptions,
|
|
82
|
+
OutputOptions,
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
params = TypecastInputParams(
|
|
86
|
+
# Language influences pronunciation model (defaults to English)
|
|
87
|
+
# Language.EN / Language.KO / Language.JA ...
|
|
88
|
+
# If omitted, Typecast auto-detect may apply (depending on voice).
|
|
89
|
+
prompt_options=PromptOptions(
|
|
90
|
+
emotion_preset="happy", # normal | happy | sad | angry | whisper (voice dependent)
|
|
91
|
+
emotion_intensity=1.3, # 0.0–2.0 (float)
|
|
92
|
+
),
|
|
93
|
+
output_options=OutputOptions(
|
|
94
|
+
volume=110, # 0–200 (percent)
|
|
95
|
+
audio_pitch=2, # -12..12 (semitones)
|
|
96
|
+
audio_tempo=1.05, # 0.5–2.0 (playback speed)
|
|
97
|
+
audio_format="wav", # Only 'wav' currently supported
|
|
98
|
+
),
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
tts = TypecastTTSService(
|
|
102
|
+
aiohttp_session=session,
|
|
103
|
+
api_key=os.getenv("TYPECAST_API_KEY"),
|
|
104
|
+
voice_id="tc_62a8975e695ad26f7fb514d1", # Replace with another voice ID as desired
|
|
105
|
+
model="ssfm-v21", # Default model
|
|
106
|
+
params=params,
|
|
107
|
+
)
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
Notes:
|
|
111
|
+
- `emotion_preset` availability varies by voice. If unsupported, the service falls back to neutral.
|
|
112
|
+
- `emotion_intensity` > 1.0 increases expressiveness; extreme values can sound synthetic.
|
|
113
|
+
- `audio_pitch` shifts pitch in musical semitone units (use small adjustments for naturalness).
|
|
114
|
+
- `audio_tempo` changes speaking speed; keep within 0.85–1.15 for intelligibility.
|
|
115
|
+
- `seed` (set in `TypecastInputParams`) provides deterministic synthesis for identical text (when supported by model).
|
|
116
|
+
- Unsupported `audio_format` values yield an error frame—keep `wav`.
|
|
117
|
+
|
|
118
|
+
## Running the Example
|
|
119
|
+
|
|
120
|
+
1. Install dependencies:
|
|
121
|
+
```bash
|
|
122
|
+
uv sync
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
2. Set up your environment (copy the template, then fill in your API keys):
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
cp env.example .env
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
3. Run:
|
|
132
|
+
```bash
|
|
133
|
+
uv run python example.py
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
The bot will create a call (e.g. Daily room) and speak responses using Typecast voices.
|
|
137
|
+
|
|
138
|
+
## Compatibility
|
|
139
|
+
|
|
140
|
+
**Requires Pipecat v0.0.94 or later** (the `pipecat-ai>=0.0.94` dependency floor)
|
|
141
|
+
|
|
142
|
+
- Python 3.10+
|
|
143
|
+
- Daily / Twilio / generic WebRTC transports (see `example.py`)
|
|
144
|
+
|
|
145
|
+
## License
|
|
146
|
+
|
|
147
|
+
BSD-2-Clause - see [LICENSE](LICENSE)
|
|
148
|
+
|
|
149
|
+
## Support
|
|
150
|
+
|
|
151
|
+
- Docs: https://typecast.ai/docs/overview (refer to the API docs for voice IDs & parameters)
|
|
152
|
+
- Pipecat Discord: https://discord.gg/pipecat (`#community-integrations`)
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# Pipecat Typecast TTS Integration
|
|
2
|
+
|
|
3
|
+
Add high-quality neural voices from [Typecast](https://typecast.ai/) to your Pipecat AI pipelines.
|
|
4
|
+
|
|
5
|
+
**Maintainer:** Neosapience / Typecast team (@neosapience)
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install pipecat-ai-typecast
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Prerequisites
|
|
14
|
+
|
|
15
|
+
- Typecast API key (`TYPECAST_API_KEY`)
|
|
16
|
+
- Optional: Voice override (`TYPECAST_VOICE_ID`) – defaults to `tc_62a8975e695ad26f7fb514d1`
|
|
17
|
+
|
|
18
|
+
## Usage with Pipecat Pipeline
|
|
19
|
+
|
|
20
|
+
`TypecastTTSService` integrates Typecast's streaming text-to-speech into a Pipecat pipeline. It converts LLM text output into expressive speech while leveraging Pipecat's transport, STT, and turn-taking stack.
|
|
21
|
+
|
|
22
|
+
```python
|
|
23
|
+
import os, aiohttp
|
|
24
|
+
from pipecat.pipeline.pipeline import Pipeline
|
|
25
|
+
from pipecat_typecast.tts import TypecastTTSService
|
|
26
|
+
|
|
27
|
+
async with aiohttp.ClientSession() as session:
|
|
28
|
+
llm = ...
|
|
29
|
+
stt = ...
|
|
30
|
+
tts = TypecastTTSService(
|
|
31
|
+
aiohttp_session=session,
|
|
32
|
+
api_key=os.getenv("TYPECAST_API_KEY"),
|
|
33
|
+
voice_id=os.getenv("TYPECAST_VOICE_ID", "tc_62a8975e695ad26f7fb514d1"),
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
pipeline = Pipeline([
|
|
37
|
+
transport.input(), # audio/user input
|
|
38
|
+
stt, # speech to text
|
|
39
|
+
context_aggregator.user(), # add user text to context
|
|
40
|
+
llm, # LLM generates response
|
|
41
|
+
tts, # Typecast TTS synthesis
|
|
42
|
+
transport.output(), # stream audio back to user
|
|
43
|
+
context_aggregator.assistant(), # store assistant response
|
|
44
|
+
])
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
See [`example.py`](example.py) for a complete working example including event handlers and transport setup.
|
|
48
|
+
|
|
49
|
+
### Advanced Configuration (Emotion & Audio Controls)
|
|
50
|
+
|
|
51
|
+
`TypecastTTSService` exposes structured parameter models so you can tune emotion and audio output.
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
from pipecat_typecast.tts import (
|
|
55
|
+
TypecastTTSService,
|
|
56
|
+
TypecastInputParams,
|
|
57
|
+
PromptOptions,
|
|
58
|
+
OutputOptions,
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
params = TypecastInputParams(
|
|
62
|
+
# Language influences pronunciation model (defaults to English)
|
|
63
|
+
# Language.EN / Language.KO / Language.JA ...
|
|
64
|
+
# If omitted, Typecast auto-detect may apply (depending on voice).
|
|
65
|
+
prompt_options=PromptOptions(
|
|
66
|
+
emotion_preset="happy", # normal | happy | sad | angry | whisper (voice dependent)
|
|
67
|
+
emotion_intensity=1.3, # 0.0–2.0 (float)
|
|
68
|
+
),
|
|
69
|
+
output_options=OutputOptions(
|
|
70
|
+
volume=110, # 0–200 (percent)
|
|
71
|
+
audio_pitch=2, # -12..12 (semitones)
|
|
72
|
+
audio_tempo=1.05, # 0.5–2.0 (playback speed)
|
|
73
|
+
audio_format="wav", # Only 'wav' currently supported
|
|
74
|
+
),
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
tts = TypecastTTSService(
|
|
78
|
+
aiohttp_session=session,
|
|
79
|
+
api_key=os.getenv("TYPECAST_API_KEY"),
|
|
80
|
+
voice_id="tc_62a8975e695ad26f7fb514d1", # Replace with another voice ID as desired
|
|
81
|
+
model="ssfm-v21", # Default model
|
|
82
|
+
params=params,
|
|
83
|
+
)
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
Notes:
|
|
87
|
+
- `emotion_preset` availability varies by voice. If unsupported, the service falls back to neutral.
|
|
88
|
+
- `emotion_intensity` > 1.0 increases expressiveness; extreme values can sound synthetic.
|
|
89
|
+
- `audio_pitch` shifts pitch in musical semitone units (use small adjustments for naturalness).
|
|
90
|
+
- `audio_tempo` changes speaking speed; keep within 0.85–1.15 for intelligibility.
|
|
91
|
+
- `seed` (set in `TypecastInputParams`) provides deterministic synthesis for identical text (when supported by model).
|
|
92
|
+
- Unsupported `audio_format` values yield an error frame—keep `wav`.
|
|
93
|
+
|
|
94
|
+
## Running the Example
|
|
95
|
+
|
|
96
|
+
1. Install dependencies:
|
|
97
|
+
```bash
|
|
98
|
+
uv sync
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
2. Set up your environment (copy the template, then fill in your API keys):
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
cp env.example .env
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
3. Run:
|
|
108
|
+
```bash
|
|
109
|
+
uv run python example.py
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
The bot will create a call (e.g. Daily room) and speak responses using Typecast voices.
|
|
113
|
+
|
|
114
|
+
## Compatibility
|
|
115
|
+
|
|
116
|
+
**Requires Pipecat v0.0.94 or later** (the `pipecat-ai>=0.0.94` dependency floor)
|
|
117
|
+
|
|
118
|
+
- Python 3.10+
|
|
119
|
+
- Daily / Twilio / generic WebRTC transports (see `example.py`)
|
|
120
|
+
|
|
121
|
+
## License
|
|
122
|
+
|
|
123
|
+
BSD-2-Clause - see [LICENSE](LICENSE)
|
|
124
|
+
|
|
125
|
+
## Support
|
|
126
|
+
|
|
127
|
+
- Docs: https://typecast.ai/docs/overview (refer to the API docs for voice IDs & parameters)
|
|
128
|
+
- Pipecat Discord: https://discord.gg/pipecat (`#community-integrations`)
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2024–2025, Daily
|
|
3
|
+
#
|
|
4
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
|
|
10
|
+
import aiohttp
|
|
11
|
+
from dotenv import load_dotenv
|
|
12
|
+
from loguru import logger
|
|
13
|
+
|
|
14
|
+
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
15
|
+
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
16
|
+
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
|
17
|
+
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
18
|
+
from pipecat.frames.frames import LLMRunFrame
|
|
19
|
+
from pipecat.pipeline.pipeline import Pipeline
|
|
20
|
+
from pipecat.pipeline.runner import PipelineRunner
|
|
21
|
+
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
|
22
|
+
from pipecat.processors.aggregators.llm_context import LLMContext
|
|
23
|
+
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
24
|
+
from pipecat.runner.types import RunnerArguments
|
|
25
|
+
from pipecat.runner.utils import create_transport
|
|
26
|
+
from pipecat.services.deepgram.stt import DeepgramSTTService
|
|
27
|
+
from pipecat.services.openai.llm import OpenAILLMService
|
|
28
|
+
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
|
29
|
+
from pipecat.transports.daily.transport import DailyParams
|
|
30
|
+
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
|
31
|
+
|
|
32
|
+
from pipecat_typecast import TypecastTTSService
|
|
33
|
+
|
|
34
|
+
load_dotenv(override=True)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# Each entry is a zero-argument factory rather than a ready-made params object,
# so nothing (e.g. SileroVADAnalyzer) is instantiated up front. The factory is
# only called once the desired transport gets selected.
def _audio_io_kwargs():
    """Return the audio settings shared by every transport, built fresh per call."""
    return dict(
        audio_in_enabled=True,
        audio_out_enabled=True,
        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
        turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
    )


transport_params = {
    "daily": lambda: DailyParams(**_audio_io_kwargs()),
    "twilio": lambda: FastAPIWebsocketParams(**_audio_io_kwargs()),
    "webrtc": lambda: TransportParams(**_audio_io_kwargs()),
}
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    """Assemble the STT -> LLM -> Typecast TTS pipeline and run it.

    Args:
        transport: Transport supplying the pipeline's input/output processors
            and the client connected/disconnected events.
        runner_args: Runner settings; provides the pipeline idle timeout and
            whether the runner should handle SIGINT.
    """
    logger.info("Starting bot")

    async with aiohttp.ClientSession() as http_session:
        speech_to_text = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

        # Forward voice_id only when TYPECAST_VOICE_ID is set to a non-empty
        # value; otherwise the argument is omitted entirely.
        configured_voice = os.getenv("TYPECAST_VOICE_ID")
        voice_override = {"voice_id": configured_voice} if configured_voice else {}
        text_to_speech = TypecastTTSService(
            aiohttp_session=http_session,
            api_key=os.getenv("TYPECAST_API_KEY", ""),
            **voice_override,
        )

        language_model = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))

        system_prompt = (
            "You are a helpful LLM in a WebRTC call. "
            "Your goal is to demonstrate your capabilities succinctly. "
            "Your output will be converted to audio, so avoid special characters."
        )
        messages = [{"role": "system", "content": system_prompt}]

        context_aggregator = LLMContextAggregatorPair(LLMContext(messages))

        # Processors in the order frames flow through them.
        processors = [
            transport.input(),  # Transport user input
            speech_to_text,
            context_aggregator.user(),  # User responses
            language_model,  # LLM
            text_to_speech,  # TTS
            transport.output(),  # Transport bot output
            context_aggregator.assistant(),  # Assistant spoken responses
        ]

        task = PipelineTask(
            Pipeline(processors),
            params=PipelineParams(enable_metrics=True, enable_usage_metrics=True),
            idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
        )

        @transport.event_handler("on_client_connected")
        async def on_client_connected(transport, client):
            logger.info("Client connected")
            # Kick off the conversation: add an introduction request to the
            # message list handed to LLMContext above, then queue an LLM run.
            intro_request = {
                "role": "system",
                "content": "Please introduce yourself to the user.",
            }
            messages.append(intro_request)
            await task.queue_frames([LLMRunFrame()])

        @transport.event_handler("on_client_disconnected")
        async def on_client_disconnected(transport, client):
            logger.info("Client disconnected")
            # Cancel the pipeline task once the client has gone.
            await task.cancel()

        await PipelineRunner(handle_sigint=runner_args.handle_sigint).run(task)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
async def bot(runner_args: RunnerArguments):
    """Main bot entry point compatible with Pipecat Cloud.

    Creates the transport for ``runner_args`` from the ``transport_params``
    factories, then hands it to ``run_bot``.

    Args:
        runner_args: Runner arguments forwarded to both ``create_transport``
            and ``run_bot``.
    """
    await run_bot(await create_transport(runner_args, transport_params), runner_args)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
if __name__ == "__main__":
    # Imported here so the Pipecat runner's `main` is only loaded when this
    # file is executed as a script.
    from pipecat.runner.run import main as run_pipecat_main

    run_pipecat_main()
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "pipecat-ai-typecast"
|
|
3
|
+
version = "0.0.1"
|
|
4
|
+
description = "Typecast is an AI text-to-speech API that converts text into lifelike, expressive speech in many languages."
|
|
5
|
+
license = "BSD-2-Clause"
|
|
6
|
+
license-files = ["LICENSE"]
|
|
7
|
+
readme = "README.md"
|
|
8
|
+
requires-python = ">=3.10"
|
|
9
|
+
keywords = ["pipecat", "text-to-speech", "ai"]
|
|
10
|
+
authors = [
|
|
11
|
+
{name = "Neosapience", email = "help@typecast.ai"}
|
|
12
|
+
]
|
|
13
|
+
maintainers = [
|
|
14
|
+
{name = "Neosapience", email = "help@typecast.ai"}
|
|
15
|
+
]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 5 - Production/Stable",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
"Topic :: Multimedia :: Sound/Audio :: Speech",
|
|
20
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence"
|
|
21
|
+
]
|
|
22
|
+
dependencies = [
|
|
23
|
+
"pipecat-ai>=0.0.94",
|
|
24
|
+
"aiohttp>=3.9.0",
|
|
25
|
+
"loguru>=0.7.0",
|
|
26
|
+
"pydantic>=2.0.0",
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
[dependency-groups]
|
|
30
|
+
dev = [
|
|
31
|
+
"pipecat-ai[daily,openai,deepgram,runner,webrtc,local-smart-turn-v3]>=0.0.94",
|
|
32
|
+
"python-dotenv>=1.0.1",
|
|
33
|
+
"onnxruntime>=1.23.2",
|
|
34
|
+
"ruff>=0.12.11",
|
|
35
|
+
"build>=1.0.0",
|
|
36
|
+
"twine>=5.0.0",
|
|
37
|
+
]
|
|
38
|
+
|
|
39
|
+
[project.urls]
|
|
40
|
+
Homepage = "https://typecast.ai"
|
|
41
|
+
Documentation = "https://typecast.ai/docs/overview"
|
|
42
|
+
Source = "https://github.com/neosapience/pipecat-typecast"
|
|
43
|
+
|
|
44
|
+
[build-system]
|
|
45
|
+
requires = ["setuptools>=77.0.3", "setuptools_scm>=8"]
|
|
46
|
+
build-backend = "setuptools.build_meta"
|
|
47
|
+
|
|
48
|
+
[tool.setuptools.packages.find]
|
|
49
|
+
where = ["src"]
|
|
50
|
+
|
|
51
|
+
[tool.ruff]
|
|
52
|
+
line-length = 100
|
|
53
|
+
|
|
54
|
+
[tool.ruff.lint]
|
|
55
|
+
select = [
|
|
56
|
+
"D", # Docstring rules
|
|
57
|
+
"I", # Import rules
|
|
58
|
+
]
|
|
59
|
+
|
|
60
|
+
[tool.ruff.lint.per-file-ignores]
|
|
61
|
+
# Skip docstring checks for non-source code
|
|
62
|
+
"example.py" = ["D"]
|
|
63
|
+
|
|
64
|
+
[tool.ruff.lint.pydocstyle]
|
|
65
|
+
convention = "google"
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
#!/bin/bash
# PyPI publish script
# Usage: ./scripts/publish.sh [testpypi|pypi]
#
# Cleans previous build artifacts, builds the package with `python -m build`,
# and uploads everything in dist/ with twine. Uploads to PyPI when no argument
# is given.

# Abort on command failure, on use of unset variables, and on failures inside
# pipelines.
set -euo pipefail

REPOSITORY=${1:-pypi}

# Accept only the two documented targets, and check before building. Without
# this, any unrecognized value (e.g. a typo of "testpypi") would fall through
# to the production PyPI upload.
case "$REPOSITORY" in
    testpypi|pypi) ;;
    *)
        echo "Unknown repository: $REPOSITORY" >&2
        echo "Usage: $0 [testpypi|pypi]" >&2
        exit 1
        ;;
esac

# This script lives in scripts/; switch to the project root so the relative
# paths below resolve regardless of the caller's working directory.
cd "$(dirname "${BASH_SOURCE[0]}")/.."

echo "Cleaning previous build artifacts..."
# The project uses a src/ layout, so the egg-info directory is generated under
# src/ and has to be removed there as well.
rm -rf dist/ build/ *.egg-info src/*.egg-info

echo "Building package..."
python -m build

echo "Build complete!"

if [ "$REPOSITORY" = "testpypi" ]; then
    echo "Uploading to TestPyPI..."
    twine upload --repository testpypi dist/*
    echo "TestPyPI publish complete!"
    echo "Test installation: pip install --index-url https://test.pypi.org/simple/ pipecat-ai-typecast"
else
    echo "Uploading to PyPI..."
    twine upload dist/*
    echo "PyPI publish complete!"
fi
|
|
27
|
+
|