chichi-speech 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
@@ -0,0 +1,41 @@
1
+
2
+ import argparse
3
+ import requests
4
+ import sys
5
+ import os
6
+
7
def main():
    """Send text to the TTS service and save the returned audio as a WAV file.

    Reads the text, language, output path and service URL from the command
    line, POSTs ``{"text": ..., "language": ...}`` as JSON to
    ``<url>/synthesize`` and streams the response body into the output file.

    Exits with status 1, printing a message to stderr, when the service
    cannot be reached or any other error occurs.
    """
    parser = argparse.ArgumentParser(description="Client for Qwen3 TTS Service")
    parser.add_argument("text", help="Text to synthesize")
    parser.add_argument("-l", "--language", default="Chinese", help="Language code (default: Chinese)")
    parser.add_argument("-o", "--output", default="output.wav", help="Output WAV file path")
    parser.add_argument("--url", default="http://localhost:9090", help="Service URL (default: http://localhost:9090)")

    args = parser.parse_args()

    # Tolerate a trailing slash on the base URL.
    endpoint = f"{args.url.rstrip('/')}/synthesize"
    payload = {
        "text": args.text,
        "language": args.language,
    }

    print(f"Sending request to {endpoint}...")
    try:
        # Bound only the connect phase (10 s): an unreachable host should fail
        # fast, but synthesis itself may take a long time, so the read
        # timeout is left unlimited. The context manager releases the
        # streamed connection even if writing the file fails.
        with requests.post(endpoint, json=payload, stream=True, timeout=(10, None)) as response:
            response.raise_for_status()

            # The output file is only created once the server reported success.
            with open(args.output, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        print(f"Success! Audio saved to: {os.path.abspath(args.output)}")

    except requests.exceptions.ConnectionError:
        print(f"Error: Could not connect to service at {args.url}. Is the server running?", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Top-level CLI boundary: report anything else and exit non-zero.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
39
+
40
+ if __name__ == "__main__":
41
+ main()
@@ -0,0 +1,117 @@
1
+ import uvicorn
2
+ from fastapi import FastAPI, HTTPException
3
+ from fastapi.responses import StreamingResponse
4
+ from pydantic import BaseModel
5
+ import torch
6
+ import soundfile as sf
7
+ import io
8
+ from qwen_tts import Qwen3TTSModel
9
+
10
+ # Initialize FastAPI app
11
+ app = FastAPI(title="ChiChi Speech Service")
12
+
13
+ # Global variables
14
+ model = None
15
+ VOICE_PROMPT = None
16
+
17
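+ # Default reference constants for voice cloning. NOTE: REF_AUDIO is a machine-specific local path; override both values with --ref-audio / --ref-text.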
18
+ # REF_AUDIO = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen3-TTS-Repo/clone_2.wav"
19
+ # REF_TEXT = "Okay. Yeah. I resent you. I love you. I respect you. But you know what? You blew it! And thanks to you."
20
+ REF_AUDIO = "/Users/stevenliu/Downloads/0126_ori_voice.WAV"
21
+ REF_TEXT = "你那日问我,我的眼里是如何看待我?元氏父兄我打你,他们试着训过,终点并非叛贼。你总说守护江阴义不容辞,守护江阴义不容辞,可我怕你守到这最后,你也住了那史书上殉国忠烈。"
22
+
23
+
24
@app.on_event("startup")
async def startup_event():
    """Load the TTS model and pre-compute the voice clone prompt at startup.

    Populates the module-level ``model`` and ``VOICE_PROMPT`` globals that the
    ``/synthesize`` handler checks before serving a request.
    """
    global model, VOICE_PROMPT

    # Choose the compute backend at runtime instead of assuming Apple Metal,
    # so the service also starts on CUDA or CPU-only machines. MPS is tried
    # first, which keeps the previous behaviour where it is available.
    if torch.backends.mps.is_available():
        device = "mps"
    elif torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"

    print("Loading Qwen3 TTS Model...")
    print(f"Using device: {device}")
    model = Qwen3TTSModel.from_pretrained(
        "Qwen/Qwen3-TTS-12Hz-1.7B-Base",
        device_map=device,
        dtype=torch.float32,
    )

    print("Creating Voice Clone Prompt...")
    # Computed once from the module-level reference audio and transcript and
    # then reused for every request. x_vector_only_mode is not passed, so the
    # library's own default applies.
    VOICE_PROMPT = model.create_voice_clone_prompt(
        ref_audio=REF_AUDIO,
        ref_text=REF_TEXT,
    )
    print("Service Ready.")
47
+
48
class SynthesisRequest(BaseModel):
    """JSON body accepted by the ``/synthesize`` endpoint."""

    # Text to be spoken (required).
    text: str
    # Language passed through to the model; "Chinese" when omitted.
    language: str = "Chinese"
51
+
52
@app.post("/synthesize")
async def synthesize(request: SynthesisRequest):
    """Synthesize speech for the request text using the pre-loaded voice clone prompt.

    Args:
        request: The text to speak and the language to pass to the model.

    Returns:
        A ``StreamingResponse`` with the generated audio encoded as WAV
        (media type ``audio/wav``).

    Raises:
        HTTPException: 503 if the model or voice prompt has not been loaded,
            400 if the text is blank, 500 if generation or encoding fails.
    """
    # Compare against None explicitly; the truthiness of a model object is
    # not a reliable "has it been loaded" signal.
    if model is None or VOICE_PROMPT is None:
        raise HTTPException(status_code=503, detail="Service not initialized")

    # Reject blank input before invoking the model. This check must stay
    # outside the try block below, whose catch-all handler would otherwise
    # convert the 400 into a 500.
    if not request.text.strip():
        raise HTTPException(status_code=400, detail="Text must not be empty")

    try:
        wavs, sr = model.generate_voice_clone(
            text=request.text,
            language=request.language,
            voice_clone_prompt=VOICE_PROMPT,
        )

        # wavs[0] is the audio data. soundfile needs a numpy array, so
        # tensor-like outputs are moved to the CPU and converted first.
        audio_data = wavs[0]
        if hasattr(audio_data, "cpu"):
            audio_data = audio_data.cpu().float().numpy()

        # Encode as WAV into an in-memory buffer and rewind it for reading.
        buffer = io.BytesIO()
        sf.write(buffer, audio_data, sr, format="WAV")
        buffer.seek(0)

        return StreamingResponse(buffer, media_type="audio/wav")

    except Exception as e:
        # Log the full traceback on the server; the client gets the message.
        import traceback
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e)) from e
85
+
86
def main():
    """Parse command-line options and run the TTS service with uvicorn.

    Options: ``--port`` (default 9090), ``--host`` (default 0.0.0.0),
    ``--ref-audio`` and ``--ref-text``. A ``PORT`` environment variable, when
    set, takes precedence over ``--port``. The reference audio/text options
    replace the module-level ``REF_AUDIO`` / ``REF_TEXT`` values that are read
    when the voice clone prompt is built at startup.
    """
    import argparse
    import os
    import sys

    global REF_AUDIO, REF_TEXT

    parser = argparse.ArgumentParser(description="Qwen3 TTS Service")
    parser.add_argument("--port", type=int, default=9090, help="Service port (default: 9090)")
    parser.add_argument("--host", type=str, default="0.0.0.0", help="Service host (default: 0.0.0.0)")
    parser.add_argument("--ref-audio", type=str, nargs="+", help="Path(s) to reference audio file(s) for voice cloning")
    parser.add_argument("--ref-text", type=str, nargs="+", help="Reference text content(s) corresponding to the audio")

    args = parser.parse_args()

    # Environment variable overrides
    env_port = os.environ.get("PORT")
    if env_port is not None:
        try:
            args.port = int(env_port)
        except ValueError:
            # Report a malformed PORT as a usage error, not a raw traceback.
            parser.error(f"PORT environment variable must be an integer, got {env_port!r}")

    # Update global configuration if arguments are specified. nargs="+"
    # always yields a list; a single value is unwrapped so the override has
    # the same type (str) as the built-in default.
    if args.ref_audio:
        REF_AUDIO = args.ref_audio[0] if len(args.ref_audio) == 1 else args.ref_audio
    if args.ref_text:
        REF_TEXT = args.ref_text[0] if len(args.ref_text) == 1 else args.ref_text

    # Overriding only one of the pair leaves audio and transcript mismatched.
    if bool(args.ref_audio) != bool(args.ref_text):
        print(
            "Warning: --ref-audio and --ref-text should be given together; "
            "the other value falls back to the built-in default.",
            file=sys.stderr,
        )

    print(f"Starting server on {args.host}:{args.port}")
    if args.ref_audio:
        print(f"Overriding reference audio: {args.ref_audio}")

    # Run the server
    uvicorn.run(app, host=args.host, port=args.port)
115
+
116
+ if __name__ == "__main__":
117
+ main()
@@ -0,0 +1,104 @@
1
+ Metadata-Version: 2.4
2
+ Name: chichi-speech
3
+ Version: 0.1.0
4
+ Summary: Qwen3 TTS Service with Voice Cloning
5
+ Requires-Python: >=3.10
6
+ Requires-Dist: fastapi
7
+ Requires-Dist: numba>=0.59.0
8
+ Requires-Dist: pydantic
9
+ Requires-Dist: qwen-tts
10
+ Requires-Dist: requests
11
+ Requires-Dist: soundfile
12
+ Requires-Dist: torch
13
+ Requires-Dist: uvicorn
14
+ Provides-Extra: dev
15
+ Requires-Dist: build; extra == 'dev'
16
+ Requires-Dist: httpx; extra == 'dev'
17
+ Requires-Dist: pytest; extra == 'dev'
18
+ Description-Content-Type: text/markdown
19
+
20
+ # Chichi Speech
21
+
22
+ **A high-quality, voice-cloning TTS service powered by Qwen3.**
23
+
24
+ Chichi Speech provides a robust REST API and CLI tools for text-to-speech synthesis, featuring efficient voice cloning capabilities. It is designed to be easily deployed or integrated into other AI agents and workflows.
25
+
26
+ ## Features
27
+
28
+ - **High Quality**: Utilizes the Qwen3-TTS model for state-of-the-art speech synthesis.
29
+ - **Voice Cloning**: Clone voices from reference audio files.
30
+ - **Efficient**: Optimized for reusing voice prompts to minimize computation for repeated requests.
31
+ - **Standardized API**: Simple REST API (`/synthesize`) for easy integration.
32
+ - **CLI Tools**: Includes `chichi-speech-server` and `chichi-speech-client` for immediate use.
33
+
34
+ ## Installation
35
+
36
+ Prerequisites: `git`, `uv`, `python >= 3.10`.
37
+
38
+ ```bash
39
+ export CHICHI_SPEECH_HOME="$HOME/chichi-speech/"
40
+ export CHICHI_SPEECH_ENV="$HOME/chichi-speech/.venv"
41
+ git clone https://github.com/yourusername/chichi-speech.git $CHICHI_SPEECH_HOME
42
+ cd $CHICHI_SPEECH_HOME
43
+
44
+ uv venv $CHICHI_SPEECH_ENV --python 3.10
45
+ source $CHICHI_SPEECH_ENV/bin/activate
46
+
47
+ uv pip install -e .
48
+ ```
49
+
50
+ ## Usage
51
+
52
+ ### 1. Start the Service
53
+
54
+ The service runs on port **9090** by default.
55
+
56
+ ```bash
57
+ # Start the server (runs in foreground, use & for background or a separate terminal)
58
+ source $CHICHI_SPEECH_ENV/bin/activate
59
+ chichi-speech-server
60
+ # OR specify the port explicitly
61
+ chichi-speech-server --port 9090 --host 0.0.0.0
62
+ # OR specify your reference audio and text for voice cloning (Recommended)
63
+ chichi-speech-server --ref-audio /path/to/my/voice.wav --ref-text "caption of the reference audio"
64
+ ```
65
+
66
+ ### 2. Verify Service is Running
67
+ Confirm the service is up by fetching the auto-generated API docs page:
68
+ ```bash
69
+ curl http://localhost:9090/docs
70
+ ```
71
+
72
+ ### 3. Generate Speech
73
+
74
+ Use cURL:
75
+ ```bash
76
+ curl -X POST "http://localhost:9090/synthesize" \
77
+ -H "Content-Type: application/json" \
78
+ -d '{
79
+ "text": "Nice to meet you",
80
+ "language": "English"
81
+ }' \
82
+ --output output/nice_to_meet.wav
83
+ ```
84
+
85
+ ## Functionality
86
+ - **Endpoint**: `POST /synthesize`
87
+ - **Default Port**: 9090
88
+ - **Voice Cloning**: Uses a pre-computed voice prompt from reference files to ensure the cloned voice is consistent and generation is fast.
89
+
90
+ ## Development
91
+
92
+ Install dev dependencies:
93
+ ```bash
94
+ uv pip install -e ".[dev]"
95
+ ```
96
+
97
+ Run tests:
98
+ ```bash
99
+ pytest
100
+ ```
101
+
102
+ ## License
103
+
104
+ MIT
@@ -0,0 +1,7 @@
1
+ chichi_speech/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ chichi_speech/client.py,sha256=GYIxqKy1ONsOxNMx83gMOT1cM6K0rdjWSR_lnPiLk8U,1381
3
+ chichi_speech/server.py,sha256=JnC7A7casb3q_HRot9sSjgA_41E-H7lnJHr_tHOtSMg,4204
4
+ chichi_speech-0.1.0.dist-info/METADATA,sha256=tL48c1POhy-6QQXlmEMbTx4Z3k1GHix-3ivbEZN3-ps,2844
5
+ chichi_speech-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
6
+ chichi_speech-0.1.0.dist-info/entry_points.txt,sha256=tRZ0iWPkkhzyEEQUSnGhyjbFJwxX49JkyqfU5xPR9yM,116
7
+ chichi_speech-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.28.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ chichi-speech-client = chichi_speech.client:main
3
+ chichi-speech-server = chichi_speech.server:main