lunavox 2.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. lunavox-2.0.0/LICENSE +241 -0
  2. lunavox-2.0.0/PKG-INFO +208 -0
  3. lunavox-2.0.0/README.md +169 -0
  4. lunavox-2.0.0/pyproject.toml +60 -0
  5. lunavox-2.0.0/setup.cfg +4 -0
  6. lunavox-2.0.0/src/lunavox/__init__.py +6 -0
  7. lunavox-2.0.0/src/lunavox/__main__.py +6 -0
  8. lunavox-2.0.0/src/lunavox/build/__init__.py +16 -0
  9. lunavox-2.0.0/src/lunavox/build/base.py +259 -0
  10. lunavox-2.0.0/src/lunavox/build/context.py +24 -0
  11. lunavox-2.0.0/src/lunavox/build/lib_downloader.py +250 -0
  12. lunavox-2.0.0/src/lunavox/build/libs.json +81 -0
  13. lunavox-2.0.0/src/lunavox/build/linux.py +24 -0
  14. lunavox-2.0.0/src/lunavox/build/macos.py +30 -0
  15. lunavox-2.0.0/src/lunavox/build/main.py +126 -0
  16. lunavox-2.0.0/src/lunavox/build/windows.py +153 -0
  17. lunavox-2.0.0/src/lunavox/cli/__init__.py +2 -0
  18. lunavox-2.0.0/src/lunavox/cli/main.py +452 -0
  19. lunavox-2.0.0/src/lunavox/core/__init__.py +2 -0
  20. lunavox-2.0.0/src/lunavox/core/deps.py +126 -0
  21. lunavox-2.0.0/src/lunavox/core/project.py +51 -0
  22. lunavox-2.0.0/src/lunavox/core/ui.py +17 -0
  23. lunavox-2.0.0/src/lunavox/model/__init__.py +8 -0
  24. lunavox-2.0.0/src/lunavox/model/config.py +58 -0
  25. lunavox-2.0.0/src/lunavox/model/conversion/__init__.py +2 -0
  26. lunavox-2.0.0/src/lunavox/model/conversion/convert_talker_predictor_llama.py +321 -0
  27. lunavox-2.0.0/src/lunavox/model/conversion/convert_tts_to_gguf.py +800 -0
  28. lunavox-2.0.0/src/lunavox/model/conversion/export_embeddings.py +98 -0
  29. lunavox-2.0.0/src/lunavox/model/conversion/export_onnx_models.py +298 -0
  30. lunavox-2.0.0/src/lunavox/model/conversion/hf_export/__init__.py +1 -0
  31. lunavox-2.0.0/src/lunavox/model/conversion/hf_export/convert_hf_to_gguf.py +11433 -0
  32. lunavox-2.0.0/src/lunavox/model/conversion/hf_export/gguf/__init__.py +9 -0
  33. lunavox-2.0.0/src/lunavox/model/conversion/hf_export/gguf/constants.py +3735 -0
  34. lunavox-2.0.0/src/lunavox/model/conversion/hf_export/gguf/gguf.py +15 -0
  35. lunavox-2.0.0/src/lunavox/model/conversion/hf_export/gguf/gguf_reader.py +367 -0
  36. lunavox-2.0.0/src/lunavox/model/conversion/hf_export/gguf/gguf_writer.py +1271 -0
  37. lunavox-2.0.0/src/lunavox/model/conversion/hf_export/gguf/lazy.py +228 -0
  38. lunavox-2.0.0/src/lunavox/model/conversion/hf_export/gguf/metadata.py +731 -0
  39. lunavox-2.0.0/src/lunavox/model/conversion/hf_export/gguf/py.typed +0 -0
  40. lunavox-2.0.0/src/lunavox/model/conversion/hf_export/gguf/quants.py +1318 -0
  41. lunavox-2.0.0/src/lunavox/model/conversion/hf_export/gguf/scripts/gguf_convert_endian.py +186 -0
  42. lunavox-2.0.0/src/lunavox/model/conversion/hf_export/gguf/scripts/gguf_dump.py +477 -0
  43. lunavox-2.0.0/src/lunavox/model/conversion/hf_export/gguf/scripts/gguf_editor_gui.py +1621 -0
  44. lunavox-2.0.0/src/lunavox/model/conversion/hf_export/gguf/scripts/gguf_hash.py +102 -0
  45. lunavox-2.0.0/src/lunavox/model/conversion/hf_export/gguf/scripts/gguf_new_metadata.py +216 -0
  46. lunavox-2.0.0/src/lunavox/model/conversion/hf_export/gguf/scripts/gguf_set_metadata.py +95 -0
  47. lunavox-2.0.0/src/lunavox/model/conversion/hf_export/gguf/tensor_mapping.py +1897 -0
  48. lunavox-2.0.0/src/lunavox/model/conversion/hf_export/gguf/utility.py +340 -0
  49. lunavox-2.0.0/src/lunavox/model/conversion/hf_export/gguf/vocab.py +891 -0
  50. lunavox-2.0.0/src/lunavox/model/conversion/hf_export/tokenizer_12hz/__init__.py +2 -0
  51. lunavox-2.0.0/src/lunavox/model/conversion/hf_export/tokenizer_12hz/configuration_qwen3_tts_tokenizer_v2.py +172 -0
  52. lunavox-2.0.0/src/lunavox/model/conversion/hf_export/tokenizer_12hz/internal/__init__.py +2 -0
  53. lunavox-2.0.0/src/lunavox/model/conversion/hf_export/tokenizer_12hz/internal/configuration_mimi.py +279 -0
  54. lunavox-2.0.0/src/lunavox/model/conversion/hf_export/tokenizer_12hz/internal/modeling_mimi.py +1770 -0
  55. lunavox-2.0.0/src/lunavox/model/conversion/hf_export/tokenizer_12hz/modeling_qwen3_tts_tokenizer_v2.py +1161 -0
  56. lunavox-2.0.0/src/lunavox/model/conversion/inspect_models.py +269 -0
  57. lunavox-2.0.0/src/lunavox/model/conversion/onnx_export_wrappers.py +235 -0
  58. lunavox-2.0.0/src/lunavox/model/conversion/speaker_encoder_local.py +265 -0
  59. lunavox-2.0.0/src/lunavox/model/conversion/validate_onnx_models.py +139 -0
  60. lunavox-2.0.0/src/lunavox/model/downloader.py +56 -0
  61. lunavox-2.0.0/src/lunavox/model/pipeline.py +456 -0
  62. lunavox-2.0.0/src/lunavox.egg-info/PKG-INFO +208 -0
  63. lunavox-2.0.0/src/lunavox.egg-info/SOURCES.txt +65 -0
  64. lunavox-2.0.0/src/lunavox.egg-info/dependency_links.txt +1 -0
  65. lunavox-2.0.0/src/lunavox.egg-info/entry_points.txt +2 -0
  66. lunavox-2.0.0/src/lunavox.egg-info/requires.txt +31 -0
  67. lunavox-2.0.0/src/lunavox.egg-info/top_level.txt +1 -0
lunavox-2.0.0/LICENSE ADDED
@@ -0,0 +1,241 @@
1
+ Lunavox Project License
2
+ =======================
3
+
4
+ This project involves both original software code and derivative model weights.
5
+ The licensing terms are structured as follows:
6
+
7
+ 1. SOFTWARE CODE LICENSE
8
+ ------------------------
9
+ The C++ source code, build scripts, and optimization logic original to the
10
+ Lunavox project are licensed under the MIT License:
11
+
12
+ MIT License
13
+
14
+ Copyright (c) 2026 Waiyuk Kwong
15
+
16
+ Permission is hereby granted, free of charge, to any person obtaining a copy
17
+ of this software and associated documentation files (the "Software"), to deal
18
+ in the Software without restriction, including without limitation the rights
19
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
20
+ copies of the Software, and to permit persons to whom the Software is
21
+ furnished to do so, subject to the following conditions:
22
+
23
+ The above copyright notice and this permission notice shall be included in all
24
+ copies or substantial portions of the Software.
25
+
26
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
31
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32
+ SOFTWARE.
33
+
34
+
35
+ 2. MODEL AND DERIVATIVE WORKS LICENSE
36
+ -------------------------------------
37
+ The model weights, configurations, and derivative quantized artifacts (including
38
+ but not limited to GGUF and ONNX formats) are derived from Qwen3-TTS and are
39
+ subject to the Apache License, Version 2.0 provided by Alibaba Cloud:
40
+
41
+ Apache License
42
+ Version 2.0, January 2004
43
+ http://www.apache.org/licenses/
44
+
45
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
46
+
47
+ 1. Definitions.
48
+
49
+ "License" shall mean the terms and conditions for use, reproduction,
50
+ and distribution as defined by Sections 1 through 9 of this document.
51
+
52
+ "Licensor" shall mean the copyright owner or entity authorized by
53
+ the copyright owner that is granting the License.
54
+
55
+ "Legal Entity" shall mean the union of the acting entity and all
56
+ other entities that control, are controlled by, or are under common
57
+ control with that entity. For the purposes of this definition,
58
+ "control" means (i) the power, direct or indirect, to cause the
59
+ direction or management of such entity, whether by contract or
60
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
61
+ outstanding shares, or (iii) beneficial ownership of such entity.
62
+
63
+ "You" (or "Your") shall mean an individual or Legal Entity
64
+ exercising permissions granted by this License.
65
+
66
+ "Source" form shall mean the preferred form for making modifications,
67
+ including but not limited to software source code, documentation
68
+ source, and configuration files.
69
+
70
+ "Object" form shall mean any form resulting from mechanical
71
+ transformation or translation of a Source form, including but
72
+ not limited to compiled object code, generated documentation,
73
+ and conversions to other media types.
74
+
75
+ "Work" shall mean the work of authorship, whether in Source or
76
+ Object form, made available under the License, as indicated by a
77
+ copyright notice that is included in or attached to the work
78
+ (an example is provided in the Appendix below).
79
+
80
+ "Derivative Works" shall mean any work, whether in Source or Object
81
+ form, that is based on (or derived from) the Work and for which the
82
+ editorial revisions, annotations, elaborations, or other modifications
83
+ represent, as a whole, an original work of authorship. For the purposes
84
+ of this License, Derivative Works shall not include works that remain
85
+ separable from, or merely link (or bind by name) to the interfaces of,
86
+ the Work and Derivative Works thereof.
87
+
88
+ "Contribution" shall mean any work of authorship, including
89
+ the original version of the Work and any modifications or additions
90
+ to that Work or Derivative Works thereof, that is intentionally
91
+ submitted to Licensor for inclusion in the Work by the copyright owner
92
+ or by an individual or Legal Entity authorized to submit on behalf of
93
+ the copyright owner. For the purposes of this definition, "submitted"
94
+ means any form of electronic, verbal, or written communication sent
95
+ to the Licensor or its representatives, including but not limited to
96
+ communication on electronic mailing lists, source code control systems,
97
+ and issue tracking systems that are managed by, or on behalf of, the
98
+ Licensor for the purpose of discussing and improving the Work, but
99
+ excluding communication that is conspicuously marked or otherwise
100
+ designated in writing by the copyright owner as "Not a Contribution."
101
+
102
+ "Contributor" shall mean Licensor and any individual or Legal Entity
103
+ on behalf of whom a Contribution has been received by Licensor and
104
+ subsequently incorporated within the Work.
105
+
106
+ 2. Grant of Copyright License. Subject to the terms and conditions of
107
+ this License, each Contributor hereby grants to You a perpetual,
108
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
109
+ copyright license to reproduce, prepare Derivative Works of,
110
+ publicly display, publicly perform, sublicense, and distribute the
111
+ Work and such Derivative Works in Source or Object form.
112
+
113
+ 3. Grant of Patent License. Subject to the terms and conditions of
114
+ this License, each Contributor hereby grants to You a perpetual,
115
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
116
+ (except as stated in this section) patent license to make, have made,
117
+ use, offer to sell, sell, import, and otherwise transfer the Work,
118
+ where such license applies only to those patent claims licensable
119
+ by such Contributor that are necessarily infringed by their
120
+ Contribution(s) alone or by combination of their Contribution(s)
121
+ with the Work to which such Contribution(s) was submitted. If You
122
+ institute patent litigation against any entity (including a
123
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
124
+ or a Contribution incorporated within the Work constitutes direct
125
+ or contributory patent infringement, then any patent licenses
126
+ granted to You under this License for that Work shall terminate
127
+ as of the date such litigation is filed.
128
+
129
+ 4. Redistribution. You may reproduce and distribute copies of the
130
+ Work or Derivative Works thereof in any medium, with or without
131
+ modifications, and in Source or Object form, provided that You
132
+ meet the following conditions:
133
+
134
+ (a) You must give any other recipients of the Work or
135
+ Derivative Works a copy of this License; and
136
+
137
+ (b) You must cause any modified files to carry prominent notices
138
+ stating that You changed the files; and
139
+
140
+ (c) You must retain, in the Source form of any Derivative Works
141
+ that You distribute, all copyright, patent, trademark, and
142
+ attribution notices from the Source form of the Work,
143
+ excluding those notices that do not pertain to any part of
144
+ the Derivative Works; and
145
+
146
+ (d) If the Work includes a "NOTICE" text file as part of its
147
+ distribution, then any Derivative Works that You distribute must
148
+ include a readable copy of the attribution notices contained
149
+ within such NOTICE file, excluding those notices that do not
150
+ pertain to any part of the Derivative Works, in at least one
151
+ of the following places: within a NOTICE text file distributed
152
+ as part of the Derivative Works; within the Source form or
153
+ documentation, if provided along with the Derivative Works; or,
154
+ within a display generated by the Derivative Works, if and
155
+ wherever such third-party notices normally appear. The contents
156
+ of the NOTICE file are for informational purposes only and
157
+ do not modify the License. You may add Your own attribution
158
+ notices within Derivative Works that You distribute, alongside
159
+ or as an addendum to the NOTICE text from the Work, provided
160
+ that such additional attribution notices cannot be construed
161
+ as modifying the License.
162
+
163
+ You may add Your own copyright statement to Your modifications and
164
+ may provide additional or different license terms and conditions
165
+ for use, reproduction, or distribution of Your modifications, or
166
+ for any such Derivative Works as a whole, provided Your use,
167
+ reproduction, and distribution of the Work otherwise complies with
168
+ the conditions stated in this License.
169
+
170
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
171
+ any Contribution intentionally submitted for inclusion in the Work
172
+ by You to the Licensor shall be under the terms and conditions of
173
+ this License, without any additional terms or conditions.
174
+ Notwithstanding the above, nothing herein shall supersede or modify
175
+ the terms of any separate license agreement you may have executed
176
+ with Licensor regarding such Contributions.
177
+
178
+ 6. Trademarks. This License does not grant permission to use the trade
179
+ names, trademarks, service marks, or product names of the Licensor,
180
+ except as required for reasonable and customary use in describing the
181
+ origin of the Work and reproducing the content of the NOTICE file.
182
+
183
+ 7. Disclaimer of Warranty. Unless required by applicable law or
184
+ agreed to in writing, Licensor provides the Work (and each
185
+ Contributor provides its Contributions) on an "AS IS" BASIS,
186
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
187
+ implied, including, without limitation, any warranties or conditions
188
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
189
+ PARTICULAR PURPOSE. You are solely responsible for determining the
190
+ appropriateness of using or redistributing the Work and assume any
191
+ risks associated with Your exercise of permissions under this License.
192
+
193
+ 8. Limitation of Liability. In no event and under no legal theory,
194
+ whether in tort (including negligence), contract, or otherwise,
195
+ unless required by applicable law (such as deliberate and grossly
196
+ negligent acts) or agreed to in writing, shall any Contributor be
197
+ liable to You for damages, including any direct, indirect, special,
198
+ incidental, or consequential damages of any character arising as a
199
+ result of this License or out of the use or inability to use the
200
+ Work (including but not limited to damages for loss of goodwill,
201
+ work stoppage, computer failure or malfunction, or any and all
202
+ other commercial damages or losses), even if such Contributor
203
+ has been advised of the possibility of such damages.
204
+
205
+ 9. Accepting Warranty or Additional Liability. While redistributing
206
+ the Work or Derivative Works thereof, You may choose to offer,
207
+ and charge a fee for, acceptance of support, warranty, indemnity,
208
+ or other liability obligations and/or rights consistent with this
209
+ License. However, in accepting such obligations, You may act only
210
+ on Your own behalf and on Your sole responsibility, not on behalf
211
+ of any other Contributor, and only if You agree to indemnify,
212
+ defend, and hold each Contributor harmless for any liability
213
+ incurred by, or claims asserted against, such Contributor by reason
214
+ of your accepting any such warranty or additional liability.
215
+
216
+ END OF TERMS AND CONDITIONS
217
+
218
+ APPENDIX: How to apply the Apache License to your work.
219
+
220
+ To apply the Apache License to your work, attach the following
221
+ boilerplate notice, with the fields enclosed by brackets "[]"
222
+ replaced with your own identifying information. (Don't include
223
+ the brackets!) The text should be enclosed in the appropriate
224
+ comment syntax for the file format. We also recommend that a
225
+ file or class name and description of purpose be included on the
226
+ same "printed page" as the copyright notice for easier
227
+ identification within third-party archives.
228
+
229
+ Copyright 2026 Alibaba Cloud
230
+
231
+ Licensed under the Apache License, Version 2.0 (the "License");
232
+ you may not use this file except in compliance with the License.
233
+ You may obtain a copy of the License at
234
+
235
+ http://www.apache.org/licenses/LICENSE-2.0
236
+
237
+ Unless required by applicable law or agreed to in writing, software
238
+ distributed under the License is distributed on an "AS IS" BASIS,
239
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
240
+ See the License for the specific language governing permissions and
241
+ limitations under the License.
lunavox-2.0.0/PKG-INFO ADDED
@@ -0,0 +1,208 @@
1
+ Metadata-Version: 2.4
2
+ Name: lunavox
3
+ Version: 2.0.0
4
+ Summary: LunaVox tooling CLI for model setup, conversion, quantization, and build workflows.
5
+ Author: LunaVox Contributors
6
+ License: MIT
7
+ Requires-Python: >=3.10
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE
10
+ Requires-Dist: typer>=0.12.0
11
+ Requires-Dist: rich>=13.7.0
12
+ Requires-Dist: huggingface_hub>=0.24.0
13
+ Provides-Extra: gui
14
+ Requires-Dist: customtkinter>=5.2.0; extra == "gui"
15
+ Requires-Dist: pygame>=2.5.0; extra == "gui"
16
+ Provides-Extra: convert
17
+ Requires-Dist: torch>=2.1; extra == "convert"
18
+ Requires-Dist: torchaudio>=2.1; extra == "convert"
19
+ Requires-Dist: transformers==4.57.6; extra == "convert"
20
+ Requires-Dist: accelerate==1.12.0; extra == "convert"
21
+ Requires-Dist: onnx; extra == "convert"
22
+ Requires-Dist: onnxruntime; extra == "convert"
23
+ Requires-Dist: onnxruntime-tools; extra == "convert"
24
+ Requires-Dist: gguf; extra == "convert"
25
+ Requires-Dist: numpy; extra == "convert"
26
+ Requires-Dist: tqdm; extra == "convert"
27
+ Requires-Dist: safetensors; extra == "convert"
28
+ Requires-Dist: sentencepiece; extra == "convert"
29
+ Requires-Dist: librosa; extra == "convert"
30
+ Requires-Dist: scipy; extra == "convert"
31
+ Requires-Dist: soundfile; extra == "convert"
32
+ Requires-Dist: einops; extra == "convert"
33
+ Requires-Dist: onnxscript; extra == "convert"
34
+ Provides-Extra: dev
35
+ Requires-Dist: pytest>=8.2; extra == "dev"
36
+ Requires-Dist: build>=1.2; extra == "dev"
37
+ Requires-Dist: twine>=5.1; extra == "dev"
38
+ Dynamic: license-file
39
+
40
+ # 🌌 LunaVox: Qwen3-TTS C++ 高性能推理引擎
41
+
42
+ ![Version](https://img.shields.io/badge/version-2.0.0-blueviolet?style=for-the-badge)
43
+ ![Platform](https://img.shields.io/badge/platform-Windows%20%7C%20Linux%20%7C%20macOS-0078d7?style=for-the-badge&logo=windows&logoColor=white)
44
+ ![CoreML](https://img.shields.io/badge/iOS-CoreML-000000?style=for-the-badge&logo=apple&logoColor=white)
45
+ ![C++](https://img.shields.io/badge/C++-17-00599C?style=for-the-badge&logo=c%2B%2B)
46
+ [![License](https://img.shields.io/badge/license-MIT-green?style=for-the-badge)](../../LICENSE)
47
+
48
+ **LunaVox** 是专为 **Qwen3-TTS** 打造的高性能 C++ 推理引擎。它通过精简的架构设计和深度的硬件优化,提供了极致的语音合成速度与灵活性。无论是本地嵌入式设备、桌面应用还是高性能服务器,LunaVox 都能提供稳定、低延迟的 TTS 体验。
49
+
50
+ ---
51
+
52
+ ## 🚀 核心特性
53
+
54
+ - **轻量级运行**: 仅需 ONNX Runtime 与自定义 Llama 推理库,无需繁重的 Python 环境即可运行。
55
+ - **多语言原生支持**: 引擎链路内置自动语言检测,完美支持 **中、英、日、韩、俄、德、法、意、西、葡** 十种语言。
56
+ - **全模式支持**: 支持基础合成 (Base)、声音克隆 (Clone)、定制化声音 (Custom) 及创意声音设计 (Design)。
57
+ - **现代构建系统**: 全自动工具链识别。支持 Windows (MSVC)、Linux (GCC) 及 macOS (Clang/Apple Silicon)。
58
+ - **跨平台硬件加速**: 深度集成 CUDA (NVIDIA), CoreML/Metal (Apple), DML (DirectX 12) 与 Vulkan 接口。
59
+
60
+ ---
61
+
62
+ ## 🛠️ 环境与构建要求
63
+
64
+ ### 1. 系统环境
65
+ - **Windows**: Windows 10/11 (VS 2022/2025 支持)
66
+ - **Linux**: Ubuntu 22.04+ 或主流发行版 (GCC >= 9.0)
67
+ - **macOS**: Apple Silicon (M1/M2/M3), macOS 12+ (Metal 支持)
68
+ - **编译器**: MSVC (v143/v144)、GCC 10.0+ 或 Apple Clang
69
+ - **构建工具**: CMake 3.16+,建议安装 **Ninja** 提升构建速度
70
+
71
+ ### 2. 依赖库
72
+ - **Python 3.10+**: 用于模型转换和自动化管理。
73
+ - **ONNX Runtime SDK**: 对应平台的 C++ 动态库。
74
+ - **Llama Runtime**: 预编译的后端二进制文件。
75
+
76
+ ---
77
+
78
+ ## 📊 性能评估
79
+
80
+ 下表展示了 LunaVox 在不同后端配置下的平均性能表现。详细报告请参阅 **[Windows 性能评估报告](benchmark/windows_performance.md)**。
81
+
82
+ | 测试配置 | 平均 RTF | 峰值内存 (RAM) | 显存 (VRAM) | 相对加速比 (Speedup) |
83
+ | :--- | :---: | :---: | :---: | :---: |
84
+ | **Baseline (CPU)** | 5.066 | 5.06 GB | — | 1.00x |
85
+ | **Baseline (GPU)** | 3.788 | 1.59 GB | 2.29 GB | 1.34x |
86
+ | **LunaVox (Full CPU)** | 1.152 | 1.06 GB | — | 4.40x |
87
+ | **LunaVox (CUDA13)** | 0.254 | 1.39 GB | 1.30 GB | 19.94x |
88
+ | **LunaVox (Llama.cpp (Vulkan) / ORT (DML))** | **0.206** | 0.91 GB | 1.05 GB | **24.59x** |
89
+
90
+ > [!NOTE]
91
+ > - **测试模型**: 基于 **Qwen3-TTS-12Hz-0.6B-Base**,开启声音克隆模式并使用 `.json` 预计算特征文件作为参考。
92
+ > - **测试环境**: Intel i9-12900K + NVIDIA RTX 3090
93
+ > - **测试标准**: 在 **3 次预热**后,取 **10 次运行**的平均结果。
94
+
95
+ ---
96
+
97
+ ### 3. CLI 工具与依赖安装
98
+
99
+ ```powershell
100
+ # 安装核心推理工具
101
+ pip install lunavox
102
+ ```
103
+
104
+ > [!NOTE]
105
+ > **开发与脚本说明**: LunaVox 已发布至 PyPI,标准用户仅需执行 `pip install lunavox` 即可安装完整工具。若您需要深入研究模型转换、量化流水线或导出 Python 脚本,请切换至 **`dev`** 分支获取最新源码与内部工具。
106
+
107
+ ## 📦 快速上手流程 (One-Key Setup)
108
+
109
+ LunaVox 推荐使用 `bootstrap` 指令一键完成 **模型拉取、运行库下载、项目构建及交互测试**。
110
+
111
+ ### 1. 自动引导安装 (推荐)
112
+ ```powershell
113
+ # 执行全自动引导设置
114
+ lunavox bootstrap
115
+ ```
116
+
117
+ ### 2. 本地构建 (从源码)
118
+ 如果您需要精细化控制每个步骤,可以运行:
119
+ ```powershell
120
+ # 1. 下载预转换模型 (或使用 convert 本地转换原始模型)
121
+ lunavox pull-model
122
+
123
+ # 2. 下载 C++ 运行库
124
+ lunavox download-libs
125
+
126
+ # 3. 自动编译项目
127
+ lunavox build --clean
128
+ ```
129
+
130
+ > [!TIP]
131
+ > 更多详细命令和高级参数说明,请参阅 **[LunaVox CLI 指令汇总手册](guide/cli_reference.md)**。
132
+
133
+ ---
134
+
135
+ ## 🧱 运行库依赖 (Libraries)
136
+
137
+ LunaVox 会自动将相应的 ONNX Runtime 与 Llama.cpp 运行库下载到 `lib/` 目录下。如果您需要针对 CUDA 环境进行精细化配置,请参阅:
138
+ - **[CUDA 12 Windows 依赖指南](install/cuda12_windows.md)**
139
+ - **[CUDA 13 Windows 依赖指南](install/cuda13_windows.md)**
140
+
141
+ ---
142
+
143
+
144
+ ## 🎙️ 推理测试与模式说明
145
+
146
+ 编译完成后,可执行程序位于 `./build/qwen3-tts-cli.exe`。
147
+ > [!NOTE]
148
+ > - Linux/macOS 系统请使用 `./build/qwen3-tts-cli` 运行。
149
+ > - `--instruct` 仅对 **Custom** 和 **Design** 模式有效(Base 模式下禁用)。
150
+
151
+ 详细教程请参阅:**[CLI 指令使用指南](guide/usage_tutorial.md)**。
152
+
153
+ ### 1. 声音克隆 (Voice Cloning)
154
+ 通过参考音频(.wav)或预计算特征(.json)模仿特定音色:
155
+ ```powershell
156
+ ./build/qwen3-tts-cli.exe `
157
+ -m models/base_small `
158
+ -r ref/ref_0.6B.json `
159
+ -t "Okay, fine, I'm just gonna leave this sock monkey here. Goodbye." `
160
+ -o output/cloned.wav
161
+ ```
162
+
163
+ ### 2. 定制化声音 (Custom Voice)
164
+ 使用系统内置的发音人 ID:
165
+ ```powershell
166
+ ./build/qwen3-tts-cli.exe `
167
+ -m models/custom `
168
+ --speaker Vivian `
169
+ --instruct "Use angry tone." `
170
+ -t "She said she would be here by noon." `
171
+ -o output/custom.wav
172
+ ```
173
+
174
+ ### 3. 声音设计 (Voice Design)
175
+ 使用描述设计声音:
176
+ ```powershell
177
+ .\build\qwen3-tts-cli.exe `
178
+ -m models/design `
179
+ -t "It's in the top drawer... wait, it's empty? No way, that's impossible! I'm sure I put it there!" `
180
+ --instruct "Speak in an incredulous tone, but with a hint of panic beginning to creep into your voice." `
181
+ -o output/out.wav
182
+ ```
183
+ ---
184
+
185
+ ## 📈 性能监控与日志
186
+
187
+ - **详细统计**: 运行命令时添加 `--stats-json report.json` 即可获取 RTF(实时率)和内存占用分析。
188
+ - **日志查看**: 所有的构建和运行输出均实时记录在 `../../logs/latest.log` 中。
189
+ - **线程控制**: 使用 `-j` 参数(默认 4)调整 CPU 线程数。
190
+
191
+ ---
192
+
193
+ ## 📜 更多信息
194
+
195
+ 有关运行时详细配置及设计准则,请参阅:
196
+ - **[LunaVox CLI 指令汇总手册](guide/cli_reference.md)**
197
+ - **[运行时技术规范与约束](technical/runtime_specs.md)**
198
+ - **[合成链路编码器需求分析](technical/synthesis_pathway.md)**
199
+
200
+ ---
201
+
202
+ ## 🙏 致谢
203
+
204
+ 本项目深受以下开源项目的启发或基于其成果:
205
+
206
+ - **[Qwen3-TTS](https://github.com/QwenLM/Qwen3-TTS)**: 提供强大的 base 模型权重与原始架构设计。
207
+ - **[onnxruntime](https://github.com/microsoft/onnxruntime)**: 驱动高性能音频解码后端。
208
+ - **[llama.cpp](https://github.com/ggml-org/llama.cpp)**: 驱动 LLM 序列预测核心。
@@ -0,0 +1,169 @@
1
+ # 🌌 LunaVox: Qwen3-TTS C++ 高性能推理引擎
2
+
3
+ ![Version](https://img.shields.io/badge/version-2.0.0-blueviolet?style=for-the-badge)
4
+ ![Platform](https://img.shields.io/badge/platform-Windows%20%7C%20Linux%20%7C%20macOS-0078d7?style=for-the-badge&logo=windows&logoColor=white)
5
+ ![CoreML](https://img.shields.io/badge/iOS-CoreML-000000?style=for-the-badge&logo=apple&logoColor=white)
6
+ ![C++](https://img.shields.io/badge/C++-17-00599C?style=for-the-badge&logo=c%2B%2B)
7
+ [![License](https://img.shields.io/badge/license-MIT-green?style=for-the-badge)](../../LICENSE)
8
+
9
+ **LunaVox** 是专为 **Qwen3-TTS** 打造的高性能 C++ 推理引擎。它通过精简的架构设计和深度的硬件优化,提供了极致的语音合成速度与灵活性。无论是本地嵌入式设备、桌面应用还是高性能服务器,LunaVox 都能提供稳定、低延迟的 TTS 体验。
10
+
11
+ ---
12
+
13
+ ## 🚀 核心特性
14
+
15
+ - **轻量级运行**: 仅需 ONNX Runtime 与自定义 Llama 推理库,无需繁重的 Python 环境即可运行。
16
+ - **多语言原生支持**: 引擎链路内置自动语言检测,完美支持 **中、英、日、韩、俄、德、法、意、西、葡** 十种语言。
17
+ - **全模式支持**: 支持基础合成 (Base)、声音克隆 (Clone)、定制化声音 (Custom) 及创意声音设计 (Design)。
18
+ - **现代构建系统**: 全自动工具链识别。支持 Windows (MSVC)、Linux (GCC) 及 macOS (Clang/Apple Silicon)。
19
+ - **跨平台硬件加速**: 深度集成 CUDA (NVIDIA), CoreML/Metal (Apple), DML (DirectX 12) 与 Vulkan 接口。
20
+
21
+ ---
22
+
23
+ ## 🛠️ 环境与构建要求
24
+
25
+ ### 1. 系统环境
26
+ - **Windows**: Windows 10/11 (VS 2022/2025 支持)
27
+ - **Linux**: Ubuntu 22.04+ 或主流发行版 (GCC >= 9.0)
28
+ - **macOS**: Apple Silicon (M1/M2/M3), macOS 12+ (Metal 支持)
29
+ - **编译器**: MSVC (v143/v144)、GCC 10.0+ 或 Apple Clang
30
+ - **构建工具**: CMake 3.16+,建议安装 **Ninja** 提升构建速度
31
+
32
+ ### 2. 依赖库
33
+ - **Python 3.10+**: 用于模型转换和自动化管理。
34
+ - **ONNX Runtime SDK**: 对应平台的 C++ 动态库。
35
+ - **Llama Runtime**: 预编译的后端二进制文件。
36
+
37
+ ---
38
+
39
+ ## 📊 性能评估
40
+
41
+ 下表展示了 LunaVox 在不同后端配置下的平均性能表现。详细报告请参阅 **[Windows 性能评估报告](benchmark/windows_performance.md)**。
42
+
43
+ | 测试配置 | 平均 RTF | 峰值内存 (RAM) | 显存 (VRAM) | 相对加速比 (Speedup) |
44
+ | :--- | :---: | :---: | :---: | :---: |
45
+ | **Baseline (CPU)** | 5.066 | 5.06 GB | — | 1.00x |
46
+ | **Baseline (GPU)** | 3.788 | 1.59 GB | 2.29 GB | 1.34x |
47
+ | **LunaVox (Full CPU)** | 1.152 | 1.06 GB | — | 4.40x |
48
+ | **LunaVox (CUDA13)** | 0.254 | 1.39 GB | 1.30 GB | 19.94x |
49
+ | **LunaVox (Llama.cpp (Vulkan) / ORT (DML))** | **0.206** | 0.91 GB | 1.05 GB | **24.59x** |
50
+
51
+ > [!NOTE]
52
+ > - **测试模型**: 基于 **Qwen3-TTS-12Hz-0.6B-Base**,开启声音克隆模式并使用 `.json` 预计算特征文件作为参考。
53
+ > - **测试环境**: Intel i9-12900K + NVIDIA RTX 3090
54
+ > - **测试标准**: 在 **3 次预热**后,取 **10 次运行**的平均结果。
55
+
56
+ ---
57
+
58
+ ### 3. CLI 工具与依赖安装
59
+
60
+ ```powershell
61
+ # 安装核心推理工具
62
+ pip install lunavox
63
+ ```
64
+
65
+ > [!NOTE]
66
+ > **开发与脚本说明**: LunaVox 已发布至 PyPI,标准用户仅需执行 `pip install lunavox` 即可安装完整工具。若您需要深入研究模型转换、量化流水线或导出 Python 脚本,请切换至 **`dev`** 分支获取最新源码与内部工具。
67
+
68
+ ## 📦 快速上手流程 (One-Key Setup)
69
+
70
+ LunaVox 推荐使用 `bootstrap` 指令一键完成 **模型拉取、运行库下载、项目构建及交互测试**。
71
+
72
+ ### 1. 自动引导安装 (推荐)
73
+ ```powershell
74
+ # 执行全自动引导设置
75
+ lunavox bootstrap
76
+ ```
77
+
78
+ ### 2. 本地构建 (从源码)
79
+ 如果您需要精细化控制每个步骤,可以运行:
80
+ ```powershell
81
+ # 1. 下载预转换模型 (或使用 convert 本地转换原始模型)
82
+ lunavox pull-model
83
+
84
+ # 2. 下载 C++ 运行库
85
+ lunavox download-libs
86
+
87
+ # 3. 自动编译项目
88
+ lunavox build --clean
89
+ ```
90
+
91
+ > [!TIP]
92
+ > 更多详细命令和高级参数说明,请参阅 **[LunaVox CLI 指令汇总手册](guide/cli_reference.md)**。
93
+
94
+ ---
95
+
96
+ ## 🧱 运行库依赖 (Libraries)
97
+
98
+ LunaVox 会自动将相应的 ONNX Runtime 与 Llama.cpp 运行库下载到 `lib/` 目录下。如果您需要针对 CUDA 环境进行精细化配置,请参阅:
99
+ - **[CUDA 12 Windows 依赖指南](install/cuda12_windows.md)**
100
+ - **[CUDA 13 Windows 依赖指南](install/cuda13_windows.md)**
101
+
102
+ ---
103
+
104
+
105
+ ## 🎙️ 推理测试与模式说明
106
+
107
+ 编译完成后,可执行程序位于 `./build/qwen3-tts-cli.exe`。
108
+ > [!NOTE]
109
+ > - Linux/macOS 系统请使用 `./build/qwen3-tts-cli` 运行。
110
+ > - `--instruct` 仅对 **Custom** 和 **Design** 模式有效(Base 模式下禁用)。
111
+
112
+ 详细教程请参阅:**[CLI 指令使用指南](guide/usage_tutorial.md)**。
113
+
114
+ ### 1. 声音克隆 (Voice Cloning)
115
+ 通过参考音频(.wav)或预计算特征(.json)模仿特定音色:
116
+ ```powershell
117
+ ./build/qwen3-tts-cli.exe `
118
+ -m models/base_small `
119
+ -r ref/ref_0.6B.json `
120
+ -t "Okay, fine, I'm just gonna leave this sock monkey here. Goodbye." `
121
+ -o output/cloned.wav
122
+ ```
123
+
124
+ ### 2. 定制化声音 (Custom Voice)
125
+ 使用系统内置的发音人 ID:
126
+ ```powershell
127
+ ./build/qwen3-tts-cli.exe `
128
+ -m models/custom `
129
+ --speaker Vivian `
130
+ --instruct "Use angry tone." `
131
+ -t "She said she would be here by noon." `
132
+ -o output/custom.wav
133
+ ```
134
+
135
+ ### 3. 声音设计 (Voice Design)
136
+ 使用描述设计声音:
137
+ ```powershell
138
+ .\build\qwen3-tts-cli.exe `
139
+ -m models/design `
140
+ -t "It's in the top drawer... wait, it's empty? No way, that's impossible! I'm sure I put it there!" `
141
+ --instruct "Speak in an incredulous tone, but with a hint of panic beginning to creep into your voice." `
142
+ -o output/out.wav
143
+ ```
144
+ ---
145
+
146
+ ## 📈 性能监控与日志
147
+
148
+ - **详细统计**: 运行命令时添加 `--stats-json report.json` 即可获取 RTF(实时率)和内存占用分析。
149
+ - **日志查看**: 所有的构建和运行输出均实时记录在 `../../logs/latest.log` 中。
150
+ - **线程控制**: 使用 `-j` 参数(默认 4)调整 CPU 线程数。
151
+
152
+ ---
153
+
154
+ ## 📜 更多信息
155
+
156
+ 有关运行时详细配置及设计准则,请参阅:
157
+ - **[LunaVox CLI 指令汇总手册](guide/cli_reference.md)**
158
+ - **[运行时技术规范与约束](technical/runtime_specs.md)**
159
+ - **[合成链路编码器需求分析](technical/synthesis_pathway.md)**
160
+
161
+ ---
162
+
163
+ ## 🙏 致谢
164
+
165
+ 本项目深受以下开源项目的启发或基于其成果:
166
+
167
+ - **[Qwen3-TTS](https://github.com/QwenLM/Qwen3-TTS)**: 提供强大的 base 模型权重与原始架构设计。
168
+ - **[onnxruntime](https://github.com/microsoft/onnxruntime)**: 驱动高性能音频解码后端。
169
+ - **[llama.cpp](https://github.com/ggml-org/llama.cpp)**: 驱动 LLM 序列预测核心。
@@ -0,0 +1,60 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "lunavox"
7
+ version = "2.0.0"
8
+ description = "LunaVox tooling CLI for model setup, conversion, quantization, and build workflows."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = { text = "MIT" }
12
+ authors = [{ name = "LunaVox Contributors" }]
13
+ dependencies = [
14
+ "typer>=0.12.0",
15
+ "rich>=13.7.0",
16
+ "huggingface_hub>=0.24.0",
17
+ ]
18
+
19
+ [project.optional-dependencies]
20
+ gui = [
21
+ "customtkinter>=5.2.0",
22
+ "pygame>=2.5.0",
23
+ ]
24
+ convert = [
25
+ "torch>=2.1",
26
+ "torchaudio>=2.1",
27
+ "transformers==4.57.6",
28
+ "accelerate==1.12.0",
29
+ "onnx",
30
+ "onnxruntime",
31
+ "onnxruntime-tools",
32
+ "gguf",
33
+ "numpy",
34
+ "tqdm",
35
+ "safetensors",
36
+ "sentencepiece",
37
+ "librosa",
38
+ "scipy",
39
+ "soundfile",
40
+ "einops",
41
+ "onnxscript",
42
+ ]
43
+ dev = [
44
+ "pytest>=8.2",
45
+ "build>=1.2",
46
+ "twine>=5.1",
47
+ ]
48
+
49
+ [project.scripts]
50
+ lunavox = "lunavox.cli.main:run"
51
+
52
+ [tool.setuptools]
53
+ package-dir = { "" = "src" }
54
+ include-package-data = true
55
+
56
+ [tool.setuptools.packages.find]
57
+ where = ["src"]
58
+
59
+ [tool.setuptools.package-data]
60
+ "lunavox.build" = ["*.json"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+