qwen-tts 0.0.4__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- qwen_tts/core/models/modeling_qwen3_tts.py +8 -3
- qwen_tts/core/tokenizer_12hz/modeling_qwen3_tts_tokenizer_v2.py +4 -3
- qwen_tts/inference/qwen3_tts_model.py +2 -2
- {qwen_tts-0.0.4.dist-info → qwen_tts-0.1.0.dist-info}/METADATA +13 -8
- {qwen_tts-0.0.4.dist-info → qwen_tts-0.1.0.dist-info}/RECORD +9 -9
- {qwen_tts-0.0.4.dist-info → qwen_tts-0.1.0.dist-info}/WHEEL +1 -1
- {qwen_tts-0.0.4.dist-info → qwen_tts-0.1.0.dist-info}/entry_points.txt +0 -0
- {qwen_tts-0.0.4.dist-info → qwen_tts-0.1.0.dist-info}/licenses/LICENSE +0 -0
- {qwen_tts-0.0.4.dist-info → qwen_tts-0.1.0.dist-info}/top_level.txt +0 -0
qwen_tts/core/models/modeling_qwen3_tts.py

@@ -470,7 +470,7 @@ class Qwen3TTSPreTrainedModel(PreTrainedModel):
     supports_gradient_checkpointing = True
     _no_split_modules = ["Qwen3TTSDecoderLayer"]
     _skip_keys_device_placement = "past_key_values"
-
+    _supports_flash_attn = True
     _supports_sdpa = True
     _supports_cache_class = True
     _supports_static_cache = False

@@ -501,8 +501,7 @@ class Qwen3TTSTalkerTextPreTrainedModel(PreTrainedModel):
     supports_gradient_checkpointing = True
     _no_split_modules = []
     _skip_keys_device_placement = ["past_key_values"]
-
-    _supports_flash_attn_2 = True
+    _supports_flash_attn = True
     _supports_sdpa = True
     _supports_flex_attn = True
    _supports_cache_class = True
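Both pretrained-model classes now declare the `_supports_flash_attn` class flag, which recent `transformers` versions use in place of the older `_supports_flash_attn_2`. A minimal sketch of the new-style declaration (the subclass name here is illustrative, not part of the package):

```python
from transformers import PreTrainedModel

class MyTTSBackbone(PreTrainedModel):  # illustrative subclass, not from qwen-tts
    supports_gradient_checkpointing = True
    _supports_flash_attn = True  # new-style flag, replaces _supports_flash_attn_2
    _supports_sdpa = True
```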
@@ -1869,6 +1868,11 @@ class Qwen3TTSForConditionalGeneration(Qwen3TTSPreTrainedModel, GenerationMixin)
         weights_only=True,
         **kwargs,
     ):
+        # Hotfix to enable passing the correct attn implementation which is stored in the config but not in kwargs
+        requested_attn_implementation = kwargs.pop("attn_implementation", None)
+        if requested_attn_implementation is None and config and config._attn_implementation:
+            requested_attn_implementation = config._attn_implementation
+
         model = super().from_pretrained(
             pretrained_model_name_or_path,
             *model_args,

@@ -1881,6 +1885,7 @@ class Qwen3TTSForConditionalGeneration(Qwen3TTSPreTrainedModel, GenerationMixin)
             revision=revision,
             use_safetensors=use_safetensors,
             weights_only=weights_only,
+            attn_implementation=requested_attn_implementation,
             **kwargs,
         )
         if not local_files_only and not os.path.isdir(pretrained_model_name_or_path):
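The net effect of this hotfix is that an `attn_implementation` stored in the model config, or passed by the caller, now reaches the underlying `transformers` loader instead of being dropped. A minimal sketch of how a caller could rely on it; the checkpoint path is a placeholder and the import path is assumed from the RECORD listing below:

```python
from qwen_tts.core.models.modeling_qwen3_tts import Qwen3TTSForConditionalGeneration

# "path/to/qwen3-tts-checkpoint" is a placeholder, not a real model id.
model = Qwen3TTSForConditionalGeneration.from_pretrained(
    "path/to/qwen3-tts-checkpoint",
    # Explicit request; if omitted, the value stored in config._attn_implementation
    # is now forwarded to the underlying transformers loader instead of being lost.
    attn_implementation="flash_attention_2",
)
```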
qwen_tts/core/tokenizer_12hz/modeling_qwen3_tts_tokenizer_v2.py

@@ -198,12 +198,13 @@ class Qwen3TTSTokenizerV2CausalTransConvNet(nn.Module):
         self.conv = nn.ConvTranspose1d(in_channels, out_channels, kernel_size, stride=stride)

         pad = kernel_size - stride
-        self.left_pad =
-        self.right_pad = pad
+        self.left_pad = 0
+        self.right_pad = int(pad)

     def forward(self, hidden_state):
         hidden_state = self.conv(hidden_state)
-
+        if self.right_pad > 0:
+            hidden_state = hidden_state[..., : hidden_state.shape[-1] - self.right_pad]
         return hidden_state.contiguous()
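The updated module trims `kernel_size - stride` samples from the right of the transposed-convolution output, so the layer stays causal and produces exactly `stride * input_length` output frames. A self-contained sketch of the same trimming logic outside the package (class and variable names here are illustrative):

```python
import torch
import torch.nn as nn

class CausalTransConv1d(nn.Module):
    """Illustrative stand-in for the right-trim logic shown in the diff above."""

    def __init__(self, in_channels, out_channels, kernel_size, stride):
        super().__init__()
        self.conv = nn.ConvTranspose1d(in_channels, out_channels, kernel_size, stride=stride)
        self.right_pad = int(kernel_size - stride)  # extra samples produced on the right

    def forward(self, x):
        y = self.conv(x)
        if self.right_pad > 0:
            y = y[..., : y.shape[-1] - self.right_pad]  # drop the non-causal tail
        return y.contiguous()

x = torch.randn(1, 4, 10)                        # (batch, channels, time)
layer = CausalTransConv1d(4, 8, kernel_size=8, stride=4)
print(layer(x).shape)                            # torch.Size([1, 8, 40]); time = stride * 10
```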
qwen_tts/inference/qwen3_tts_model.py

@@ -639,7 +639,7 @@ class Qwen3TTSModel:
         text: Union[str, List[str]],
         instruct: Union[str, List[str]],
         language: Union[str, List[str]] = None,
-        non_streaming_mode: bool =
+        non_streaming_mode: bool = True,
         **kwargs,
     ) -> Tuple[List[np.ndarray], int]:
         """

@@ -735,7 +735,7 @@ class Qwen3TTSModel:
         speaker: Union[str, List[str]],
         language: Union[str, List[str]] = None,
         instruct: Optional[Union[str, List[str]]] = None,
-        non_streaming_mode: bool =
+        non_streaming_mode: bool = True,
         **kwargs,
     ) -> Tuple[List[np.ndarray], int]:
         """
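Both generation entry points of `Qwen3TTSModel` now default to `non_streaming_mode=True` and, per the signatures above, return a list of waveforms together with a sample rate. A hedged usage sketch; the method name `generate`, the constructor arguments, and the speaker/language values are placeholders inferred from the signatures, not confirmed API:

```python
import soundfile as sf  # assumed helper for writing audio, not a qwen-tts dependency claim
from qwen_tts.inference.qwen3_tts_model import Qwen3TTSModel

model = Qwen3TTSModel("path/to/qwen3-tts-checkpoint")  # constructor args are assumed
wavs, sample_rate = model.generate(                    # hypothetical method name
    text="Hello from Qwen3-TTS.",
    speaker="example-speaker",                         # placeholder speaker id
    language="en",
    non_streaming_mode=True,                           # now the default in 0.1.0
)
sf.write("out.wav", wavs[0], sample_rate)
```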
{qwen_tts-0.0.4.dist-info → qwen_tts-0.1.0.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: qwen-tts
-Version: 0.0.4
+Version: 0.1.0
 Summary: Qwen-TTS python package
 Author: Alibaba Qwen Team
 License: Apache-2.0

@@ -35,7 +35,7 @@ Dynamic: license-file
 <p>

 <p align="center">
-  🤗 <a href="https://huggingface.co/collections/Qwen/qwen3-tts">Hugging Face</a>   |   🤖 <a href="https://modelscope.cn/collections/Qwen/Qwen3-TTS">ModelScope</a>   |   📑 <a href="https://qwen.ai/blog?id=qwen3tts-0115">Blog</a>   |   📑 <a href="https://
+  🤗 <a href="https://huggingface.co/collections/Qwen/qwen3-tts">Hugging Face</a>   |   🤖 <a href="https://modelscope.cn/collections/Qwen/Qwen3-TTS">ModelScope</a>   |   📑 <a href="https://qwen.ai/blog?id=qwen3tts-0115">Blog</a>   |   📑 <a href="https://arxiv.org/abs/2601.15621">Paper</a>  
 <br>
 🖥️ <a href="https://huggingface.co/spaces/Qwen/Qwen3-TTS">Hugging Face Demo</a>   |    🖥️ <a href="https://modelscope.cn/studios/Qwen/Qwen3-TTS">ModelScope Demo</a>   |   💬 <a href="https://github.com/QwenLM/Qwen/blob/main/assets/wechat.png">WeChat (微信)</a>   |   🫨 <a href="https://discord.gg/CV4E9rpNSD">Discord</a>   |   📑 <a href="https://help.aliyun.com/zh/model-studio/qwen-tts-realtime">API</a>

@@ -66,7 +66,7 @@ We release **Qwen3-TTS**, a series of powerful speech generation capabilities de
 - [vLLM Usage](#vllm-usage)
 - [Fine Tuning](#fine-tuning)
 - [Evaluation](#evaluation)
-
+- [Citation](#citation)

 ## Overview
 ### Introduction

@@ -1367,18 +1367,23 @@ During evaluation, we ran inference for all models with `dtype=torch.bfloat16` a
 </details>


-
+## Citation

 If you find our paper and code useful in your research, please consider giving a star :star: and citation :pencil: :)

-
 ```BibTeX
 @article{Qwen3-TTS,
 title={Qwen3-TTS Technical Report},
-author={},
-journal={arXiv preprint arXiv:},
+author={Hangrui Hu and Xinfa Zhu and Ting He and Dake Guo and Bin Zhang and Xiong Wang and Zhifang Guo and Ziyue Jiang and Hongkun Hao and Zishan Guo and Xinyu Zhang and Pei Zhang and Baosong Yang and Jin Xu and Jingren Zhou and Junyang Lin},
+journal={arXiv preprint arXiv:2601.15621},
 year={2026}
 }
-```
+```
+
+
+## Star History
+
+[](https://star-history.com/#QwenLM/Qwen3-TTS&Date)
+

 <br>
{qwen_tts-0.0.4.dist-info → qwen_tts-0.1.0.dist-info}/RECORD

@@ -4,21 +4,21 @@ qwen_tts/cli/demo.py,sha256=6ijgkwdT4Fy91Tq3vZC3voGrhZVJkCdx2xXws6v81es,29160
 qwen_tts/core/__init__.py,sha256=GzgNnehDttWF2TjDOdBmE2VnynElQSZ0I0IEr0OGZ54,990
 qwen_tts/core/models/__init__.py,sha256=kX042P1-2E3nNwP9I5TVNcpBbhVpTR5QMk5KBtQCLII,807
 qwen_tts/core/models/configuration_qwen3_tts.py,sha256=9Shn8U_eBqQW3RSGTVA85tE9CgjV9dowGR4cgME_XRg,26428
-qwen_tts/core/models/modeling_qwen3_tts.py,sha256=
+qwen_tts/core/models/modeling_qwen3_tts.py,sha256=JcQmVrz4EPBu9rwYOb1wg_PIz-2sOhR8QGC0JisclqA,100211
 qwen_tts/core/models/processing_qwen3_tts.py,sha256=YUciAxiORu2mjXQMJfDyKOziSmHs-ULlfW5J54tNa80,4022
 qwen_tts/core/tokenizer_12hz/configuration_qwen3_tts_tokenizer_v2.py,sha256=njDCQ5SwDLA2bX2jSCt0NkaKzBzT2hpv5hSh00ZTpeM,7946
-qwen_tts/core/tokenizer_12hz/modeling_qwen3_tts_tokenizer_v2.py,sha256=
+qwen_tts/core/tokenizer_12hz/modeling_qwen3_tts_tokenizer_v2.py,sha256=OT_C6mD12rlnjYMJqHyT_t1idNDrFieXoEttbamaSJs,40464
 qwen_tts/core/tokenizer_25hz/configuration_qwen3_tts_tokenizer_v1.py,sha256=KPlPcV332W02XJzqACHZzKjGPS9I9IQjxDo9PK4o7wI,14494
 qwen_tts/core/tokenizer_25hz/modeling_qwen3_tts_tokenizer_v1.py,sha256=SdXRMvkR2uW8Fa_FzvRicu3nw4FzTHnCSLkUmfxBPF0,56472
 qwen_tts/core/tokenizer_25hz/vq/core_vq.py,sha256=01-p8A70hjNhST4QL04o8JJIhJcRffWAgZ6Ttd_zuN4,20074
 qwen_tts/core/tokenizer_25hz/vq/speech_vq.py,sha256=fGj8uoxQjYoY4iQbVjMC0b9G-OyxUVxZIiJj2FJLspg,14833
 qwen_tts/core/tokenizer_25hz/vq/whisper_encoder.py,sha256=oXSLNJaLqO_-v5AsSkOZlBs-Sbyj9sASD8Zz47p9dn0,14351
 qwen_tts/core/tokenizer_25hz/vq/assets/mel_filters.npz,sha256=dFCucHI6XvnTQePO5ijHywF382zkLES37SvzMl8PbUw,4271
-qwen_tts/inference/qwen3_tts_model.py,sha256=
+qwen_tts/inference/qwen3_tts_model.py,sha256=4dpFBzKFfB9f4-NuurhdsvbcakjKr_aXP0YzhOMCdeQ,37121
 qwen_tts/inference/qwen3_tts_tokenizer.py,sha256=vX1-6_rJIGQ7QtKd932ngHJYvEBjBUSos1tCEtOyFaw,15698
-qwen_tts-0.0.
-qwen_tts-0.0.
-qwen_tts-0.0.
-qwen_tts-0.0.
-qwen_tts-0.0.
-qwen_tts-0.0.
+qwen_tts-0.1.0.dist-info/licenses/LICENSE,sha256=pEpggcc6118CVbsrtcq3TvGClWWolaJOU6TxEpCrdlU,11343
+qwen_tts-0.1.0.dist-info/METADATA,sha256=qIoqgk0JBnvOKTWCh0ijBaECdDTAJHdkf5DhQwaRTPg,61393
+qwen_tts-0.1.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+qwen_tts-0.1.0.dist-info/entry_points.txt,sha256=hcoVetKUabLdCmu2ST4jGo8jilnoslpFzV-bxlHf0E0,57
+qwen_tts-0.1.0.dist-info/top_level.txt,sha256=1o-44WiYkUtYVTiL9eexzyNQXK6YWCOGZltO81PUirA,9
+qwen_tts-0.1.0.dist-info/RECORD,,
The remaining dist-info files (entry_points.txt, licenses/LICENSE, top_level.txt) are unchanged between 0.0.4 and 0.1.0.