minicpmo-utils 0.0.5__tar.gz → 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156)
  1. minicpmo_utils-0.1.0/PKG-INFO +72 -0
  2. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/README.md +2 -17
  3. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/pyproject.toml +19 -53
  4. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/cli/cosyvoice.py +5 -0
  5. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/utils/file_utils.py +1 -2
  6. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/minicpmo/__init__.py +10 -0
  7. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/minicpmo/utils.py +1 -88
  8. minicpmo_utils-0.1.0/src/minicpmo_utils.egg-info/PKG-INFO +72 -0
  9. minicpmo_utils-0.1.0/src/minicpmo_utils.egg-info/requires.txt +20 -0
  10. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/token2wav.py +1 -0
  11. minicpmo_utils-0.0.5/PKG-INFO +0 -116
  12. minicpmo_utils-0.0.5/src/minicpmo_utils.egg-info/PKG-INFO +0 -116
  13. minicpmo_utils-0.0.5/src/minicpmo_utils.egg-info/requires.txt +0 -55
  14. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/setup.cfg +0 -0
  15. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/__init__.py +0 -0
  16. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/bin/average_model.py +0 -0
  17. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/bin/export_jit.py +0 -0
  18. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/bin/export_onnx.py +0 -0
  19. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/bin/inference_deprecated.py +0 -0
  20. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/bin/train.py +0 -0
  21. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/cli/__init__.py +0 -0
  22. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/cli/frontend.py +0 -0
  23. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/cli/model.py +0 -0
  24. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/dataset/__init__.py +0 -0
  25. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/dataset/dataset.py +0 -0
  26. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/dataset/processor.py +0 -0
  27. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/flow/decoder.py +0 -0
  28. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/flow/flow.py +0 -0
  29. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/flow/flow_matching.py +0 -0
  30. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/flow/length_regulator.py +0 -0
  31. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/hifigan/discriminator.py +0 -0
  32. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/hifigan/f0_predictor.py +0 -0
  33. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/hifigan/generator.py +0 -0
  34. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/hifigan/hifigan.py +0 -0
  35. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/llm/llm.py +0 -0
  36. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken +0 -0
  37. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/tokenizer/tokenizer.py +0 -0
  38. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/transformer/__init__.py +0 -0
  39. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/transformer/activation.py +0 -0
  40. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/transformer/attention.py +0 -0
  41. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/transformer/convolution.py +0 -0
  42. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/transformer/decoder.py +0 -0
  43. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/transformer/decoder_layer.py +0 -0
  44. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/transformer/embedding.py +0 -0
  45. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/transformer/encoder.py +0 -0
  46. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/transformer/encoder_layer.py +0 -0
  47. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/transformer/label_smoothing_loss.py +0 -0
  48. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/transformer/positionwise_feed_forward.py +0 -0
  49. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/transformer/subsampling.py +0 -0
  50. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/transformer/upsample_encoder.py +0 -0
  51. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/utils/__init__.py +0 -0
  52. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/utils/class_utils.py +0 -0
  53. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/utils/common.py +0 -0
  54. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/utils/executor.py +0 -0
  55. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/utils/frontend_utils.py +0 -0
  56. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/utils/losses.py +0 -0
  57. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/utils/mask.py +0 -0
  58. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/utils/scheduler.py +0 -0
  59. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/utils/train_utils.py +0 -0
  60. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/vllm/cosyvoice2.py +0 -0
  61. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/__init__.py +0 -0
  62. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/app.py +0 -0
  63. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/cli.py +0 -0
  64. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/hifigan/__init__.py +0 -0
  65. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/hifigan/config.py +0 -0
  66. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/hifigan/denoiser.py +0 -0
  67. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/hifigan/env.py +0 -0
  68. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/hifigan/meldataset.py +0 -0
  69. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/hifigan/models.py +0 -0
  70. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/hifigan/xutils.py +0 -0
  71. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/models/__init__.py +0 -0
  72. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/models/baselightningmodule.py +0 -0
  73. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/models/components/__init__.py +0 -0
  74. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/models/components/decoder.py +0 -0
  75. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/models/components/flow_matching.py +0 -0
  76. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/models/components/text_encoder.py +0 -0
  77. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/models/components/transformer.py +0 -0
  78. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/models/matcha_tts.py +0 -0
  79. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/onnx/__init__.py +0 -0
  80. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/onnx/export.py +0 -0
  81. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/onnx/infer.py +0 -0
  82. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/text/__init__.py +0 -0
  83. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/text/cleaners.py +0 -0
  84. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/text/numbers.py +0 -0
  85. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/text/symbols.py +0 -0
  86. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/train.py +0 -0
  87. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/utils/__init__.py +0 -0
  88. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/utils/audio.py +0 -0
  89. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/utils/generate_data_statistics.py +0 -0
  90. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/utils/instantiators.py +0 -0
  91. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/utils/logging_utils.py +0 -0
  92. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/utils/model.py +0 -0
  93. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/utils/monotonic_align/__init__.py +0 -0
  94. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/utils/monotonic_align/setup.py +0 -0
  95. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/utils/pylogger.py +0 -0
  96. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/utils/rich_utils.py +0 -0
  97. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/matcha/utils/utils.py +0 -0
  98. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/minicpmo/version.py +0 -0
  99. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/minicpmo_utils.egg-info/SOURCES.txt +0 -0
  100. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/minicpmo_utils.egg-info/dependency_links.txt +0 -0
  101. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/minicpmo_utils.egg-info/top_level.txt +0 -0
  102. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/s3tokenizer/__init__.py +0 -0
  103. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/s3tokenizer/assets/BAC009S0764W0121.wav +0 -0
  104. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/s3tokenizer/assets/BAC009S0764W0122.wav +0 -0
  105. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/s3tokenizer/assets/mel_filters.npz +0 -0
  106. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/s3tokenizer/cli.py +0 -0
  107. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/s3tokenizer/model.py +0 -0
  108. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/s3tokenizer/model_v2.py +0 -0
  109. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/s3tokenizer/utils.py +0 -0
  110. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/__init__.py +0 -0
  111. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/cosyvoice2/__init__.py +0 -0
  112. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/cosyvoice2/flow/__init__.py +0 -0
  113. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/cosyvoice2/flow/decoder_dit.py +0 -0
  114. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/cosyvoice2/flow/flow.py +0 -0
  115. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/cosyvoice2/flow/flow_matching.py +0 -0
  116. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/cosyvoice2/transformer/__init__.py +0 -0
  117. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/cosyvoice2/transformer/attention.py +0 -0
  118. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/cosyvoice2/transformer/embedding.py +0 -0
  119. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/cosyvoice2/transformer/encoder_layer.py +0 -0
  120. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/cosyvoice2/transformer/positionwise_feed_forward.py +0 -0
  121. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/cosyvoice2/transformer/subsampling.py +0 -0
  122. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/cosyvoice2/transformer/upsample_encoder_v2.py +0 -0
  123. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/cosyvoice2/utils/__init__.py +0 -0
  124. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/cosyvoice2/utils/class_utils.py +0 -0
  125. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/cosyvoice2/utils/common.py +0 -0
  126. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/cosyvoice2/utils/mask.py +0 -0
  127. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/__init__.py +0 -0
  128. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/cli.py +0 -0
  129. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/config.py +0 -0
  130. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/cosyvoice2.py +0 -0
  131. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/cosyvoice3.py +0 -0
  132. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/engine/__init__.py +0 -0
  133. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/engine/block_manager.py +0 -0
  134. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/engine/llm_engine.py +0 -0
  135. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/engine/model_runner.py +0 -0
  136. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/engine/scheduler.py +0 -0
  137. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/engine/sequence.py +0 -0
  138. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/modules/__init__.py +0 -0
  139. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/modules/flow.py +0 -0
  140. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/modules/flow_components/__init__.py +0 -0
  141. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/modules/flow_components/estimator.py +0 -0
  142. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/modules/flow_components/upsample_encoder.py +0 -0
  143. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/modules/hifigan.py +0 -0
  144. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/modules/hifigan_components/__init__.py +0 -0
  145. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/modules/hifigan_components/layers.py +0 -0
  146. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/modules/qwen2.py +0 -0
  147. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/modules/qwen2_components/__init__.py +0 -0
  148. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/modules/qwen2_components/layers.py +0 -0
  149. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/modules/sampler.py +0 -0
  150. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/utils/__init__.py +0 -0
  151. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/utils/audio.py +0 -0
  152. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/utils/context.py +0 -0
  153. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/utils/loader.py +0 -0
  154. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/utils/memory.py +0 -0
  155. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/stepaudio2.py +0 -0
  156. {minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/utils.py +0 -0
@@ -0,0 +1,72 @@
1
+ Metadata-Version: 2.4
2
+ Name: minicpmo-utils
3
+ Version: 0.1.0
4
+ Summary: Unified utilities package for MiniCPM-o: includes cosyvoice + stepaudio2 and extensible utils.
5
+ Author: MiniCPM-o Utils Maintainers
6
+ License: Apache-2.0
7
+ Keywords: minicpmo,audio,tts,utils,cosyvoice,stepaudio2
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: Apache Software License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Requires-Python: >=3.10
16
+ Description-Content-Type: text/markdown
17
+ Requires-Dist: torch>=2.3.0
18
+ Requires-Dist: torchaudio>=2.3.0
19
+ Requires-Dist: transformers>=4.49.0
20
+ Requires-Dist: numpy
21
+ Requires-Dist: hyperpyyaml
22
+ Requires-Dist: modelscope
23
+ Requires-Dist: openai-whisper
24
+ Requires-Dist: tqdm
25
+ Requires-Dist: tiktoken
26
+ Requires-Dist: inflect
27
+ Requires-Dist: omegaconf
28
+ Requires-Dist: einops
29
+ Requires-Dist: librosa
30
+ Requires-Dist: onnxruntime>=1.18.0
31
+ Requires-Dist: diffusers
32
+ Provides-Extra: gpu
33
+ Requires-Dist: onnxruntime-gpu>=1.18.0; sys_platform == "linux" and extra == "gpu"
34
+
35
+ ## minicpmo-utils
36
+
37
+ 一个统一安装的工具包(一个 PyPI 分发包),把仓库里的 `cosyvoice` 与 `stepaudio2` 一起打进同一个 wheel,并预留 `minicpmo` 作为后续扩展 utils 的统一入口。
38
+
39
+ ### 安装方式
40
+
41
+ - 从源码本地安装(开发态,可编辑):
42
+ ```bash
43
+ cd minicpmo-utils
44
+ pip install -e .
45
+ ```
46
+
47
+ - 构建并安装 wheel(推荐分发):
48
+ ```bash
49
+ cd minicpmo-utils
50
+ python -m build # 生成 dist/*.whl
51
+ pip install dist/minicpmo_utils-0.1.0-py3-none-any.whl
52
+ ```
53
+
54
+ ### 导入方式
55
+
56
+ 包会暴露以下顶层模块,安装后可直接使用:
57
+ - `import cosyvoice`
58
+ - `import stepaudio2`
59
+ - `import matcha`
60
+ - `import minicpmo`
61
+
62
+ 也支持通过统一入口导入子包:
63
+ ```python
64
+ from minicpmo import cosyvoice, stepaudio2, matcha
65
+ ```
66
+
67
+ 以及通过统一的 utils 入口使用通用工具函数,例如:
68
+
69
+ ```python
70
+ from minicpmo.utils import get_video_frame_audio_segments
71
+ ```
72
+
@@ -4,32 +4,17 @@
4
4
 
5
5
  ### 安装方式
6
6
 
7
- - 从源码本地安装(开发态,可编辑,默认只装公共依赖):
7
+ - 从源码本地安装(开发态,可编辑):
8
8
  ```bash
9
9
  cd minicpmo-utils
10
10
  pip install -e .
11
11
  ```
12
12
 
13
- - 如果只想安装 cosyvoice 相关依赖(TTS):
14
- ```bash
15
- pip install -e .[tts]
16
- ```
17
-
18
- - 如果只想安装 stepaudio2 / streaming 相关依赖:
19
- ```bash
20
- pip install -e .[streaming]
21
- ```
22
-
23
- - 同时安装 cosyvoice + stepaudio2 相关依赖:
24
- ```bash
25
- pip install -e .[tts,streaming]
26
- ```
27
-
28
13
  - 构建并安装 wheel(推荐分发):
29
14
  ```bash
30
15
  cd minicpmo-utils
31
16
  python -m build # 生成 dist/*.whl
32
- pip install \"dist/minicpmo_utils-0.1.0-py3-none-any.whl[tts,streaming]\"
17
+ pip install dist/minicpmo_utils-0.1.0-py3-none-any.whl
33
18
  ```
34
19
 
35
20
  ### 导入方式
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "minicpmo-utils"
7
- version = "0.0.5"
7
+ version = "0.1.0"
8
8
  description = "Unified utilities package for MiniCPM-o: includes cosyvoice + stepaudio2 and extensible utils."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -31,68 +31,34 @@ classifiers = [
31
31
  # - s3tokenizer (来自 S3Tokenizer-main)
32
32
  # - minicpmo (本项目扩展 utils 的统一入口:from minicpmo.utils import ...)
33
33
  dependencies = [
34
- "numpy",
35
- "pillow==10.4.0",
36
- "librosa==0.9.0",
37
- "decord==0.6.0",
38
- "moviepy==2.1.2",
39
- "numba==0.61.2",
40
- ]
41
-
42
- [project.optional-dependencies]
43
- # cosyvoice TTS 相关依赖
44
- tts = [
34
+ # Core ML frameworks (align to stepaudio2's requirements)
45
35
  "torch>=2.3.0",
46
36
  "torchaudio>=2.3.0",
47
- "transformers>=4.51.0,<4.53.0", # 4.52+ 有兼容性问题
48
- "onnxruntime>=1.18.0,<=1.21.0",
49
- "onnx",
37
+ "transformers>=4.49.0",
38
+
39
+ # Shared / common
40
+ "numpy",
50
41
  "hyperpyyaml",
51
- "openai-whisper==20231117",
42
+
43
+ # cosyvoice side
44
+ "modelscope",
45
+ "openai-whisper",
52
46
  "tqdm",
53
47
  "tiktoken",
54
48
  "inflect",
55
- "omegaconf>=2.0.6",
56
- "conformer==0.3.2",
57
- "einops==0.8.1",
58
- "hydra-core",
59
- "lightning==2.2.4",
60
- "rich",
61
- "gdown==5.2.0",
62
- "matplotlib",
63
- "wget",
64
- "pyarrow",
65
- "pyworld",
66
- # 新增依赖
67
- "scipy",
68
- "pyyaml",
69
- "regex",
70
- "soundfile==0.12.1",
71
- "diffusers==0.29.0"
72
- ]
73
-
74
- # stepaudio2 基础依赖(token2wav 等)
75
- streaming = [
76
- "minicpmo-utils[tts]", # streaming 依赖 tts
77
- ]
49
+ "omegaconf",
50
+ "einops",
78
51
 
79
- # stepaudio2 Flash 推理引擎依赖(flashcosyvoice.engine 模块需要)
80
- streaming-flash = [
81
- "minicpmo-utils[streaming]",
82
- "flash-attn>=2.6.0; sys_platform == 'linux'",
83
- "triton>=2.3.0; sys_platform == 'linux'",
84
- "safetensors",
85
- "pynvml",
86
- "xxhash",
52
+ # stepaudio2 side
53
+ "librosa",
54
+ "onnxruntime>=1.18.0",
55
+ "diffusers",
87
56
  ]
88
57
 
89
- # Linux GPU onnxruntime 可以很重,且与环境强相关,保留为可选 extra
58
+ [project.optional-dependencies]
59
+ # Linux GPU onnxruntime can be heavy and environment-specific; keep as an opt-in extra.
90
60
  gpu = [
91
- "onnxruntime-gpu>=1.18.0,<=1.23.2; sys_platform == 'linux'",
92
- ]
93
-
94
- all = [
95
- "minicpmo-utils[tts,streaming,gpu]",
61
+ "onnxruntime-gpu>=1.18.0; sys_platform == 'linux'",
96
62
  ]
97
63
 
98
64
  [tool.setuptools]
@@ -16,6 +16,7 @@ import time
16
16
  from typing import Generator
17
17
  from tqdm import tqdm
18
18
  from hyperpyyaml import load_hyperpyyaml
19
+ from modelscope import snapshot_download
19
20
  import torch
20
21
  from cosyvoice.cli.frontend import CosyVoiceFrontEnd
21
22
  from cosyvoice.cli.model import CosyVoiceModel, CosyVoice2Model
@@ -29,6 +30,8 @@ class CosyVoice:
29
30
  self.instruct = True if '-Instruct' in model_dir else False
30
31
  self.model_dir = model_dir
31
32
  self.fp16 = fp16
33
+ if not os.path.exists(model_dir):
34
+ model_dir = snapshot_download(model_dir)
32
35
  hyper_yaml_path = '{}/cosyvoice.yaml'.format(model_dir)
33
36
  if not os.path.exists(hyper_yaml_path):
34
37
  raise ValueError('{} not found!'.format(hyper_yaml_path))
@@ -151,6 +154,8 @@ class CosyVoice2(CosyVoice):
151
154
  self.instruct = True if '-Instruct' in model_dir else False
152
155
  self.model_dir = model_dir
153
156
  self.fp16 = fp16
157
+ if not os.path.exists(model_dir):
158
+ model_dir = snapshot_download(model_dir)
154
159
 
155
160
  if config_path is None:
156
161
  config_path = f'{model_dir}/cosyvoice2.yaml'
@@ -20,8 +20,7 @@ import torch
20
20
  import torchaudio
21
21
  import logging
22
22
  logging.getLogger('matplotlib').setLevel(logging.WARNING)
23
- logging.getLogger('numba').setLevel(logging.WARNING)
24
- logging.basicConfig(level=logging.INFO,
23
+ logging.basicConfig(level=logging.DEBUG,
25
24
  format='%(asctime)s %(levelname)s %(message)s')
26
25
 
27
26
 
@@ -12,3 +12,13 @@
12
12
  """
13
13
 
14
14
  from .version import __version__
15
+
16
+ # Eager re-exports to allow:
17
+ # from minicpmo import cosyvoice, stepaudio2, matcha
18
+ # 而不需要懒加载。
19
+ import cosyvoice as cosyvoice
20
+ import stepaudio2 as stepaudio2
21
+ import matcha as matcha
22
+
23
+ __all__ = ["__version__", "cosyvoice", "stepaudio2", "matcha"]
24
+
@@ -26,84 +26,6 @@ MAX_NUM_FRAMES = int(os.getenv("MAX_NUM_FRAMES", 64))
26
26
  VIDEO_MME_DURATION = os.getenv("VIDEO_MME_DURATION", "ALL")
27
27
 
28
28
 
29
- def find_cjk_font():
30
- """
31
- 查找支持中文的字体。
32
- 按优先级返回字体名称或路径,如果找不到则返回 None。
33
- """
34
- # 常见的中文字体名称(按优先级排列)
35
- font_names = [
36
- # Noto CJK 字体(推荐)
37
- "Noto Sans CJK SC",
38
- "Noto Sans CJK",
39
- "NotoSansCJK-Regular",
40
- # 文泉驿字体
41
- "WenQuanYi Zen Hei",
42
- "WenQuanYi Micro Hei",
43
- "文泉驿正黑",
44
- "文泉驿微米黑",
45
- # 思源字体
46
- "Source Han Sans SC",
47
- "Source Han Sans CN",
48
- # 其他常见中文字体
49
- "SimHei",
50
- "Microsoft YaHei",
51
- "PingFang SC",
52
- "Hiragino Sans GB",
53
- "STHeiti",
54
- "AR PL UMing CN",
55
- "AR PL UKai CN",
56
- ]
57
-
58
- # 常见的中文字体文件路径
59
- font_paths = [
60
- # Noto CJK
61
- "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
62
- "/usr/share/fonts/noto-cjk/NotoSansCJK-Regular.ttc",
63
- "/usr/share/fonts/google-noto-cjk/NotoSansCJK-Regular.ttc",
64
- "/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc",
65
- # 文泉驿
66
- "/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc",
67
- "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc",
68
- "/usr/share/fonts/wenquanyi/wqy-zenhei/wqy-zenhei.ttc",
69
- "/usr/share/fonts/wenquanyi/wqy-microhei/wqy-microhei.ttc",
70
- # macOS
71
- "/System/Library/Fonts/PingFang.ttc",
72
- "/Library/Fonts/Arial Unicode.ttf",
73
- # Windows (WSL)
74
- "/mnt/c/Windows/Fonts/msyh.ttc",
75
- "/mnt/c/Windows/Fonts/simhei.ttf",
76
- ]
77
-
78
- # 首先尝试使用 fc-list 查找已安装的中文字体
79
- try:
80
- result = subprocess.run(
81
- ["fc-list", ":lang=zh", "-f", "%{family}\n"],
82
- capture_output=True,
83
- text=True,
84
- timeout=5,
85
- )
86
- if result.returncode == 0:
87
- installed_fonts = set(result.stdout.strip().split("\n"))
88
- for font_name in font_names:
89
- for installed in installed_fonts:
90
- if font_name.lower() in installed.lower():
91
- logger.info(f"Found CJK font via fc-list: {installed}")
92
- return installed.split(",")[0] # 取第一个名称
93
- except Exception as e:
94
- logger.debug(f"fc-list failed: {e}")
95
-
96
- # 然后检查常见的字体文件路径
97
- for path in font_paths:
98
- if os.path.exists(path):
99
- logger.info(f"Found CJK font file: {path}")
100
- return path
101
-
102
- logger.warning("No CJK font found. Chinese subtitles may display as boxes/garbled text.")
103
- logger.warning("Install Chinese fonts with: sudo apt-get install fonts-noto-cjk")
104
- return None
105
-
106
-
107
29
  def concat_images(images, bg_color=(255, 255, 255), cell_size=None, line_color=(0, 0, 0), line_width=6):
108
30
  """
109
31
  images: List[PIL.Image.Image]
@@ -649,18 +571,9 @@ def generate_duplex_video(
649
571
 
650
572
  if has_subtitles:
651
573
  srt_path_escaped = srt_path.replace("\\", "\\\\").replace("'", "'\\''").replace(":", "\\:")
652
-
653
- # 查找支持中文的字体
654
- cjk_font = find_cjk_font()
655
- font_style = ""
656
- if cjk_font:
657
- # 转义字体路径/名称中的特殊字符
658
- font_escaped = cjk_font.replace("\\", "\\\\").replace("'", "'\\''").replace(":", "\\:")
659
- font_style = f"FontName={font_escaped},"
660
-
661
574
  subtitle_filter = (
662
575
  f"subtitles='{srt_path_escaped}':"
663
- f"force_style='{font_style}FontSize=28,"
576
+ f"force_style='FontSize=28,"
664
577
  f"PrimaryColour=&H00FFFFFF,"
665
578
  f"OutlineColour=&H00000000,"
666
579
  f"BorderStyle=3,"
@@ -0,0 +1,72 @@
1
+ Metadata-Version: 2.4
2
+ Name: minicpmo-utils
3
+ Version: 0.1.0
4
+ Summary: Unified utilities package for MiniCPM-o: includes cosyvoice + stepaudio2 and extensible utils.
5
+ Author: MiniCPM-o Utils Maintainers
6
+ License: Apache-2.0
7
+ Keywords: minicpmo,audio,tts,utils,cosyvoice,stepaudio2
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: Apache Software License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Requires-Python: >=3.10
16
+ Description-Content-Type: text/markdown
17
+ Requires-Dist: torch>=2.3.0
18
+ Requires-Dist: torchaudio>=2.3.0
19
+ Requires-Dist: transformers>=4.49.0
20
+ Requires-Dist: numpy
21
+ Requires-Dist: hyperpyyaml
22
+ Requires-Dist: modelscope
23
+ Requires-Dist: openai-whisper
24
+ Requires-Dist: tqdm
25
+ Requires-Dist: tiktoken
26
+ Requires-Dist: inflect
27
+ Requires-Dist: omegaconf
28
+ Requires-Dist: einops
29
+ Requires-Dist: librosa
30
+ Requires-Dist: onnxruntime>=1.18.0
31
+ Requires-Dist: diffusers
32
+ Provides-Extra: gpu
33
+ Requires-Dist: onnxruntime-gpu>=1.18.0; sys_platform == "linux" and extra == "gpu"
34
+
35
+ ## minicpmo-utils
36
+
37
+ 一个统一安装的工具包(一个 PyPI 分发包),把仓库里的 `cosyvoice` 与 `stepaudio2` 一起打进同一个 wheel,并预留 `minicpmo` 作为后续扩展 utils 的统一入口。
38
+
39
+ ### 安装方式
40
+
41
+ - 从源码本地安装(开发态,可编辑):
42
+ ```bash
43
+ cd minicpmo-utils
44
+ pip install -e .
45
+ ```
46
+
47
+ - 构建并安装 wheel(推荐分发):
48
+ ```bash
49
+ cd minicpmo-utils
50
+ python -m build # 生成 dist/*.whl
51
+ pip install dist/minicpmo_utils-0.1.0-py3-none-any.whl
52
+ ```
53
+
54
+ ### 导入方式
55
+
56
+ 包会暴露以下顶层模块,安装后可直接使用:
57
+ - `import cosyvoice`
58
+ - `import stepaudio2`
59
+ - `import matcha`
60
+ - `import minicpmo`
61
+
62
+ 也支持通过统一入口导入子包:
63
+ ```python
64
+ from minicpmo import cosyvoice, stepaudio2, matcha
65
+ ```
66
+
67
+ 以及通过统一的 utils 入口使用通用工具函数,例如:
68
+
69
+ ```python
70
+ from minicpmo.utils import get_video_frame_audio_segments
71
+ ```
72
+
@@ -0,0 +1,20 @@
1
+ torch>=2.3.0
2
+ torchaudio>=2.3.0
3
+ transformers>=4.49.0
4
+ numpy
5
+ hyperpyyaml
6
+ modelscope
7
+ openai-whisper
8
+ tqdm
9
+ tiktoken
10
+ inflect
11
+ omegaconf
12
+ einops
13
+ librosa
14
+ onnxruntime>=1.18.0
15
+ diffusers
16
+
17
+ [gpu]
18
+
19
+ [gpu:sys_platform == "linux"]
20
+ onnxruntime-gpu>=1.18.0
@@ -7,6 +7,7 @@ import torchaudio
7
7
  import s3tokenizer
8
8
  import onnxruntime
9
9
  import numpy as np
10
+ from copy import deepcopy
10
11
 
11
12
  import torchaudio.compliance.kaldi as kaldi
12
13
  from stepaudio2.flashcosyvoice.modules.hifigan import HiFTGenerator
@@ -1,116 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: minicpmo-utils
3
- Version: 0.0.5
4
- Summary: Unified utilities package for MiniCPM-o: includes cosyvoice + stepaudio2 and extensible utils.
5
- Author: MiniCPM-o Utils Maintainers
6
- License: Apache-2.0
7
- Keywords: minicpmo,audio,tts,utils,cosyvoice,stepaudio2
8
- Classifier: Development Status :: 4 - Beta
9
- Classifier: Intended Audience :: Developers
10
- Classifier: License :: OSI Approved :: Apache Software License
11
- Classifier: Programming Language :: Python :: 3
12
- Classifier: Programming Language :: Python :: 3.10
13
- Classifier: Programming Language :: Python :: 3.11
14
- Classifier: Programming Language :: Python :: 3.12
15
- Requires-Python: >=3.10
16
- Description-Content-Type: text/markdown
17
- Requires-Dist: numpy
18
- Requires-Dist: pillow==10.4.0
19
- Requires-Dist: librosa==0.9.0
20
- Requires-Dist: decord==0.6.0
21
- Requires-Dist: moviepy==2.1.2
22
- Requires-Dist: numba==0.61.2
23
- Provides-Extra: tts
24
- Requires-Dist: torch>=2.3.0; extra == "tts"
25
- Requires-Dist: torchaudio>=2.3.0; extra == "tts"
26
- Requires-Dist: transformers<4.53.0,>=4.51.0; extra == "tts"
27
- Requires-Dist: onnxruntime<=1.21.0,>=1.18.0; extra == "tts"
28
- Requires-Dist: onnx; extra == "tts"
29
- Requires-Dist: hyperpyyaml; extra == "tts"
30
- Requires-Dist: openai-whisper==20231117; extra == "tts"
31
- Requires-Dist: tqdm; extra == "tts"
32
- Requires-Dist: tiktoken; extra == "tts"
33
- Requires-Dist: inflect; extra == "tts"
34
- Requires-Dist: omegaconf>=2.0.6; extra == "tts"
35
- Requires-Dist: conformer==0.3.2; extra == "tts"
36
- Requires-Dist: einops==0.8.1; extra == "tts"
37
- Requires-Dist: hydra-core; extra == "tts"
38
- Requires-Dist: lightning==2.2.4; extra == "tts"
39
- Requires-Dist: rich; extra == "tts"
40
- Requires-Dist: gdown==5.2.0; extra == "tts"
41
- Requires-Dist: matplotlib; extra == "tts"
42
- Requires-Dist: wget; extra == "tts"
43
- Requires-Dist: pyarrow; extra == "tts"
44
- Requires-Dist: pyworld; extra == "tts"
45
- Requires-Dist: scipy; extra == "tts"
46
- Requires-Dist: pyyaml; extra == "tts"
47
- Requires-Dist: regex; extra == "tts"
48
- Requires-Dist: soundfile==0.12.1; extra == "tts"
49
- Requires-Dist: diffusers==0.29.0; extra == "tts"
50
- Provides-Extra: streaming
51
- Requires-Dist: minicpmo-utils[tts]; extra == "streaming"
52
- Provides-Extra: streaming-flash
53
- Requires-Dist: minicpmo-utils[streaming]; extra == "streaming-flash"
54
- Requires-Dist: flash-attn>=2.6.0; sys_platform == "linux" and extra == "streaming-flash"
55
- Requires-Dist: triton>=2.3.0; sys_platform == "linux" and extra == "streaming-flash"
56
- Requires-Dist: safetensors; extra == "streaming-flash"
57
- Requires-Dist: pynvml; extra == "streaming-flash"
58
- Requires-Dist: xxhash; extra == "streaming-flash"
59
- Provides-Extra: gpu
60
- Requires-Dist: onnxruntime-gpu<=1.23.2,>=1.18.0; sys_platform == "linux" and extra == "gpu"
61
- Provides-Extra: all
62
- Requires-Dist: minicpmo-utils[gpu,streaming,tts]; extra == "all"
63
-
64
- ## minicpmo-utils
65
-
66
- 一个统一安装的工具包(一个 PyPI 分发包),把仓库里的 `cosyvoice` 与 `stepaudio2` 一起打进同一个 wheel,并预留 `minicpmo` 作为后续扩展 utils 的统一入口。
67
-
68
- ### 安装方式
69
-
70
- - 从源码本地安装(开发态,可编辑,默认只装公共依赖):
71
- ```bash
72
- cd minicpmo-utils
73
- pip install -e .
74
- ```
75
-
76
- - 如果只想安装 cosyvoice 相关依赖(TTS):
77
- ```bash
78
- pip install -e .[tts]
79
- ```
80
-
81
- - 如果只想安装 stepaudio2 / streaming 相关依赖:
82
- ```bash
83
- pip install -e .[streaming]
84
- ```
85
-
86
- - 同时安装 cosyvoice + stepaudio2 相关依赖:
87
- ```bash
88
- pip install -e .[tts,streaming]
89
- ```
90
-
91
- - 构建并安装 wheel(推荐分发):
92
- ```bash
93
- cd minicpmo-utils
94
- python -m build # 生成 dist/*.whl
95
- pip install \"dist/minicpmo_utils-0.1.0-py3-none-any.whl[tts,streaming]\"
96
- ```
97
-
98
- ### 导入方式
99
-
100
- 包会暴露以下顶层模块,安装后可直接使用:
101
- - `import cosyvoice`
102
- - `import stepaudio2`
103
- - `import matcha`
104
- - `import minicpmo`
105
-
106
- 也支持通过统一入口导入子包:
107
- ```python
108
- from minicpmo import cosyvoice, stepaudio2, matcha
109
- ```
110
-
111
- 以及通过统一的 utils 入口使用通用工具函数,例如:
112
-
113
- ```python
114
- from minicpmo.utils import get_video_frame_audio_segments
115
- ```
116
-