minicpmo-utils 0.0.6__tar.gz → 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154)
  1. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/PKG-INFO +15 -32
  2. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/pyproject.toml +18 -49
  3. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/utils/file_utils.py +1 -2
  4. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/minicpmo/__init__.py +10 -0
  5. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/minicpmo/utils.py +1 -88
  6. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/minicpmo_utils.egg-info/PKG-INFO +15 -32
  7. minicpmo_utils-0.1.1/src/minicpmo_utils.egg-info/requires.txt +29 -0
  8. minicpmo_utils-0.0.6/src/minicpmo_utils.egg-info/requires.txt +0 -43
  9. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/README.md +0 -0
  10. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/setup.cfg +0 -0
  11. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/__init__.py +0 -0
  12. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/bin/average_model.py +0 -0
  13. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/bin/export_jit.py +0 -0
  14. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/bin/export_onnx.py +0 -0
  15. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/bin/inference_deprecated.py +0 -0
  16. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/bin/train.py +0 -0
  17. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/cli/__init__.py +0 -0
  18. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/cli/cosyvoice.py +0 -0
  19. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/cli/frontend.py +0 -0
  20. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/cli/model.py +0 -0
  21. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/dataset/__init__.py +0 -0
  22. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/dataset/dataset.py +0 -0
  23. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/dataset/processor.py +0 -0
  24. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/flow/decoder.py +0 -0
  25. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/flow/flow.py +0 -0
  26. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/flow/flow_matching.py +0 -0
  27. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/flow/length_regulator.py +0 -0
  28. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/hifigan/discriminator.py +0 -0
  29. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/hifigan/f0_predictor.py +0 -0
  30. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/hifigan/generator.py +0 -0
  31. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/hifigan/hifigan.py +0 -0
  32. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/llm/llm.py +0 -0
  33. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken +0 -0
  34. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/tokenizer/tokenizer.py +0 -0
  35. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/transformer/__init__.py +0 -0
  36. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/transformer/activation.py +0 -0
  37. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/transformer/attention.py +0 -0
  38. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/transformer/convolution.py +0 -0
  39. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/transformer/decoder.py +0 -0
  40. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/transformer/decoder_layer.py +0 -0
  41. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/transformer/embedding.py +0 -0
  42. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/transformer/encoder.py +0 -0
  43. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/transformer/encoder_layer.py +0 -0
  44. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/transformer/label_smoothing_loss.py +0 -0
  45. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/transformer/positionwise_feed_forward.py +0 -0
  46. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/transformer/subsampling.py +0 -0
  47. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/transformer/upsample_encoder.py +0 -0
  48. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/utils/__init__.py +0 -0
  49. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/utils/class_utils.py +0 -0
  50. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/utils/common.py +0 -0
  51. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/utils/executor.py +0 -0
  52. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/utils/frontend_utils.py +0 -0
  53. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/utils/losses.py +0 -0
  54. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/utils/mask.py +0 -0
  55. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/utils/scheduler.py +0 -0
  56. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/utils/train_utils.py +0 -0
  57. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/cosyvoice/vllm/cosyvoice2.py +0 -0
  58. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/__init__.py +0 -0
  59. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/app.py +0 -0
  60. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/cli.py +0 -0
  61. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/hifigan/__init__.py +0 -0
  62. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/hifigan/config.py +0 -0
  63. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/hifigan/denoiser.py +0 -0
  64. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/hifigan/env.py +0 -0
  65. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/hifigan/meldataset.py +0 -0
  66. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/hifigan/models.py +0 -0
  67. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/hifigan/xutils.py +0 -0
  68. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/models/__init__.py +0 -0
  69. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/models/baselightningmodule.py +0 -0
  70. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/models/components/__init__.py +0 -0
  71. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/models/components/decoder.py +0 -0
  72. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/models/components/flow_matching.py +0 -0
  73. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/models/components/text_encoder.py +0 -0
  74. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/models/components/transformer.py +0 -0
  75. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/models/matcha_tts.py +0 -0
  76. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/onnx/__init__.py +0 -0
  77. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/onnx/export.py +0 -0
  78. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/onnx/infer.py +0 -0
  79. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/text/__init__.py +0 -0
  80. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/text/cleaners.py +0 -0
  81. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/text/numbers.py +0 -0
  82. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/text/symbols.py +0 -0
  83. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/train.py +0 -0
  84. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/utils/__init__.py +0 -0
  85. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/utils/audio.py +0 -0
  86. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/utils/generate_data_statistics.py +0 -0
  87. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/utils/instantiators.py +0 -0
  88. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/utils/logging_utils.py +0 -0
  89. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/utils/model.py +0 -0
  90. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/utils/monotonic_align/__init__.py +0 -0
  91. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/utils/monotonic_align/setup.py +0 -0
  92. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/utils/pylogger.py +0 -0
  93. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/utils/rich_utils.py +0 -0
  94. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/matcha/utils/utils.py +0 -0
  95. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/minicpmo/version.py +0 -0
  96. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/minicpmo_utils.egg-info/SOURCES.txt +0 -0
  97. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/minicpmo_utils.egg-info/dependency_links.txt +0 -0
  98. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/minicpmo_utils.egg-info/top_level.txt +0 -0
  99. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/s3tokenizer/__init__.py +0 -0
  100. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/s3tokenizer/assets/BAC009S0764W0121.wav +0 -0
  101. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/s3tokenizer/assets/BAC009S0764W0122.wav +0 -0
  102. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/s3tokenizer/assets/mel_filters.npz +0 -0
  103. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/s3tokenizer/cli.py +0 -0
  104. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/s3tokenizer/model.py +0 -0
  105. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/s3tokenizer/model_v2.py +0 -0
  106. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/s3tokenizer/utils.py +0 -0
  107. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/__init__.py +0 -0
  108. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/cosyvoice2/__init__.py +0 -0
  109. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/cosyvoice2/flow/__init__.py +0 -0
  110. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/cosyvoice2/flow/decoder_dit.py +0 -0
  111. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/cosyvoice2/flow/flow.py +0 -0
  112. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/cosyvoice2/flow/flow_matching.py +0 -0
  113. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/cosyvoice2/transformer/__init__.py +0 -0
  114. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/cosyvoice2/transformer/attention.py +0 -0
  115. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/cosyvoice2/transformer/embedding.py +0 -0
  116. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/cosyvoice2/transformer/encoder_layer.py +0 -0
  117. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/cosyvoice2/transformer/positionwise_feed_forward.py +0 -0
  118. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/cosyvoice2/transformer/subsampling.py +0 -0
  119. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/cosyvoice2/transformer/upsample_encoder_v2.py +0 -0
  120. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/cosyvoice2/utils/__init__.py +0 -0
  121. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/cosyvoice2/utils/class_utils.py +0 -0
  122. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/cosyvoice2/utils/common.py +0 -0
  123. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/cosyvoice2/utils/mask.py +0 -0
  124. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/flashcosyvoice/__init__.py +0 -0
  125. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/flashcosyvoice/cli.py +0 -0
  126. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/flashcosyvoice/config.py +0 -0
  127. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/flashcosyvoice/cosyvoice2.py +0 -0
  128. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/flashcosyvoice/cosyvoice3.py +0 -0
  129. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/flashcosyvoice/engine/__init__.py +0 -0
  130. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/flashcosyvoice/engine/block_manager.py +0 -0
  131. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/flashcosyvoice/engine/llm_engine.py +0 -0
  132. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/flashcosyvoice/engine/model_runner.py +0 -0
  133. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/flashcosyvoice/engine/scheduler.py +0 -0
  134. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/flashcosyvoice/engine/sequence.py +0 -0
  135. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/flashcosyvoice/modules/__init__.py +0 -0
  136. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/flashcosyvoice/modules/flow.py +0 -0
  137. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/flashcosyvoice/modules/flow_components/__init__.py +0 -0
  138. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/flashcosyvoice/modules/flow_components/estimator.py +0 -0
  139. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/flashcosyvoice/modules/flow_components/upsample_encoder.py +0 -0
  140. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/flashcosyvoice/modules/hifigan.py +0 -0
  141. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/flashcosyvoice/modules/hifigan_components/__init__.py +0 -0
  142. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/flashcosyvoice/modules/hifigan_components/layers.py +0 -0
  143. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/flashcosyvoice/modules/qwen2.py +0 -0
  144. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/flashcosyvoice/modules/qwen2_components/__init__.py +0 -0
  145. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/flashcosyvoice/modules/qwen2_components/layers.py +0 -0
  146. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/flashcosyvoice/modules/sampler.py +0 -0
  147. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/flashcosyvoice/utils/__init__.py +0 -0
  148. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/flashcosyvoice/utils/audio.py +0 -0
  149. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/flashcosyvoice/utils/context.py +0 -0
  150. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/flashcosyvoice/utils/loader.py +0 -0
  151. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/flashcosyvoice/utils/memory.py +0 -0
  152. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/stepaudio2.py +0 -0
  153. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/token2wav.py +0 -0
  154. {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.1}/src/stepaudio2/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: minicpmo-utils
3
- Version: 0.0.6
3
+ Version: 0.1.1
4
4
  Summary: Unified utilities package for MiniCPM-o: includes cosyvoice + stepaudio2 and extensible utils.
5
5
  Author: MiniCPM-o Utils Maintainers
6
6
  License: Apache-2.0
@@ -14,46 +14,29 @@ Classifier: Programming Language :: Python :: 3.11
14
14
  Classifier: Programming Language :: Python :: 3.12
15
15
  Requires-Python: >=3.10
16
16
  Description-Content-Type: text/markdown
17
- Requires-Dist: MarkupSafe>=2.1.0
18
- Requires-Dist: Jinja2>=3.1.0
19
- Requires-Dist: numpy>=1.24.0
20
- Requires-Dist: pillow==10.4.0
21
- Requires-Dist: librosa==0.9.0
22
- Requires-Dist: decord==0.6.0
23
- Requires-Dist: moviepy==2.1.2
24
- Requires-Dist: numba==0.61.2
17
+ Requires-Dist: numpy
18
+ Requires-Dist: pillow
19
+ Requires-Dist: librosa
20
+ Requires-Dist: decord
25
21
  Provides-Extra: tts
26
22
  Requires-Dist: torch>=2.3.0; extra == "tts"
27
23
  Requires-Dist: torchaudio>=2.3.0; extra == "tts"
28
- Requires-Dist: transformers<4.53.0,>=4.51.0; extra == "tts"
29
- Requires-Dist: safetensors>=0.4.3; extra == "tts"
30
- Requires-Dist: onnxruntime<=1.21.0,>=1.18.0; extra == "tts"
31
- Requires-Dist: onnx; extra == "tts"
24
+ Requires-Dist: transformers>=4.49.0; extra == "tts"
32
25
  Requires-Dist: hyperpyyaml; extra == "tts"
33
- Requires-Dist: openai-whisper==20231117; extra == "tts"
34
- Requires-Dist: tqdm>=4.65.0; extra == "tts"
26
+ Requires-Dist: openai-whisper; extra == "tts"
27
+ Requires-Dist: tqdm; extra == "tts"
35
28
  Requires-Dist: tiktoken; extra == "tts"
36
29
  Requires-Dist: inflect; extra == "tts"
37
- Requires-Dist: omegaconf>=2.0.6; extra == "tts"
38
- Requires-Dist: conformer==0.3.2; extra == "tts"
39
- Requires-Dist: einops==0.8.1; extra == "tts"
40
- Requires-Dist: hydra-core; extra == "tts"
41
- Requires-Dist: lightning==2.2.4; extra == "tts"
42
- Requires-Dist: rich; extra == "tts"
43
- Requires-Dist: gdown==5.2.0; extra == "tts"
44
- Requires-Dist: matplotlib; extra == "tts"
45
- Requires-Dist: wget; extra == "tts"
46
- Requires-Dist: pyarrow; extra == "tts"
47
- Requires-Dist: pyworld; extra == "tts"
48
- Requires-Dist: scipy>=1.10.0; extra == "tts"
49
- Requires-Dist: pyyaml; extra == "tts"
50
- Requires-Dist: regex; extra == "tts"
51
- Requires-Dist: soundfile==0.12.1; extra == "tts"
52
- Requires-Dist: diffusers==0.29.0; extra == "tts"
30
+ Requires-Dist: omegaconf; extra == "tts"
31
+ Requires-Dist: einops; extra == "tts"
53
32
  Provides-Extra: streaming
54
33
  Requires-Dist: minicpmo-utils[tts]; extra == "streaming"
34
+ Requires-Dist: onnxruntime>=1.18.0; extra == "streaming"
35
+ Requires-Dist: diffusers; extra == "streaming"
36
+ Provides-Extra: gpu
37
+ Requires-Dist: onnxruntime-gpu>=1.18.0; sys_platform == "linux" and extra == "gpu"
55
38
  Provides-Extra: all
56
- Requires-Dist: minicpmo-utils[streaming,tts]; extra == "all"
39
+ Requires-Dist: minicpmo-utils[gpu,streaming,tts]; extra == "all"
57
40
 
58
41
  ## minicpmo-utils
59
42
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "minicpmo-utils"
7
- version = "0.0.6"
7
+ version = "0.1.1"
8
8
  description = "Unified utilities package for MiniCPM-o: includes cosyvoice + stepaudio2 and extensible utils."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -31,15 +31,10 @@ classifiers = [
31
31
  # - s3tokenizer (来自 S3Tokenizer-main)
32
32
  # - minicpmo (本项目扩展 utils 的统一入口:from minicpmo.utils import ...)
33
33
  dependencies = [
34
- # 锁定下限,避免 pip 回退到旧版本触发 setuptools Feature 问题
35
- "MarkupSafe>=2.1.0",
36
- "Jinja2>=3.1.0",
37
- "numpy>=1.24.0",
38
- "pillow==10.4.0",
39
- "librosa==0.9.0",
40
- "decord==0.6.0",
41
- "moviepy==2.1.2",
42
- "numba==0.61.2",
34
+ "numpy",
35
+ "pillow",
36
+ "librosa",
37
+ "decord",
43
38
  ]
44
39
 
45
40
  [project.optional-dependencies]
@@ -47,56 +42,30 @@ dependencies = [
47
42
  tts = [
48
43
  "torch>=2.3.0",
49
44
  "torchaudio>=2.3.0",
50
- "transformers>=4.51.0,<4.53.0", # 4.52+ 有兼容性问题
51
- "safetensors>=0.4.3",
52
- "onnxruntime>=1.18.0,<=1.21.0",
53
- "onnx",
45
+ "transformers>=4.49.0",
54
46
  "hyperpyyaml",
55
- "openai-whisper==20231117",
56
- "tqdm>=4.65.0",
47
+ "openai-whisper",
48
+ "tqdm",
57
49
  "tiktoken",
58
50
  "inflect",
59
- "omegaconf>=2.0.6",
60
- "conformer==0.3.2",
61
- "einops==0.8.1",
62
- "hydra-core",
63
- "lightning==2.2.4",
64
- "rich",
65
- "gdown==5.2.0",
66
- "matplotlib",
67
- "wget",
68
- "pyarrow",
69
- "pyworld",
70
- # 新增依赖
71
- "scipy>=1.10.0",
72
- "pyyaml",
73
- "regex",
74
- "soundfile==0.12.1",
75
- "diffusers==0.29.0"
51
+ "omegaconf",
52
+ "einops",
76
53
  ]
77
54
 
78
- # stepaudio2 基础依赖(token2wav 等)
55
+ # stepaudio2 / streaming 相关依赖
79
56
  streaming = [
80
57
  "minicpmo-utils[tts]", # streaming 依赖 tts
58
+ "onnxruntime>=1.18.0",
59
+ "diffusers",
81
60
  ]
82
61
 
83
- # # stepaudio2 Flash 推理引擎依赖(flashcosyvoice.engine 模块需要)
84
- # streaming-flash = [
85
- # "minicpmo-utils[streaming]",
86
- # "flash-attn>=2.6.0; sys_platform == 'linux'",
87
- # "triton>=2.3.0; sys_platform == 'linux'",
88
- # "safetensors>=0.4.3",
89
- # "pynvml",
90
- # "xxhash",
91
- # ]
92
-
93
- # # Linux GPU onnxruntime 可以很重,且与环境强相关,保留为可选 extra
94
- # gpu = [
95
- # "onnxruntime-gpu>=1.18.0,<=1.23.2; sys_platform == 'linux'",
96
- # ]
62
+ # Linux GPU onnxruntime 可以很重,且与环境强相关,保留为可选 extra
63
+ gpu = [
64
+ "onnxruntime-gpu>=1.18.0; sys_platform == 'linux'",
65
+ ]
97
66
 
98
67
  all = [
99
- "minicpmo-utils[tts,streaming]",
68
+ "minicpmo-utils[tts,streaming,gpu]",
100
69
  ]
101
70
 
102
71
  [tool.setuptools]
@@ -20,8 +20,7 @@ import torch
20
20
  import torchaudio
21
21
  import logging
22
22
  logging.getLogger('matplotlib').setLevel(logging.WARNING)
23
- logging.getLogger('numba').setLevel(logging.WARNING)
24
- logging.basicConfig(level=logging.INFO,
23
+ logging.basicConfig(level=logging.DEBUG,
25
24
  format='%(asctime)s %(levelname)s %(message)s')
26
25
 
27
26
 
@@ -12,3 +12,13 @@
12
12
  """
13
13
 
14
14
  from .version import __version__
15
+
16
+ # Eager re-exports to allow:
17
+ # from minicpmo import cosyvoice, stepaudio2, matcha
18
+ # 而不需要懒加载。
19
+ import cosyvoice as cosyvoice
20
+ import stepaudio2 as stepaudio2
21
+ import matcha as matcha
22
+
23
+ __all__ = ["__version__", "cosyvoice", "stepaudio2", "matcha"]
24
+
@@ -26,84 +26,6 @@ MAX_NUM_FRAMES = int(os.getenv("MAX_NUM_FRAMES", 64))
26
26
  VIDEO_MME_DURATION = os.getenv("VIDEO_MME_DURATION", "ALL")
27
27
 
28
28
 
29
- def find_cjk_font():
30
- """
31
- 查找支持中文的字体。
32
- 按优先级返回字体名称或路径,如果找不到则返回 None。
33
- """
34
- # 常见的中文字体名称(按优先级排列)
35
- font_names = [
36
- # Noto CJK 字体(推荐)
37
- "Noto Sans CJK SC",
38
- "Noto Sans CJK",
39
- "NotoSansCJK-Regular",
40
- # 文泉驿字体
41
- "WenQuanYi Zen Hei",
42
- "WenQuanYi Micro Hei",
43
- "文泉驿正黑",
44
- "文泉驿微米黑",
45
- # 思源字体
46
- "Source Han Sans SC",
47
- "Source Han Sans CN",
48
- # 其他常见中文字体
49
- "SimHei",
50
- "Microsoft YaHei",
51
- "PingFang SC",
52
- "Hiragino Sans GB",
53
- "STHeiti",
54
- "AR PL UMing CN",
55
- "AR PL UKai CN",
56
- ]
57
-
58
- # 常见的中文字体文件路径
59
- font_paths = [
60
- # Noto CJK
61
- "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
62
- "/usr/share/fonts/noto-cjk/NotoSansCJK-Regular.ttc",
63
- "/usr/share/fonts/google-noto-cjk/NotoSansCJK-Regular.ttc",
64
- "/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc",
65
- # 文泉驿
66
- "/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc",
67
- "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc",
68
- "/usr/share/fonts/wenquanyi/wqy-zenhei/wqy-zenhei.ttc",
69
- "/usr/share/fonts/wenquanyi/wqy-microhei/wqy-microhei.ttc",
70
- # macOS
71
- "/System/Library/Fonts/PingFang.ttc",
72
- "/Library/Fonts/Arial Unicode.ttf",
73
- # Windows (WSL)
74
- "/mnt/c/Windows/Fonts/msyh.ttc",
75
- "/mnt/c/Windows/Fonts/simhei.ttf",
76
- ]
77
-
78
- # 首先尝试使用 fc-list 查找已安装的中文字体
79
- try:
80
- result = subprocess.run(
81
- ["fc-list", ":lang=zh", "-f", "%{family}\n"],
82
- capture_output=True,
83
- text=True,
84
- timeout=5,
85
- )
86
- if result.returncode == 0:
87
- installed_fonts = set(result.stdout.strip().split("\n"))
88
- for font_name in font_names:
89
- for installed in installed_fonts:
90
- if font_name.lower() in installed.lower():
91
- logger.info(f"Found CJK font via fc-list: {installed}")
92
- return installed.split(",")[0] # 取第一个名称
93
- except Exception as e:
94
- logger.debug(f"fc-list failed: {e}")
95
-
96
- # 然后检查常见的字体文件路径
97
- for path in font_paths:
98
- if os.path.exists(path):
99
- logger.info(f"Found CJK font file: {path}")
100
- return path
101
-
102
- logger.warning("No CJK font found. Chinese subtitles may display as boxes/garbled text.")
103
- logger.warning("Install Chinese fonts with: sudo apt-get install fonts-noto-cjk")
104
- return None
105
-
106
-
107
29
  def concat_images(images, bg_color=(255, 255, 255), cell_size=None, line_color=(0, 0, 0), line_width=6):
108
30
  """
109
31
  images: List[PIL.Image.Image]
@@ -649,18 +571,9 @@ def generate_duplex_video(
649
571
 
650
572
  if has_subtitles:
651
573
  srt_path_escaped = srt_path.replace("\\", "\\\\").replace("'", "'\\''").replace(":", "\\:")
652
-
653
- # 查找支持中文的字体
654
- cjk_font = find_cjk_font()
655
- font_style = ""
656
- if cjk_font:
657
- # 转义字体路径/名称中的特殊字符
658
- font_escaped = cjk_font.replace("\\", "\\\\").replace("'", "'\\''").replace(":", "\\:")
659
- font_style = f"FontName={font_escaped},"
660
-
661
574
  subtitle_filter = (
662
575
  f"subtitles='{srt_path_escaped}':"
663
- f"force_style='{font_style}FontSize=28,"
576
+ f"force_style='FontSize=28,"
664
577
  f"PrimaryColour=&H00FFFFFF,"
665
578
  f"OutlineColour=&H00000000,"
666
579
  f"BorderStyle=3,"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: minicpmo-utils
3
- Version: 0.0.6
3
+ Version: 0.1.1
4
4
  Summary: Unified utilities package for MiniCPM-o: includes cosyvoice + stepaudio2 and extensible utils.
5
5
  Author: MiniCPM-o Utils Maintainers
6
6
  License: Apache-2.0
@@ -14,46 +14,29 @@ Classifier: Programming Language :: Python :: 3.11
14
14
  Classifier: Programming Language :: Python :: 3.12
15
15
  Requires-Python: >=3.10
16
16
  Description-Content-Type: text/markdown
17
- Requires-Dist: MarkupSafe>=2.1.0
18
- Requires-Dist: Jinja2>=3.1.0
19
- Requires-Dist: numpy>=1.24.0
20
- Requires-Dist: pillow==10.4.0
21
- Requires-Dist: librosa==0.9.0
22
- Requires-Dist: decord==0.6.0
23
- Requires-Dist: moviepy==2.1.2
24
- Requires-Dist: numba==0.61.2
17
+ Requires-Dist: numpy
18
+ Requires-Dist: pillow
19
+ Requires-Dist: librosa
20
+ Requires-Dist: decord
25
21
  Provides-Extra: tts
26
22
  Requires-Dist: torch>=2.3.0; extra == "tts"
27
23
  Requires-Dist: torchaudio>=2.3.0; extra == "tts"
28
- Requires-Dist: transformers<4.53.0,>=4.51.0; extra == "tts"
29
- Requires-Dist: safetensors>=0.4.3; extra == "tts"
30
- Requires-Dist: onnxruntime<=1.21.0,>=1.18.0; extra == "tts"
31
- Requires-Dist: onnx; extra == "tts"
24
+ Requires-Dist: transformers>=4.49.0; extra == "tts"
32
25
  Requires-Dist: hyperpyyaml; extra == "tts"
33
- Requires-Dist: openai-whisper==20231117; extra == "tts"
34
- Requires-Dist: tqdm>=4.65.0; extra == "tts"
26
+ Requires-Dist: openai-whisper; extra == "tts"
27
+ Requires-Dist: tqdm; extra == "tts"
35
28
  Requires-Dist: tiktoken; extra == "tts"
36
29
  Requires-Dist: inflect; extra == "tts"
37
- Requires-Dist: omegaconf>=2.0.6; extra == "tts"
38
- Requires-Dist: conformer==0.3.2; extra == "tts"
39
- Requires-Dist: einops==0.8.1; extra == "tts"
40
- Requires-Dist: hydra-core; extra == "tts"
41
- Requires-Dist: lightning==2.2.4; extra == "tts"
42
- Requires-Dist: rich; extra == "tts"
43
- Requires-Dist: gdown==5.2.0; extra == "tts"
44
- Requires-Dist: matplotlib; extra == "tts"
45
- Requires-Dist: wget; extra == "tts"
46
- Requires-Dist: pyarrow; extra == "tts"
47
- Requires-Dist: pyworld; extra == "tts"
48
- Requires-Dist: scipy>=1.10.0; extra == "tts"
49
- Requires-Dist: pyyaml; extra == "tts"
50
- Requires-Dist: regex; extra == "tts"
51
- Requires-Dist: soundfile==0.12.1; extra == "tts"
52
- Requires-Dist: diffusers==0.29.0; extra == "tts"
30
+ Requires-Dist: omegaconf; extra == "tts"
31
+ Requires-Dist: einops; extra == "tts"
53
32
  Provides-Extra: streaming
54
33
  Requires-Dist: minicpmo-utils[tts]; extra == "streaming"
34
+ Requires-Dist: onnxruntime>=1.18.0; extra == "streaming"
35
+ Requires-Dist: diffusers; extra == "streaming"
36
+ Provides-Extra: gpu
37
+ Requires-Dist: onnxruntime-gpu>=1.18.0; sys_platform == "linux" and extra == "gpu"
55
38
  Provides-Extra: all
56
- Requires-Dist: minicpmo-utils[streaming,tts]; extra == "all"
39
+ Requires-Dist: minicpmo-utils[gpu,streaming,tts]; extra == "all"
57
40
 
58
41
  ## minicpmo-utils
59
42
 
@@ -0,0 +1,29 @@
1
+ numpy
2
+ pillow
3
+ librosa
4
+ decord
5
+
6
+ [all]
7
+ minicpmo-utils[gpu,streaming,tts]
8
+
9
+ [gpu]
10
+
11
+ [gpu:sys_platform == "linux"]
12
+ onnxruntime-gpu>=1.18.0
13
+
14
+ [streaming]
15
+ minicpmo-utils[tts]
16
+ onnxruntime>=1.18.0
17
+ diffusers
18
+
19
+ [tts]
20
+ torch>=2.3.0
21
+ torchaudio>=2.3.0
22
+ transformers>=4.49.0
23
+ hyperpyyaml
24
+ openai-whisper
25
+ tqdm
26
+ tiktoken
27
+ inflect
28
+ omegaconf
29
+ einops
@@ -1,43 +0,0 @@
1
- MarkupSafe>=2.1.0
2
- Jinja2>=3.1.0
3
- numpy>=1.24.0
4
- pillow==10.4.0
5
- librosa==0.9.0
6
- decord==0.6.0
7
- moviepy==2.1.2
8
- numba==0.61.2
9
-
10
- [all]
11
- minicpmo-utils[streaming,tts]
12
-
13
- [streaming]
14
- minicpmo-utils[tts]
15
-
16
- [tts]
17
- torch>=2.3.0
18
- torchaudio>=2.3.0
19
- transformers<4.53.0,>=4.51.0
20
- safetensors>=0.4.3
21
- onnxruntime<=1.21.0,>=1.18.0
22
- onnx
23
- hyperpyyaml
24
- openai-whisper==20231117
25
- tqdm>=4.65.0
26
- tiktoken
27
- inflect
28
- omegaconf>=2.0.6
29
- conformer==0.3.2
30
- einops==0.8.1
31
- hydra-core
32
- lightning==2.2.4
33
- rich
34
- gdown==5.2.0
35
- matplotlib
36
- wget
37
- pyarrow
38
- pyworld
39
- scipy>=1.10.0
40
- pyyaml
41
- regex
42
- soundfile==0.12.1
43
- diffusers==0.29.0
File without changes
File without changes