minicpmo-utils 0.0.6__tar.gz → 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- minicpmo_utils-0.1.0/PKG-INFO +72 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/README.md +2 -17
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/pyproject.toml +21 -59
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/cli/cosyvoice.py +5 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/utils/file_utils.py +1 -2
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/minicpmo/__init__.py +10 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/minicpmo/utils.py +1 -88
- minicpmo_utils-0.1.0/src/minicpmo_utils.egg-info/PKG-INFO +72 -0
- minicpmo_utils-0.1.0/src/minicpmo_utils.egg-info/requires.txt +20 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/token2wav.py +1 -0
- minicpmo_utils-0.0.6/PKG-INFO +0 -110
- minicpmo_utils-0.0.6/src/minicpmo_utils.egg-info/PKG-INFO +0 -110
- minicpmo_utils-0.0.6/src/minicpmo_utils.egg-info/requires.txt +0 -43
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/setup.cfg +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/__init__.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/bin/average_model.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/bin/export_jit.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/bin/export_onnx.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/bin/inference_deprecated.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/bin/train.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/cli/__init__.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/cli/frontend.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/cli/model.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/dataset/__init__.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/dataset/dataset.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/dataset/processor.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/flow/decoder.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/flow/flow.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/flow/flow_matching.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/flow/length_regulator.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/hifigan/discriminator.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/hifigan/f0_predictor.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/hifigan/generator.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/hifigan/hifigan.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/llm/llm.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/tokenizer/tokenizer.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/transformer/__init__.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/transformer/activation.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/transformer/attention.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/transformer/convolution.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/transformer/decoder.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/transformer/decoder_layer.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/transformer/embedding.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/transformer/encoder.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/transformer/encoder_layer.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/transformer/label_smoothing_loss.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/transformer/positionwise_feed_forward.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/transformer/subsampling.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/transformer/upsample_encoder.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/utils/__init__.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/utils/class_utils.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/utils/common.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/utils/executor.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/utils/frontend_utils.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/utils/losses.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/utils/mask.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/utils/scheduler.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/utils/train_utils.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/cosyvoice/vllm/cosyvoice2.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/__init__.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/app.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/cli.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/hifigan/__init__.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/hifigan/config.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/hifigan/denoiser.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/hifigan/env.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/hifigan/meldataset.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/hifigan/models.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/hifigan/xutils.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/models/__init__.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/models/baselightningmodule.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/models/components/__init__.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/models/components/decoder.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/models/components/flow_matching.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/models/components/text_encoder.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/models/components/transformer.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/models/matcha_tts.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/onnx/__init__.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/onnx/export.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/onnx/infer.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/text/__init__.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/text/cleaners.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/text/numbers.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/text/symbols.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/train.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/utils/__init__.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/utils/audio.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/utils/generate_data_statistics.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/utils/instantiators.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/utils/logging_utils.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/utils/model.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/utils/monotonic_align/__init__.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/utils/monotonic_align/setup.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/utils/pylogger.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/utils/rich_utils.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/matcha/utils/utils.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/minicpmo/version.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/minicpmo_utils.egg-info/SOURCES.txt +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/minicpmo_utils.egg-info/dependency_links.txt +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/minicpmo_utils.egg-info/top_level.txt +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/s3tokenizer/__init__.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/s3tokenizer/assets/BAC009S0764W0121.wav +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/s3tokenizer/assets/BAC009S0764W0122.wav +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/s3tokenizer/assets/mel_filters.npz +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/s3tokenizer/cli.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/s3tokenizer/model.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/s3tokenizer/model_v2.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/s3tokenizer/utils.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/__init__.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/cosyvoice2/__init__.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/cosyvoice2/flow/__init__.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/cosyvoice2/flow/decoder_dit.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/cosyvoice2/flow/flow.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/cosyvoice2/flow/flow_matching.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/cosyvoice2/transformer/__init__.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/cosyvoice2/transformer/attention.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/cosyvoice2/transformer/embedding.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/cosyvoice2/transformer/encoder_layer.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/cosyvoice2/transformer/positionwise_feed_forward.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/cosyvoice2/transformer/subsampling.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/cosyvoice2/transformer/upsample_encoder_v2.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/cosyvoice2/utils/__init__.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/cosyvoice2/utils/class_utils.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/cosyvoice2/utils/common.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/cosyvoice2/utils/mask.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/__init__.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/cli.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/config.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/cosyvoice2.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/cosyvoice3.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/engine/__init__.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/engine/block_manager.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/engine/llm_engine.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/engine/model_runner.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/engine/scheduler.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/engine/sequence.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/modules/__init__.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/modules/flow.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/modules/flow_components/__init__.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/modules/flow_components/estimator.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/modules/flow_components/upsample_encoder.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/modules/hifigan.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/modules/hifigan_components/__init__.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/modules/hifigan_components/layers.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/modules/qwen2.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/modules/qwen2_components/__init__.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/modules/qwen2_components/layers.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/modules/sampler.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/utils/__init__.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/utils/audio.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/utils/context.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/utils/loader.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/flashcosyvoice/utils/memory.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/stepaudio2.py +0 -0
- {minicpmo_utils-0.0.6 → minicpmo_utils-0.1.0}/src/stepaudio2/utils.py +0 -0
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: minicpmo-utils
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Unified utilities package for MiniCPM-o: includes cosyvoice + stepaudio2 and extensible utils.
|
|
5
|
+
Author: MiniCPM-o Utils Maintainers
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Keywords: minicpmo,audio,tts,utils,cosyvoice,stepaudio2
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Requires-Python: >=3.10
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
Requires-Dist: torch>=2.3.0
|
|
18
|
+
Requires-Dist: torchaudio>=2.3.0
|
|
19
|
+
Requires-Dist: transformers>=4.49.0
|
|
20
|
+
Requires-Dist: numpy
|
|
21
|
+
Requires-Dist: hyperpyyaml
|
|
22
|
+
Requires-Dist: modelscope
|
|
23
|
+
Requires-Dist: openai-whisper
|
|
24
|
+
Requires-Dist: tqdm
|
|
25
|
+
Requires-Dist: tiktoken
|
|
26
|
+
Requires-Dist: inflect
|
|
27
|
+
Requires-Dist: omegaconf
|
|
28
|
+
Requires-Dist: einops
|
|
29
|
+
Requires-Dist: librosa
|
|
30
|
+
Requires-Dist: onnxruntime>=1.18.0
|
|
31
|
+
Requires-Dist: diffusers
|
|
32
|
+
Provides-Extra: gpu
|
|
33
|
+
Requires-Dist: onnxruntime-gpu>=1.18.0; sys_platform == "linux" and extra == "gpu"
|
|
34
|
+
|
|
35
|
+
## minicpmo-utils
|
|
36
|
+
|
|
37
|
+
一个统一安装的工具包(一个 PyPI 分发包),把仓库里的 `cosyvoice` 与 `stepaudio2` 一起打进同一个 wheel,并预留 `minicpmo` 作为后续扩展 utils 的统一入口。
|
|
38
|
+
|
|
39
|
+
### 安装方式
|
|
40
|
+
|
|
41
|
+
- 从源码本地安装(开发态,可编辑):
|
|
42
|
+
```bash
|
|
43
|
+
cd minicpmo-utils
|
|
44
|
+
pip install -e .
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
- 构建并安装 wheel(推荐分发):
|
|
48
|
+
```bash
|
|
49
|
+
cd minicpmo-utils
|
|
50
|
+
python -m build # 生成 dist/*.whl
|
|
51
|
+
pip install dist/minicpmo_utils-0.1.0-py3-none-any.whl
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### 导入方式
|
|
55
|
+
|
|
56
|
+
包会暴露以下顶层模块,安装后可直接使用:
|
|
57
|
+
- `import cosyvoice`
|
|
58
|
+
- `import stepaudio2`
|
|
59
|
+
- `import matcha`
|
|
60
|
+
- `import minicpmo`
|
|
61
|
+
|
|
62
|
+
也支持通过统一入口导入子包:
|
|
63
|
+
```python
|
|
64
|
+
from minicpmo import cosyvoice, stepaudio2, matcha
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
以及通过统一的 utils 入口使用通用工具函数,例如:
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
from minicpmo.utils import get_video_frame_audio_segments
|
|
71
|
+
```
|
|
72
|
+
|
|
@@ -4,32 +4,17 @@
|
|
|
4
4
|
|
|
5
5
|
### 安装方式
|
|
6
6
|
|
|
7
|
-
-
|
|
7
|
+
- 从源码本地安装(开发态,可编辑):
|
|
8
8
|
```bash
|
|
9
9
|
cd minicpmo-utils
|
|
10
10
|
pip install -e .
|
|
11
11
|
```
|
|
12
12
|
|
|
13
|
-
- 如果只想安装 cosyvoice 相关依赖(TTS):
|
|
14
|
-
```bash
|
|
15
|
-
pip install -e .[tts]
|
|
16
|
-
```
|
|
17
|
-
|
|
18
|
-
- 如果只想安装 stepaudio2 / streaming 相关依赖:
|
|
19
|
-
```bash
|
|
20
|
-
pip install -e .[streaming]
|
|
21
|
-
```
|
|
22
|
-
|
|
23
|
-
- 同时安装 cosyvoice + stepaudio2 相关依赖:
|
|
24
|
-
```bash
|
|
25
|
-
pip install -e .[tts,streaming]
|
|
26
|
-
```
|
|
27
|
-
|
|
28
13
|
- 构建并安装 wheel(推荐分发):
|
|
29
14
|
```bash
|
|
30
15
|
cd minicpmo-utils
|
|
31
16
|
python -m build # 生成 dist/*.whl
|
|
32
|
-
pip install
|
|
17
|
+
pip install dist/minicpmo_utils-0.1.0-py3-none-any.whl
|
|
33
18
|
```
|
|
34
19
|
|
|
35
20
|
### 导入方式
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "minicpmo-utils"
|
|
7
|
-
version = "0.0.6"
|
|
7
|
+
version = "0.1.0"
|
|
8
8
|
description = "Unified utilities package for MiniCPM-o: includes cosyvoice + stepaudio2 and extensible utils."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -31,72 +31,34 @@ classifiers = [
|
|
|
31
31
|
# - s3tokenizer (来自 S3Tokenizer-main)
|
|
32
32
|
# - minicpmo (本项目扩展 utils 的统一入口:from minicpmo.utils import ...)
|
|
33
33
|
dependencies = [
|
|
34
|
-
#
|
|
35
|
-
"MarkupSafe>=2.1.0",
|
|
36
|
-
"Jinja2>=3.1.0",
|
|
37
|
-
"numpy>=1.24.0",
|
|
38
|
-
"pillow==10.4.0",
|
|
39
|
-
"librosa==0.9.0",
|
|
40
|
-
"decord==0.6.0",
|
|
41
|
-
"moviepy==2.1.2",
|
|
42
|
-
"numba==0.61.2",
|
|
43
|
-
]
|
|
44
|
-
|
|
45
|
-
[project.optional-dependencies]
|
|
46
|
-
# cosyvoice TTS 相关依赖
|
|
47
|
-
tts = [
|
|
34
|
+
# Core ML frameworks (align to stepaudio2's requirements)
|
|
48
35
|
"torch>=2.3.0",
|
|
49
36
|
"torchaudio>=2.3.0",
|
|
50
|
-
"transformers>=4.
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
"
|
|
37
|
+
"transformers>=4.49.0",
|
|
38
|
+
|
|
39
|
+
# Shared / common
|
|
40
|
+
"numpy",
|
|
54
41
|
"hyperpyyaml",
|
|
55
|
-
|
|
56
|
-
|
|
42
|
+
|
|
43
|
+
# cosyvoice side
|
|
44
|
+
"modelscope",
|
|
45
|
+
"openai-whisper",
|
|
46
|
+
"tqdm",
|
|
57
47
|
"tiktoken",
|
|
58
48
|
"inflect",
|
|
59
|
-
"omegaconf
|
|
60
|
-
"
|
|
61
|
-
"einops==0.8.1",
|
|
62
|
-
"hydra-core",
|
|
63
|
-
"lightning==2.2.4",
|
|
64
|
-
"rich",
|
|
65
|
-
"gdown==5.2.0",
|
|
66
|
-
"matplotlib",
|
|
67
|
-
"wget",
|
|
68
|
-
"pyarrow",
|
|
69
|
-
"pyworld",
|
|
70
|
-
# 新增依赖
|
|
71
|
-
"scipy>=1.10.0",
|
|
72
|
-
"pyyaml",
|
|
73
|
-
"regex",
|
|
74
|
-
"soundfile==0.12.1",
|
|
75
|
-
"diffusers==0.29.0"
|
|
76
|
-
]
|
|
49
|
+
"omegaconf",
|
|
50
|
+
"einops",
|
|
77
51
|
|
|
78
|
-
# stepaudio2
|
|
79
|
-
|
|
80
|
-
"
|
|
52
|
+
# stepaudio2 side
|
|
53
|
+
"librosa",
|
|
54
|
+
"onnxruntime>=1.18.0",
|
|
55
|
+
"diffusers",
|
|
81
56
|
]
|
|
82
57
|
|
|
83
|
-
|
|
84
|
-
#
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
# "triton>=2.3.0; sys_platform == 'linux'",
|
|
88
|
-
# "safetensors>=0.4.3",
|
|
89
|
-
# "pynvml",
|
|
90
|
-
# "xxhash",
|
|
91
|
-
# ]
|
|
92
|
-
|
|
93
|
-
# # Linux GPU onnxruntime 可以很重,且与环境强相关,保留为可选 extra
|
|
94
|
-
# gpu = [
|
|
95
|
-
# "onnxruntime-gpu>=1.18.0,<=1.23.2; sys_platform == 'linux'",
|
|
96
|
-
# ]
|
|
97
|
-
|
|
98
|
-
all = [
|
|
99
|
-
"minicpmo-utils[tts,streaming]",
|
|
58
|
+
[project.optional-dependencies]
|
|
59
|
+
# Linux GPU onnxruntime can be heavy and environment-specific; keep as an opt-in extra.
|
|
60
|
+
gpu = [
|
|
61
|
+
"onnxruntime-gpu>=1.18.0; sys_platform == 'linux'",
|
|
100
62
|
]
|
|
101
63
|
|
|
102
64
|
[tool.setuptools]
|
|
@@ -16,6 +16,7 @@ import time
|
|
|
16
16
|
from typing import Generator
|
|
17
17
|
from tqdm import tqdm
|
|
18
18
|
from hyperpyyaml import load_hyperpyyaml
|
|
19
|
+
from modelscope import snapshot_download
|
|
19
20
|
import torch
|
|
20
21
|
from cosyvoice.cli.frontend import CosyVoiceFrontEnd
|
|
21
22
|
from cosyvoice.cli.model import CosyVoiceModel, CosyVoice2Model
|
|
@@ -29,6 +30,8 @@ class CosyVoice:
|
|
|
29
30
|
self.instruct = True if '-Instruct' in model_dir else False
|
|
30
31
|
self.model_dir = model_dir
|
|
31
32
|
self.fp16 = fp16
|
|
33
|
+
if not os.path.exists(model_dir):
|
|
34
|
+
model_dir = snapshot_download(model_dir)
|
|
32
35
|
hyper_yaml_path = '{}/cosyvoice.yaml'.format(model_dir)
|
|
33
36
|
if not os.path.exists(hyper_yaml_path):
|
|
34
37
|
raise ValueError('{} not found!'.format(hyper_yaml_path))
|
|
@@ -151,6 +154,8 @@ class CosyVoice2(CosyVoice):
|
|
|
151
154
|
self.instruct = True if '-Instruct' in model_dir else False
|
|
152
155
|
self.model_dir = model_dir
|
|
153
156
|
self.fp16 = fp16
|
|
157
|
+
if not os.path.exists(model_dir):
|
|
158
|
+
model_dir = snapshot_download(model_dir)
|
|
154
159
|
|
|
155
160
|
if config_path is None:
|
|
156
161
|
config_path = f'{model_dir}/cosyvoice2.yaml'
|
|
@@ -20,8 +20,7 @@ import torch
|
|
|
20
20
|
import torchaudio
|
|
21
21
|
import logging
|
|
22
22
|
logging.getLogger('matplotlib').setLevel(logging.WARNING)
|
|
23
|
-
logging.
|
|
24
|
-
logging.basicConfig(level=logging.INFO,
|
|
23
|
+
logging.basicConfig(level=logging.DEBUG,
|
|
25
24
|
format='%(asctime)s %(levelname)s %(message)s')
|
|
26
25
|
|
|
27
26
|
|
|
@@ -12,3 +12,13 @@
|
|
|
12
12
|
"""
|
|
13
13
|
|
|
14
14
|
from .version import __version__
|
|
15
|
+
|
|
16
|
+
# Eager re-exports to allow:
|
|
17
|
+
# from minicpmo import cosyvoice, stepaudio2, matcha
|
|
18
|
+
# 而不需要懒加载。
|
|
19
|
+
import cosyvoice as cosyvoice
|
|
20
|
+
import stepaudio2 as stepaudio2
|
|
21
|
+
import matcha as matcha
|
|
22
|
+
|
|
23
|
+
__all__ = ["__version__", "cosyvoice", "stepaudio2", "matcha"]
|
|
24
|
+
|
|
@@ -26,84 +26,6 @@ MAX_NUM_FRAMES = int(os.getenv("MAX_NUM_FRAMES", 64))
|
|
|
26
26
|
VIDEO_MME_DURATION = os.getenv("VIDEO_MME_DURATION", "ALL")
|
|
27
27
|
|
|
28
28
|
|
|
29
|
-
def find_cjk_font():
|
|
30
|
-
"""
|
|
31
|
-
查找支持中文的字体。
|
|
32
|
-
按优先级返回字体名称或路径,如果找不到则返回 None。
|
|
33
|
-
"""
|
|
34
|
-
# 常见的中文字体名称(按优先级排列)
|
|
35
|
-
font_names = [
|
|
36
|
-
# Noto CJK 字体(推荐)
|
|
37
|
-
"Noto Sans CJK SC",
|
|
38
|
-
"Noto Sans CJK",
|
|
39
|
-
"NotoSansCJK-Regular",
|
|
40
|
-
# 文泉驿字体
|
|
41
|
-
"WenQuanYi Zen Hei",
|
|
42
|
-
"WenQuanYi Micro Hei",
|
|
43
|
-
"文泉驿正黑",
|
|
44
|
-
"文泉驿微米黑",
|
|
45
|
-
# 思源字体
|
|
46
|
-
"Source Han Sans SC",
|
|
47
|
-
"Source Han Sans CN",
|
|
48
|
-
# 其他常见中文字体
|
|
49
|
-
"SimHei",
|
|
50
|
-
"Microsoft YaHei",
|
|
51
|
-
"PingFang SC",
|
|
52
|
-
"Hiragino Sans GB",
|
|
53
|
-
"STHeiti",
|
|
54
|
-
"AR PL UMing CN",
|
|
55
|
-
"AR PL UKai CN",
|
|
56
|
-
]
|
|
57
|
-
|
|
58
|
-
# 常见的中文字体文件路径
|
|
59
|
-
font_paths = [
|
|
60
|
-
# Noto CJK
|
|
61
|
-
"/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
|
|
62
|
-
"/usr/share/fonts/noto-cjk/NotoSansCJK-Regular.ttc",
|
|
63
|
-
"/usr/share/fonts/google-noto-cjk/NotoSansCJK-Regular.ttc",
|
|
64
|
-
"/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc",
|
|
65
|
-
# 文泉驿
|
|
66
|
-
"/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc",
|
|
67
|
-
"/usr/share/fonts/truetype/wqy/wqy-microhei.ttc",
|
|
68
|
-
"/usr/share/fonts/wenquanyi/wqy-zenhei/wqy-zenhei.ttc",
|
|
69
|
-
"/usr/share/fonts/wenquanyi/wqy-microhei/wqy-microhei.ttc",
|
|
70
|
-
# macOS
|
|
71
|
-
"/System/Library/Fonts/PingFang.ttc",
|
|
72
|
-
"/Library/Fonts/Arial Unicode.ttf",
|
|
73
|
-
# Windows (WSL)
|
|
74
|
-
"/mnt/c/Windows/Fonts/msyh.ttc",
|
|
75
|
-
"/mnt/c/Windows/Fonts/simhei.ttf",
|
|
76
|
-
]
|
|
77
|
-
|
|
78
|
-
# 首先尝试使用 fc-list 查找已安装的中文字体
|
|
79
|
-
try:
|
|
80
|
-
result = subprocess.run(
|
|
81
|
-
["fc-list", ":lang=zh", "-f", "%{family}\n"],
|
|
82
|
-
capture_output=True,
|
|
83
|
-
text=True,
|
|
84
|
-
timeout=5,
|
|
85
|
-
)
|
|
86
|
-
if result.returncode == 0:
|
|
87
|
-
installed_fonts = set(result.stdout.strip().split("\n"))
|
|
88
|
-
for font_name in font_names:
|
|
89
|
-
for installed in installed_fonts:
|
|
90
|
-
if font_name.lower() in installed.lower():
|
|
91
|
-
logger.info(f"Found CJK font via fc-list: {installed}")
|
|
92
|
-
return installed.split(",")[0] # 取第一个名称
|
|
93
|
-
except Exception as e:
|
|
94
|
-
logger.debug(f"fc-list failed: {e}")
|
|
95
|
-
|
|
96
|
-
# 然后检查常见的字体文件路径
|
|
97
|
-
for path in font_paths:
|
|
98
|
-
if os.path.exists(path):
|
|
99
|
-
logger.info(f"Found CJK font file: {path}")
|
|
100
|
-
return path
|
|
101
|
-
|
|
102
|
-
logger.warning("No CJK font found. Chinese subtitles may display as boxes/garbled text.")
|
|
103
|
-
logger.warning("Install Chinese fonts with: sudo apt-get install fonts-noto-cjk")
|
|
104
|
-
return None
|
|
105
|
-
|
|
106
|
-
|
|
107
29
|
def concat_images(images, bg_color=(255, 255, 255), cell_size=None, line_color=(0, 0, 0), line_width=6):
|
|
108
30
|
"""
|
|
109
31
|
images: List[PIL.Image.Image]
|
|
@@ -649,18 +571,9 @@ def generate_duplex_video(
|
|
|
649
571
|
|
|
650
572
|
if has_subtitles:
|
|
651
573
|
srt_path_escaped = srt_path.replace("\\", "\\\\").replace("'", "'\\''").replace(":", "\\:")
|
|
652
|
-
|
|
653
|
-
# 查找支持中文的字体
|
|
654
|
-
cjk_font = find_cjk_font()
|
|
655
|
-
font_style = ""
|
|
656
|
-
if cjk_font:
|
|
657
|
-
# 转义字体路径/名称中的特殊字符
|
|
658
|
-
font_escaped = cjk_font.replace("\\", "\\\\").replace("'", "'\\''").replace(":", "\\:")
|
|
659
|
-
font_style = f"FontName={font_escaped},"
|
|
660
|
-
|
|
661
574
|
subtitle_filter = (
|
|
662
575
|
f"subtitles='{srt_path_escaped}':"
|
|
663
|
-
f"force_style='
|
|
576
|
+
f"force_style='FontSize=28,"
|
|
664
577
|
f"PrimaryColour=&H00FFFFFF,"
|
|
665
578
|
f"OutlineColour=&H00000000,"
|
|
666
579
|
f"BorderStyle=3,"
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: minicpmo-utils
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Unified utilities package for MiniCPM-o: includes cosyvoice + stepaudio2 and extensible utils.
|
|
5
|
+
Author: MiniCPM-o Utils Maintainers
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Keywords: minicpmo,audio,tts,utils,cosyvoice,stepaudio2
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Requires-Python: >=3.10
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
Requires-Dist: torch>=2.3.0
|
|
18
|
+
Requires-Dist: torchaudio>=2.3.0
|
|
19
|
+
Requires-Dist: transformers>=4.49.0
|
|
20
|
+
Requires-Dist: numpy
|
|
21
|
+
Requires-Dist: hyperpyyaml
|
|
22
|
+
Requires-Dist: modelscope
|
|
23
|
+
Requires-Dist: openai-whisper
|
|
24
|
+
Requires-Dist: tqdm
|
|
25
|
+
Requires-Dist: tiktoken
|
|
26
|
+
Requires-Dist: inflect
|
|
27
|
+
Requires-Dist: omegaconf
|
|
28
|
+
Requires-Dist: einops
|
|
29
|
+
Requires-Dist: librosa
|
|
30
|
+
Requires-Dist: onnxruntime>=1.18.0
|
|
31
|
+
Requires-Dist: diffusers
|
|
32
|
+
Provides-Extra: gpu
|
|
33
|
+
Requires-Dist: onnxruntime-gpu>=1.18.0; sys_platform == "linux" and extra == "gpu"
|
|
34
|
+
|
|
35
|
+
## minicpmo-utils
|
|
36
|
+
|
|
37
|
+
一个统一安装的工具包(一个 PyPI 分发包),把仓库里的 `cosyvoice` 与 `stepaudio2` 一起打进同一个 wheel,并预留 `minicpmo` 作为后续扩展 utils 的统一入口。
|
|
38
|
+
|
|
39
|
+
### 安装方式
|
|
40
|
+
|
|
41
|
+
- 从源码本地安装(开发态,可编辑):
|
|
42
|
+
```bash
|
|
43
|
+
cd minicpmo-utils
|
|
44
|
+
pip install -e .
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
- 构建并安装 wheel(推荐分发):
|
|
48
|
+
```bash
|
|
49
|
+
cd minicpmo-utils
|
|
50
|
+
python -m build # 生成 dist/*.whl
|
|
51
|
+
pip install dist/minicpmo_utils-0.1.0-py3-none-any.whl
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### 导入方式
|
|
55
|
+
|
|
56
|
+
包会暴露以下顶层模块,安装后可直接使用:
|
|
57
|
+
- `import cosyvoice`
|
|
58
|
+
- `import stepaudio2`
|
|
59
|
+
- `import matcha`
|
|
60
|
+
- `import minicpmo`
|
|
61
|
+
|
|
62
|
+
也支持通过统一入口导入子包:
|
|
63
|
+
```python
|
|
64
|
+
from minicpmo import cosyvoice, stepaudio2, matcha
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
以及通过统一的 utils 入口使用通用工具函数,例如:
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
from minicpmo.utils import get_video_frame_audio_segments
|
|
71
|
+
```
|
|
72
|
+
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
torch>=2.3.0
|
|
2
|
+
torchaudio>=2.3.0
|
|
3
|
+
transformers>=4.49.0
|
|
4
|
+
numpy
|
|
5
|
+
hyperpyyaml
|
|
6
|
+
modelscope
|
|
7
|
+
openai-whisper
|
|
8
|
+
tqdm
|
|
9
|
+
tiktoken
|
|
10
|
+
inflect
|
|
11
|
+
omegaconf
|
|
12
|
+
einops
|
|
13
|
+
librosa
|
|
14
|
+
onnxruntime>=1.18.0
|
|
15
|
+
diffusers
|
|
16
|
+
|
|
17
|
+
[gpu]
|
|
18
|
+
|
|
19
|
+
[gpu:sys_platform == "linux"]
|
|
20
|
+
onnxruntime-gpu>=1.18.0
|
minicpmo_utils-0.0.6/PKG-INFO
DELETED
|
@@ -1,110 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: minicpmo-utils
|
|
3
|
-
Version: 0.0.6
|
|
4
|
-
Summary: Unified utilities package for MiniCPM-o: includes cosyvoice + stepaudio2 and extensible utils.
|
|
5
|
-
Author: MiniCPM-o Utils Maintainers
|
|
6
|
-
License: Apache-2.0
|
|
7
|
-
Keywords: minicpmo,audio,tts,utils,cosyvoice,stepaudio2
|
|
8
|
-
Classifier: Development Status :: 4 - Beta
|
|
9
|
-
Classifier: Intended Audience :: Developers
|
|
10
|
-
Classifier: License :: OSI Approved :: Apache Software License
|
|
11
|
-
Classifier: Programming Language :: Python :: 3
|
|
12
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
-
Requires-Python: >=3.10
|
|
16
|
-
Description-Content-Type: text/markdown
|
|
17
|
-
Requires-Dist: MarkupSafe>=2.1.0
|
|
18
|
-
Requires-Dist: Jinja2>=3.1.0
|
|
19
|
-
Requires-Dist: numpy>=1.24.0
|
|
20
|
-
Requires-Dist: pillow==10.4.0
|
|
21
|
-
Requires-Dist: librosa==0.9.0
|
|
22
|
-
Requires-Dist: decord==0.6.0
|
|
23
|
-
Requires-Dist: moviepy==2.1.2
|
|
24
|
-
Requires-Dist: numba==0.61.2
|
|
25
|
-
Provides-Extra: tts
|
|
26
|
-
Requires-Dist: torch>=2.3.0; extra == "tts"
|
|
27
|
-
Requires-Dist: torchaudio>=2.3.0; extra == "tts"
|
|
28
|
-
Requires-Dist: transformers<4.53.0,>=4.51.0; extra == "tts"
|
|
29
|
-
Requires-Dist: safetensors>=0.4.3; extra == "tts"
|
|
30
|
-
Requires-Dist: onnxruntime<=1.21.0,>=1.18.0; extra == "tts"
|
|
31
|
-
Requires-Dist: onnx; extra == "tts"
|
|
32
|
-
Requires-Dist: hyperpyyaml; extra == "tts"
|
|
33
|
-
Requires-Dist: openai-whisper==20231117; extra == "tts"
|
|
34
|
-
Requires-Dist: tqdm>=4.65.0; extra == "tts"
|
|
35
|
-
Requires-Dist: tiktoken; extra == "tts"
|
|
36
|
-
Requires-Dist: inflect; extra == "tts"
|
|
37
|
-
Requires-Dist: omegaconf>=2.0.6; extra == "tts"
|
|
38
|
-
Requires-Dist: conformer==0.3.2; extra == "tts"
|
|
39
|
-
Requires-Dist: einops==0.8.1; extra == "tts"
|
|
40
|
-
Requires-Dist: hydra-core; extra == "tts"
|
|
41
|
-
Requires-Dist: lightning==2.2.4; extra == "tts"
|
|
42
|
-
Requires-Dist: rich; extra == "tts"
|
|
43
|
-
Requires-Dist: gdown==5.2.0; extra == "tts"
|
|
44
|
-
Requires-Dist: matplotlib; extra == "tts"
|
|
45
|
-
Requires-Dist: wget; extra == "tts"
|
|
46
|
-
Requires-Dist: pyarrow; extra == "tts"
|
|
47
|
-
Requires-Dist: pyworld; extra == "tts"
|
|
48
|
-
Requires-Dist: scipy>=1.10.0; extra == "tts"
|
|
49
|
-
Requires-Dist: pyyaml; extra == "tts"
|
|
50
|
-
Requires-Dist: regex; extra == "tts"
|
|
51
|
-
Requires-Dist: soundfile==0.12.1; extra == "tts"
|
|
52
|
-
Requires-Dist: diffusers==0.29.0; extra == "tts"
|
|
53
|
-
Provides-Extra: streaming
|
|
54
|
-
Requires-Dist: minicpmo-utils[tts]; extra == "streaming"
|
|
55
|
-
Provides-Extra: all
|
|
56
|
-
Requires-Dist: minicpmo-utils[streaming,tts]; extra == "all"
|
|
57
|
-
|
|
58
|
-
## minicpmo-utils
|
|
59
|
-
|
|
60
|
-
一个统一安装的工具包(一个 PyPI 分发包),把仓库里的 `cosyvoice` 与 `stepaudio2` 一起打进同一个 wheel,并预留 `minicpmo` 作为后续扩展 utils 的统一入口。
|
|
61
|
-
|
|
62
|
-
### 安装方式
|
|
63
|
-
|
|
64
|
-
- 从源码本地安装(开发态,可编辑,默认只装公共依赖):
|
|
65
|
-
```bash
|
|
66
|
-
cd minicpmo-utils
|
|
67
|
-
pip install -e .
|
|
68
|
-
```
|
|
69
|
-
|
|
70
|
-
- 如果只想安装 cosyvoice 相关依赖(TTS):
|
|
71
|
-
```bash
|
|
72
|
-
pip install -e .[tts]
|
|
73
|
-
```
|
|
74
|
-
|
|
75
|
-
- 如果只想安装 stepaudio2 / streaming 相关依赖:
|
|
76
|
-
```bash
|
|
77
|
-
pip install -e .[streaming]
|
|
78
|
-
```
|
|
79
|
-
|
|
80
|
-
- 同时安装 cosyvoice + stepaudio2 相关依赖:
|
|
81
|
-
```bash
|
|
82
|
-
pip install -e .[tts,streaming]
|
|
83
|
-
```
|
|
84
|
-
|
|
85
|
-
- 构建并安装 wheel(推荐分发):
|
|
86
|
-
```bash
|
|
87
|
-
cd minicpmo-utils
|
|
88
|
-
python -m build # 生成 dist/*.whl
|
|
89
|
-
pip install \"dist/minicpmo_utils-0.1.0-py3-none-any.whl[tts,streaming]\"
|
|
90
|
-
```
|
|
91
|
-
|
|
92
|
-
### 导入方式
|
|
93
|
-
|
|
94
|
-
包会暴露以下顶层模块,安装后可直接使用:
|
|
95
|
-
- `import cosyvoice`
|
|
96
|
-
- `import stepaudio2`
|
|
97
|
-
- `import matcha`
|
|
98
|
-
- `import minicpmo`
|
|
99
|
-
|
|
100
|
-
也支持通过统一入口导入子包:
|
|
101
|
-
```python
|
|
102
|
-
from minicpmo import cosyvoice, stepaudio2, matcha
|
|
103
|
-
```
|
|
104
|
-
|
|
105
|
-
以及通过统一的 utils 入口使用通用工具函数,例如:
|
|
106
|
-
|
|
107
|
-
```python
|
|
108
|
-
from minicpmo.utils import get_video_frame_audio_segments
|
|
109
|
-
```
|
|
110
|
-
|
|
@@ -1,110 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: minicpmo-utils
|
|
3
|
-
Version: 0.0.6
|
|
4
|
-
Summary: Unified utilities package for MiniCPM-o: includes cosyvoice + stepaudio2 and extensible utils.
|
|
5
|
-
Author: MiniCPM-o Utils Maintainers
|
|
6
|
-
License: Apache-2.0
|
|
7
|
-
Keywords: minicpmo,audio,tts,utils,cosyvoice,stepaudio2
|
|
8
|
-
Classifier: Development Status :: 4 - Beta
|
|
9
|
-
Classifier: Intended Audience :: Developers
|
|
10
|
-
Classifier: License :: OSI Approved :: Apache Software License
|
|
11
|
-
Classifier: Programming Language :: Python :: 3
|
|
12
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
-
Requires-Python: >=3.10
|
|
16
|
-
Description-Content-Type: text/markdown
|
|
17
|
-
Requires-Dist: MarkupSafe>=2.1.0
|
|
18
|
-
Requires-Dist: Jinja2>=3.1.0
|
|
19
|
-
Requires-Dist: numpy>=1.24.0
|
|
20
|
-
Requires-Dist: pillow==10.4.0
|
|
21
|
-
Requires-Dist: librosa==0.9.0
|
|
22
|
-
Requires-Dist: decord==0.6.0
|
|
23
|
-
Requires-Dist: moviepy==2.1.2
|
|
24
|
-
Requires-Dist: numba==0.61.2
|
|
25
|
-
Provides-Extra: tts
|
|
26
|
-
Requires-Dist: torch>=2.3.0; extra == "tts"
|
|
27
|
-
Requires-Dist: torchaudio>=2.3.0; extra == "tts"
|
|
28
|
-
Requires-Dist: transformers<4.53.0,>=4.51.0; extra == "tts"
|
|
29
|
-
Requires-Dist: safetensors>=0.4.3; extra == "tts"
|
|
30
|
-
Requires-Dist: onnxruntime<=1.21.0,>=1.18.0; extra == "tts"
|
|
31
|
-
Requires-Dist: onnx; extra == "tts"
|
|
32
|
-
Requires-Dist: hyperpyyaml; extra == "tts"
|
|
33
|
-
Requires-Dist: openai-whisper==20231117; extra == "tts"
|
|
34
|
-
Requires-Dist: tqdm>=4.65.0; extra == "tts"
|
|
35
|
-
Requires-Dist: tiktoken; extra == "tts"
|
|
36
|
-
Requires-Dist: inflect; extra == "tts"
|
|
37
|
-
Requires-Dist: omegaconf>=2.0.6; extra == "tts"
|
|
38
|
-
Requires-Dist: conformer==0.3.2; extra == "tts"
|
|
39
|
-
Requires-Dist: einops==0.8.1; extra == "tts"
|
|
40
|
-
Requires-Dist: hydra-core; extra == "tts"
|
|
41
|
-
Requires-Dist: lightning==2.2.4; extra == "tts"
|
|
42
|
-
Requires-Dist: rich; extra == "tts"
|
|
43
|
-
Requires-Dist: gdown==5.2.0; extra == "tts"
|
|
44
|
-
Requires-Dist: matplotlib; extra == "tts"
|
|
45
|
-
Requires-Dist: wget; extra == "tts"
|
|
46
|
-
Requires-Dist: pyarrow; extra == "tts"
|
|
47
|
-
Requires-Dist: pyworld; extra == "tts"
|
|
48
|
-
Requires-Dist: scipy>=1.10.0; extra == "tts"
|
|
49
|
-
Requires-Dist: pyyaml; extra == "tts"
|
|
50
|
-
Requires-Dist: regex; extra == "tts"
|
|
51
|
-
Requires-Dist: soundfile==0.12.1; extra == "tts"
|
|
52
|
-
Requires-Dist: diffusers==0.29.0; extra == "tts"
|
|
53
|
-
Provides-Extra: streaming
|
|
54
|
-
Requires-Dist: minicpmo-utils[tts]; extra == "streaming"
|
|
55
|
-
Provides-Extra: all
|
|
56
|
-
Requires-Dist: minicpmo-utils[streaming,tts]; extra == "all"
|
|
57
|
-
|
|
58
|
-
## minicpmo-utils
|
|
59
|
-
|
|
60
|
-
一个统一安装的工具包(一个 PyPI 分发包),把仓库里的 `cosyvoice` 与 `stepaudio2` 一起打进同一个 wheel,并预留 `minicpmo` 作为后续扩展 utils 的统一入口。
|
|
61
|
-
|
|
62
|
-
### 安装方式
|
|
63
|
-
|
|
64
|
-
- 从源码本地安装(开发态,可编辑,默认只装公共依赖):
|
|
65
|
-
```bash
|
|
66
|
-
cd minicpmo-utils
|
|
67
|
-
pip install -e .
|
|
68
|
-
```
|
|
69
|
-
|
|
70
|
-
- 如果只想安装 cosyvoice 相关依赖(TTS):
|
|
71
|
-
```bash
|
|
72
|
-
pip install -e .[tts]
|
|
73
|
-
```
|
|
74
|
-
|
|
75
|
-
- 如果只想安装 stepaudio2 / streaming 相关依赖:
|
|
76
|
-
```bash
|
|
77
|
-
pip install -e .[streaming]
|
|
78
|
-
```
|
|
79
|
-
|
|
80
|
-
- 同时安装 cosyvoice + stepaudio2 相关依赖:
|
|
81
|
-
```bash
|
|
82
|
-
pip install -e .[tts,streaming]
|
|
83
|
-
```
|
|
84
|
-
|
|
85
|
-
- 构建并安装 wheel(推荐分发):
|
|
86
|
-
```bash
|
|
87
|
-
cd minicpmo-utils
|
|
88
|
-
python -m build # 生成 dist/*.whl
|
|
89
|
-
pip install \"dist/minicpmo_utils-0.1.0-py3-none-any.whl[tts,streaming]\"
|
|
90
|
-
```
|
|
91
|
-
|
|
92
|
-
### 导入方式
|
|
93
|
-
|
|
94
|
-
包会暴露以下顶层模块,安装后可直接使用:
|
|
95
|
-
- `import cosyvoice`
|
|
96
|
-
- `import stepaudio2`
|
|
97
|
-
- `import matcha`
|
|
98
|
-
- `import minicpmo`
|
|
99
|
-
|
|
100
|
-
也支持通过统一入口导入子包:
|
|
101
|
-
```python
|
|
102
|
-
from minicpmo import cosyvoice, stepaudio2, matcha
|
|
103
|
-
```
|
|
104
|
-
|
|
105
|
-
以及通过统一的 utils 入口使用通用工具函数,例如:
|
|
106
|
-
|
|
107
|
-
```python
|
|
108
|
-
from minicpmo.utils import get_video_frame_audio_segments
|
|
109
|
-
```
|
|
110
|
-
|