PyPI - minicpmo-utils - Versions diffs - 0.0.5__tar.gz → 0.1.0__tar.gz - Mend

minicpmo-utils 0.0.5.tar.gz → 0.1.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (156) hide show

minicpmo_utils-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,72 @@
+Metadata-Version: 2.4
+Name: minicpmo-utils
+Version: 0.1.0
+Summary: Unified utilities package for MiniCPM-o: includes cosyvoice + stepaudio2 and extensible utils.
+Author: MiniCPM-o Utils Maintainers
+License: Apache-2.0
+Keywords: minicpmo,audio,tts,utils,cosyvoice,stepaudio2
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+Requires-Dist: torch>=2.3.0
+Requires-Dist: torchaudio>=2.3.0
+Requires-Dist: transformers>=4.49.0
+Requires-Dist: numpy
+Requires-Dist: hyperpyyaml
+Requires-Dist: modelscope
+Requires-Dist: openai-whisper
+Requires-Dist: tqdm
+Requires-Dist: tiktoken
+Requires-Dist: inflect
+Requires-Dist: omegaconf
+Requires-Dist: einops
+Requires-Dist: librosa
+Requires-Dist: onnxruntime>=1.18.0
+Requires-Dist: diffusers
+Provides-Extra: gpu
+Requires-Dist: onnxruntime-gpu>=1.18.0; sys_platform == "linux" and extra == "gpu"
+## minicpmo-utils
+一个统一安装的工具包（一个 PyPI 分发包），把仓库里的 `cosyvoice` 与 `stepaudio2` 一起打进同一个 wheel，并预留 `minicpmo` 作为后续扩展 utils 的统一入口。
+### 安装方式
+- 从源码本地安装（开发态，可编辑）：
+```bash
+cd minicpmo-utils
+pip install -e .
+```
+- 构建并安装 wheel（推荐分发）：
+```bash
+cd minicpmo-utils
+python -m build        # 生成 dist/*.whl
+pip install dist/minicpmo_utils-0.1.0-py3-none-any.whl
+```
+### 导入方式
+包会暴露以下顶层模块，安装后可直接使用：
+- `import cosyvoice`
+- `import stepaudio2`
+- `import matcha`
+- `import minicpmo`
+也支持通过统一入口导入子包：
+```python
+from minicpmo import cosyvoice, stepaudio2, matcha
+```
+以及通过统一的 utils 入口使用通用工具函数，例如：
+```python
+from minicpmo.utils import get_video_frame_audio_segments
+```

{minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/README.md RENAMED Viewed

@@ -4,32 +4,17 @@
 ### 安装方式
-- 从源码本地安装（开发态，可编辑，默认只装公共依赖）：
+- 从源码本地安装（开发态，可编辑）：
 ```bash
 cd minicpmo-utils
 pip install -e .
 ```
-- 如果只想安装 cosyvoice 相关依赖（TTS）：
-```bash
-pip install -e .[tts]
-```
-- 如果只想安装 stepaudio2 / streaming 相关依赖：
-```bash
-pip install -e .[streaming]
-```
-- 同时安装 cosyvoice + stepaudio2 相关依赖：
-```bash
-pip install -e .[tts,streaming]
-```
 - 构建并安装 wheel（推荐分发）：
 ```bash
 cd minicpmo-utils
 python -m build        # 生成 dist/*.whl
-pip install \"dist/minicpmo_utils-0.1.0-py3-none-any.whl[tts,streaming]\"
+pip install dist/minicpmo_utils-0.1.0-py3-none-any.whl
 ```
 ### 导入方式

{minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "minicpmo-utils"
-version = "0.0.5"
+version = "0.1.0"
 description = "Unified utilities package for MiniCPM-o: includes cosyvoice + stepaudio2 and extensible utils."
 readme = "README.md"
 requires-python = ">=3.10"
@@ -31,68 +31,34 @@ classifiers = [
 #   - s3tokenizer (来自 S3Tokenizer-main)
 #   - minicpmo (本项目扩展 utils 的统一入口：from minicpmo.utils import ...)
 dependencies = [
-  "numpy",
-  "pillow==10.4.0",
-  "librosa==0.9.0",
-  "decord==0.6.0",
-  "moviepy==2.1.2",
-  "numba==0.61.2",
-]
-[project.optional-dependencies]
-# cosyvoice TTS 相关依赖
-tts = [
+  # Core ML frameworks (align to stepaudio2's requirements)
   "torch>=2.3.0",
   "torchaudio>=2.3.0",
-  "transformers>=4.51.0,<4.53.0",  # 4.52+ 有兼容性问题
-  "onnxruntime>=1.18.0,<=1.21.0",
-  "onnx",
+  "transformers>=4.49.0",
+  # Shared / common
+  "numpy",
   "hyperpyyaml",
-  "openai-whisper==20231117",
+  # cosyvoice side
+  "modelscope",
+  "openai-whisper",
   "tqdm",
   "tiktoken",
   "inflect",
-  "omegaconf>=2.0.6",
-  "conformer==0.3.2",
-  "einops==0.8.1",
-  "hydra-core",
-  "lightning==2.2.4",
-  "rich",
-  "gdown==5.2.0",
-  "matplotlib",
-  "wget",
-  "pyarrow",
-  "pyworld",
-  # 新增依赖
-  "scipy",
-  "pyyaml",
-  "regex",
-  "soundfile==0.12.1",
-  "diffusers==0.29.0"
-]
-# stepaudio2 基础依赖（token2wav 等）
-streaming = [
-  "minicpmo-utils[tts]",  # streaming 依赖 tts
-]
+  "omegaconf",
+  "einops",
-# stepaudio2 Flash 推理引擎依赖（flashcosyvoice.engine 模块需要）
-streaming-flash = [
-  "minicpmo-utils[streaming]",
-  "flash-attn>=2.6.0; sys_platform == 'linux'",
-  "triton>=2.3.0; sys_platform == 'linux'",
-  "safetensors",
-  "pynvml",
-  "xxhash",
+  # stepaudio2 side
+  "librosa",
+  "onnxruntime>=1.18.0",
+  "diffusers",
 ]
-# Linux GPU onnxruntime 可以很重，且与环境强相关，保留为可选 extra
+[project.optional-dependencies]
+# Linux GPU onnxruntime can be heavy and environment-specific; keep as an opt-in extra.
 gpu = [
-  "onnxruntime-gpu>=1.18.0,<=1.23.2; sys_platform == 'linux'",
-]
-all = [
-  "minicpmo-utils[tts,streaming,gpu]",
+  "onnxruntime-gpu>=1.18.0; sys_platform == 'linux'",
 ]
 [tool.setuptools]

{minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/cli/cosyvoice.py RENAMED Viewed

@@ -16,6 +16,7 @@ import time
 from typing import Generator
 from tqdm import tqdm
 from hyperpyyaml import load_hyperpyyaml
+from modelscope import snapshot_download
 import torch
 from cosyvoice.cli.frontend import CosyVoiceFrontEnd
 from cosyvoice.cli.model import CosyVoiceModel, CosyVoice2Model
@@ -29,6 +30,8 @@ class CosyVoice:
         self.instruct = True if '-Instruct' in model_dir else False
         self.model_dir = model_dir
         self.fp16 = fp16
+        if not os.path.exists(model_dir):
+            model_dir = snapshot_download(model_dir)
         hyper_yaml_path = '{}/cosyvoice.yaml'.format(model_dir)
         if not os.path.exists(hyper_yaml_path):
             raise ValueError('{} not found!'.format(hyper_yaml_path))
@@ -151,6 +154,8 @@ class CosyVoice2(CosyVoice):
         self.instruct = True if '-Instruct' in model_dir else False
         self.model_dir = model_dir
         self.fp16 = fp16
+        if not os.path.exists(model_dir):
+            model_dir = snapshot_download(model_dir)
         if config_path is None:
             config_path = f'{model_dir}/cosyvoice2.yaml'

{minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/cosyvoice/utils/file_utils.py RENAMED Viewed

@@ -20,8 +20,7 @@ import torch
 import torchaudio
 import logging
 logging.getLogger('matplotlib').setLevel(logging.WARNING)
-logging.getLogger('numba').setLevel(logging.WARNING)
-logging.basicConfig(level=logging.INFO,
+logging.basicConfig(level=logging.DEBUG,
                     format='%(asctime)s %(levelname)s %(message)s')

{minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/minicpmo/__init__.py RENAMED Viewed

@@ -12,3 +12,13 @@
 """
 from .version import __version__
+# Eager re-exports to allow:
+#   from minicpmo import cosyvoice, stepaudio2, matcha
+# 而不需要懒加载。
+import cosyvoice as cosyvoice
+import stepaudio2 as stepaudio2
+import matcha as matcha
+__all__ = ["__version__", "cosyvoice", "stepaudio2", "matcha"]

{minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/minicpmo/utils.py RENAMED Viewed

@@ -26,84 +26,6 @@ MAX_NUM_FRAMES = int(os.getenv("MAX_NUM_FRAMES", 64))
 VIDEO_MME_DURATION = os.getenv("VIDEO_MME_DURATION", "ALL")
-def find_cjk_font():
-    """
-    查找支持中文的字体。
-    按优先级返回字体名称或路径，如果找不到则返回 None。
-    """
-    # 常见的中文字体名称（按优先级排列）
-    font_names = [
-        # Noto CJK 字体（推荐）
-        "Noto Sans CJK SC",
-        "Noto Sans CJK",
-        "NotoSansCJK-Regular",
-        # 文泉驿字体
-        "WenQuanYi Zen Hei",
-        "WenQuanYi Micro Hei",
-        "文泉驿正黑",
-        "文泉驿微米黑",
-        # 思源字体
-        "Source Han Sans SC",
-        "Source Han Sans CN",
-        # 其他常见中文字体
-        "SimHei",
-        "Microsoft YaHei",
-        "PingFang SC",
-        "Hiragino Sans GB",
-        "STHeiti",
-        "AR PL UMing CN",
-        "AR PL UKai CN",
-    ]
-    # 常见的中文字体文件路径
-    font_paths = [
-        # Noto CJK
-        "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
-        "/usr/share/fonts/noto-cjk/NotoSansCJK-Regular.ttc",
-        "/usr/share/fonts/google-noto-cjk/NotoSansCJK-Regular.ttc",
-        "/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc",
-        # 文泉驿
-        "/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc",
-        "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc",
-        "/usr/share/fonts/wenquanyi/wqy-zenhei/wqy-zenhei.ttc",
-        "/usr/share/fonts/wenquanyi/wqy-microhei/wqy-microhei.ttc",
-        # macOS
-        "/System/Library/Fonts/PingFang.ttc",
-        "/Library/Fonts/Arial Unicode.ttf",
-        # Windows (WSL)
-        "/mnt/c/Windows/Fonts/msyh.ttc",
-        "/mnt/c/Windows/Fonts/simhei.ttf",
-    ]
-    # 首先尝试使用 fc-list 查找已安装的中文字体
-    try:
-        result = subprocess.run(
-            ["fc-list", ":lang=zh", "-f", "%{family}\n"],
-            capture_output=True,
-            text=True,
-            timeout=5,
-        )
-        if result.returncode == 0:
-            installed_fonts = set(result.stdout.strip().split("\n"))
-            for font_name in font_names:
-                for installed in installed_fonts:
-                    if font_name.lower() in installed.lower():
-                        logger.info(f"Found CJK font via fc-list: {installed}")
-                        return installed.split(",")[0]  # 取第一个名称
-    except Exception as e:
-        logger.debug(f"fc-list failed: {e}")
-    # 然后检查常见的字体文件路径
-    for path in font_paths:
-        if os.path.exists(path):
-            logger.info(f"Found CJK font file: {path}")
-            return path
-    logger.warning("No CJK font found. Chinese subtitles may display as boxes/garbled text.")
-    logger.warning("Install Chinese fonts with: sudo apt-get install fonts-noto-cjk")
-    return None
 def concat_images(images, bg_color=(255, 255, 255), cell_size=None, line_color=(0, 0, 0), line_width=6):
     """
     images: List[PIL.Image.Image]
@@ -649,18 +571,9 @@ def generate_duplex_video(
     if has_subtitles:
         srt_path_escaped = srt_path.replace("\\", "\\\\").replace("'", "'\\''").replace(":", "\\:")
-        # 查找支持中文的字体
-        cjk_font = find_cjk_font()
-        font_style = ""
-        if cjk_font:
-            # 转义字体路径/名称中的特殊字符
-            font_escaped = cjk_font.replace("\\", "\\\\").replace("'", "'\\''").replace(":", "\\:")
-            font_style = f"FontName={font_escaped},"
         subtitle_filter = (
             f"subtitles='{srt_path_escaped}':"
-            f"force_style='{font_style}FontSize=28,"
+            f"force_style='FontSize=28,"
             f"PrimaryColour=&H00FFFFFF,"
             f"OutlineColour=&H00000000,"
             f"BorderStyle=3,"

minicpmo_utils-0.1.0/src/minicpmo_utils.egg-info/PKG-INFO ADDED Viewed

@@ -0,0 +1,72 @@
+Metadata-Version: 2.4
+Name: minicpmo-utils
+Version: 0.1.0
+Summary: Unified utilities package for MiniCPM-o: includes cosyvoice + stepaudio2 and extensible utils.
+Author: MiniCPM-o Utils Maintainers
+License: Apache-2.0
+Keywords: minicpmo,audio,tts,utils,cosyvoice,stepaudio2
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+Requires-Dist: torch>=2.3.0
+Requires-Dist: torchaudio>=2.3.0
+Requires-Dist: transformers>=4.49.0
+Requires-Dist: numpy
+Requires-Dist: hyperpyyaml
+Requires-Dist: modelscope
+Requires-Dist: openai-whisper
+Requires-Dist: tqdm
+Requires-Dist: tiktoken
+Requires-Dist: inflect
+Requires-Dist: omegaconf
+Requires-Dist: einops
+Requires-Dist: librosa
+Requires-Dist: onnxruntime>=1.18.0
+Requires-Dist: diffusers
+Provides-Extra: gpu
+Requires-Dist: onnxruntime-gpu>=1.18.0; sys_platform == "linux" and extra == "gpu"
+## minicpmo-utils
+一个统一安装的工具包（一个 PyPI 分发包），把仓库里的 `cosyvoice` 与 `stepaudio2` 一起打进同一个 wheel，并预留 `minicpmo` 作为后续扩展 utils 的统一入口。
+### 安装方式
+- 从源码本地安装（开发态，可编辑）：
+```bash
+cd minicpmo-utils
+pip install -e .
+```
+- 构建并安装 wheel（推荐分发）：
+```bash
+cd minicpmo-utils
+python -m build        # 生成 dist/*.whl
+pip install dist/minicpmo_utils-0.1.0-py3-none-any.whl
+```
+### 导入方式
+包会暴露以下顶层模块，安装后可直接使用：
+- `import cosyvoice`
+- `import stepaudio2`
+- `import matcha`
+- `import minicpmo`
+也支持通过统一入口导入子包：
+```python
+from minicpmo import cosyvoice, stepaudio2, matcha
+```
+以及通过统一的 utils 入口使用通用工具函数，例如：
+```python
+from minicpmo.utils import get_video_frame_audio_segments
+```

minicpmo_utils-0.1.0/src/minicpmo_utils.egg-info/requires.txt ADDED Viewed

@@ -0,0 +1,20 @@
+torch>=2.3.0
+torchaudio>=2.3.0
+transformers>=4.49.0
+numpy
+hyperpyyaml
+modelscope
+openai-whisper
+tqdm
+tiktoken
+inflect
+omegaconf
+einops
+librosa
+onnxruntime>=1.18.0
+diffusers
+[gpu]
+[gpu:sys_platform == "linux"]
+onnxruntime-gpu>=1.18.0

{minicpmo_utils-0.0.5 → minicpmo_utils-0.1.0}/src/stepaudio2/token2wav.py RENAMED Viewed

@@ -7,6 +7,7 @@ import torchaudio
 import s3tokenizer
 import onnxruntime
 import numpy as np
+from copy import deepcopy
 import torchaudio.compliance.kaldi as kaldi
 from stepaudio2.flashcosyvoice.modules.hifigan import HiFTGenerator

minicpmo_utils-0.0.5/PKG-INFO DELETED Viewed

@@ -1,116 +0,0 @@
-Metadata-Version: 2.4
-Name: minicpmo-utils
-Version: 0.0.5
-Summary: Unified utilities package for MiniCPM-o: includes cosyvoice + stepaudio2 and extensible utils.
-Author: MiniCPM-o Utils Maintainers
-License: Apache-2.0
-Keywords: minicpmo,audio,tts,utils,cosyvoice,stepaudio2
-Classifier: Development Status :: 4 - Beta
-Classifier: Intended Audience :: Developers
-Classifier: License :: OSI Approved :: Apache Software License
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
-Requires-Python: >=3.10
-Description-Content-Type: text/markdown
-Requires-Dist: numpy
-Requires-Dist: pillow==10.4.0
-Requires-Dist: librosa==0.9.0
-Requires-Dist: decord==0.6.0
-Requires-Dist: moviepy==2.1.2
-Requires-Dist: numba==0.61.2
-Provides-Extra: tts
-Requires-Dist: torch>=2.3.0; extra == "tts"
-Requires-Dist: torchaudio>=2.3.0; extra == "tts"
-Requires-Dist: transformers<4.53.0,>=4.51.0; extra == "tts"
-Requires-Dist: onnxruntime<=1.21.0,>=1.18.0; extra == "tts"
-Requires-Dist: onnx; extra == "tts"
-Requires-Dist: hyperpyyaml; extra == "tts"
-Requires-Dist: openai-whisper==20231117; extra == "tts"
-Requires-Dist: tqdm; extra == "tts"
-Requires-Dist: tiktoken; extra == "tts"
-Requires-Dist: inflect; extra == "tts"
-Requires-Dist: omegaconf>=2.0.6; extra == "tts"
-Requires-Dist: conformer==0.3.2; extra == "tts"
-Requires-Dist: einops==0.8.1; extra == "tts"
-Requires-Dist: hydra-core; extra == "tts"
-Requires-Dist: lightning==2.2.4; extra == "tts"
-Requires-Dist: rich; extra == "tts"
-Requires-Dist: gdown==5.2.0; extra == "tts"
-Requires-Dist: matplotlib; extra == "tts"
-Requires-Dist: wget; extra == "tts"
-Requires-Dist: pyarrow; extra == "tts"
-Requires-Dist: pyworld; extra == "tts"
-Requires-Dist: scipy; extra == "tts"
-Requires-Dist: pyyaml; extra == "tts"
-Requires-Dist: regex; extra == "tts"
-Requires-Dist: soundfile==0.12.1; extra == "tts"
-Requires-Dist: diffusers==0.29.0; extra == "tts"
-Provides-Extra: streaming
-Requires-Dist: minicpmo-utils[tts]; extra == "streaming"
-Provides-Extra: streaming-flash
-Requires-Dist: minicpmo-utils[streaming]; extra == "streaming-flash"
-Requires-Dist: flash-attn>=2.6.0; sys_platform == "linux" and extra == "streaming-flash"
-Requires-Dist: triton>=2.3.0; sys_platform == "linux" and extra == "streaming-flash"
-Requires-Dist: safetensors; extra == "streaming-flash"
-Requires-Dist: pynvml; extra == "streaming-flash"
-Requires-Dist: xxhash; extra == "streaming-flash"
-Provides-Extra: gpu
-Requires-Dist: onnxruntime-gpu<=1.23.2,>=1.18.0; sys_platform == "linux" and extra == "gpu"
-Provides-Extra: all
-Requires-Dist: minicpmo-utils[gpu,streaming,tts]; extra == "all"
-## minicpmo-utils
-一个统一安装的工具包（一个 PyPI 分发包），把仓库里的 `cosyvoice` 与 `stepaudio2` 一起打进同一个 wheel，并预留 `minicpmo` 作为后续扩展 utils 的统一入口。
-### 安装方式
-- 从源码本地安装（开发态，可编辑，默认只装公共依赖）：
-```bash
-cd minicpmo-utils
-pip install -e .
-```
-- 如果只想安装 cosyvoice 相关依赖（TTS）：
-```bash
-pip install -e .[tts]
-```
-- 如果只想安装 stepaudio2 / streaming 相关依赖：
-```bash
-pip install -e .[streaming]
-```
-- 同时安装 cosyvoice + stepaudio2 相关依赖：
-```bash
-pip install -e .[tts,streaming]
-```
-- 构建并安装 wheel（推荐分发）：
-```bash
-cd minicpmo-utils
-python -m build        # 生成 dist/*.whl
-pip install \"dist/minicpmo_utils-0.1.0-py3-none-any.whl[tts,streaming]\"
-```
-### 导入方式
-包会暴露以下顶层模块，安装后可直接使用：
-- `import cosyvoice`
-- `import stepaudio2`
-- `import matcha`
-- `import minicpmo`
-也支持通过统一入口导入子包：
-```python
-from minicpmo import cosyvoice, stepaudio2, matcha
-```
-以及通过统一的 utils 入口使用通用工具函数，例如：
-```python
-from minicpmo.utils import get_video_frame_audio_segments
-```

minicpmo-utils 0.0.5__tar.gz → 0.1.0__tar.gz

minicpmo-utils 0.0.5.tar.gz → 0.1.0.tar.gz