minicpmo-utils 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. cosyvoice/__init__.py +17 -0
  2. cosyvoice/bin/average_model.py +93 -0
  3. cosyvoice/bin/export_jit.py +103 -0
  4. cosyvoice/bin/export_onnx.py +120 -0
  5. cosyvoice/bin/inference_deprecated.py +126 -0
  6. cosyvoice/bin/train.py +195 -0
  7. cosyvoice/cli/__init__.py +0 -0
  8. cosyvoice/cli/cosyvoice.py +209 -0
  9. cosyvoice/cli/frontend.py +238 -0
  10. cosyvoice/cli/model.py +386 -0
  11. cosyvoice/dataset/__init__.py +0 -0
  12. cosyvoice/dataset/dataset.py +151 -0
  13. cosyvoice/dataset/processor.py +434 -0
  14. cosyvoice/flow/decoder.py +494 -0
  15. cosyvoice/flow/flow.py +281 -0
  16. cosyvoice/flow/flow_matching.py +227 -0
  17. cosyvoice/flow/length_regulator.py +70 -0
  18. cosyvoice/hifigan/discriminator.py +230 -0
  19. cosyvoice/hifigan/f0_predictor.py +58 -0
  20. cosyvoice/hifigan/generator.py +582 -0
  21. cosyvoice/hifigan/hifigan.py +67 -0
  22. cosyvoice/llm/llm.py +610 -0
  23. cosyvoice/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken +58836 -0
  24. cosyvoice/tokenizer/tokenizer.py +279 -0
  25. cosyvoice/transformer/__init__.py +0 -0
  26. cosyvoice/transformer/activation.py +84 -0
  27. cosyvoice/transformer/attention.py +330 -0
  28. cosyvoice/transformer/convolution.py +145 -0
  29. cosyvoice/transformer/decoder.py +396 -0
  30. cosyvoice/transformer/decoder_layer.py +132 -0
  31. cosyvoice/transformer/embedding.py +302 -0
  32. cosyvoice/transformer/encoder.py +474 -0
  33. cosyvoice/transformer/encoder_layer.py +236 -0
  34. cosyvoice/transformer/label_smoothing_loss.py +96 -0
  35. cosyvoice/transformer/positionwise_feed_forward.py +115 -0
  36. cosyvoice/transformer/subsampling.py +383 -0
  37. cosyvoice/transformer/upsample_encoder.py +320 -0
  38. cosyvoice/utils/__init__.py +0 -0
  39. cosyvoice/utils/class_utils.py +83 -0
  40. cosyvoice/utils/common.py +186 -0
  41. cosyvoice/utils/executor.py +176 -0
  42. cosyvoice/utils/file_utils.py +129 -0
  43. cosyvoice/utils/frontend_utils.py +136 -0
  44. cosyvoice/utils/losses.py +57 -0
  45. cosyvoice/utils/mask.py +265 -0
  46. cosyvoice/utils/scheduler.py +738 -0
  47. cosyvoice/utils/train_utils.py +367 -0
  48. cosyvoice/vllm/cosyvoice2.py +103 -0
  49. matcha/__init__.py +0 -0
  50. matcha/app.py +357 -0
  51. matcha/cli.py +418 -0
  52. matcha/hifigan/__init__.py +0 -0
  53. matcha/hifigan/config.py +28 -0
  54. matcha/hifigan/denoiser.py +64 -0
  55. matcha/hifigan/env.py +17 -0
  56. matcha/hifigan/meldataset.py +217 -0
  57. matcha/hifigan/models.py +368 -0
  58. matcha/hifigan/xutils.py +60 -0
  59. matcha/models/__init__.py +0 -0
  60. matcha/models/baselightningmodule.py +209 -0
  61. matcha/models/components/__init__.py +0 -0
  62. matcha/models/components/decoder.py +443 -0
  63. matcha/models/components/flow_matching.py +132 -0
  64. matcha/models/components/text_encoder.py +410 -0
  65. matcha/models/components/transformer.py +316 -0
  66. matcha/models/matcha_tts.py +239 -0
  67. matcha/onnx/__init__.py +0 -0
  68. matcha/onnx/export.py +181 -0
  69. matcha/onnx/infer.py +168 -0
  70. matcha/text/__init__.py +53 -0
  71. matcha/text/cleaners.py +116 -0
  72. matcha/text/numbers.py +71 -0
  73. matcha/text/symbols.py +17 -0
  74. matcha/train.py +122 -0
  75. matcha/utils/__init__.py +5 -0
  76. matcha/utils/audio.py +82 -0
  77. matcha/utils/generate_data_statistics.py +111 -0
  78. matcha/utils/instantiators.py +56 -0
  79. matcha/utils/logging_utils.py +53 -0
  80. matcha/utils/model.py +90 -0
  81. matcha/utils/monotonic_align/__init__.py +22 -0
  82. matcha/utils/monotonic_align/setup.py +7 -0
  83. matcha/utils/pylogger.py +21 -0
  84. matcha/utils/rich_utils.py +101 -0
  85. matcha/utils/utils.py +219 -0
  86. minicpmo/__init__.py +24 -0
  87. minicpmo/utils.py +636 -0
  88. minicpmo/version.py +2 -0
  89. minicpmo_utils-0.1.0.dist-info/METADATA +72 -0
  90. minicpmo_utils-0.1.0.dist-info/RECORD +148 -0
  91. minicpmo_utils-0.1.0.dist-info/WHEEL +5 -0
  92. minicpmo_utils-0.1.0.dist-info/top_level.txt +5 -0
  93. s3tokenizer/__init__.py +153 -0
  94. s3tokenizer/assets/BAC009S0764W0121.wav +0 -0
  95. s3tokenizer/assets/BAC009S0764W0122.wav +0 -0
  96. s3tokenizer/assets/mel_filters.npz +0 -0
  97. s3tokenizer/cli.py +183 -0
  98. s3tokenizer/model.py +546 -0
  99. s3tokenizer/model_v2.py +605 -0
  100. s3tokenizer/utils.py +390 -0
  101. stepaudio2/__init__.py +40 -0
  102. stepaudio2/cosyvoice2/__init__.py +1 -0
  103. stepaudio2/cosyvoice2/flow/__init__.py +0 -0
  104. stepaudio2/cosyvoice2/flow/decoder_dit.py +585 -0
  105. stepaudio2/cosyvoice2/flow/flow.py +230 -0
  106. stepaudio2/cosyvoice2/flow/flow_matching.py +205 -0
  107. stepaudio2/cosyvoice2/transformer/__init__.py +0 -0
  108. stepaudio2/cosyvoice2/transformer/attention.py +328 -0
  109. stepaudio2/cosyvoice2/transformer/embedding.py +119 -0
  110. stepaudio2/cosyvoice2/transformer/encoder_layer.py +163 -0
  111. stepaudio2/cosyvoice2/transformer/positionwise_feed_forward.py +56 -0
  112. stepaudio2/cosyvoice2/transformer/subsampling.py +79 -0
  113. stepaudio2/cosyvoice2/transformer/upsample_encoder_v2.py +483 -0
  114. stepaudio2/cosyvoice2/utils/__init__.py +1 -0
  115. stepaudio2/cosyvoice2/utils/class_utils.py +41 -0
  116. stepaudio2/cosyvoice2/utils/common.py +101 -0
  117. stepaudio2/cosyvoice2/utils/mask.py +49 -0
  118. stepaudio2/flashcosyvoice/__init__.py +0 -0
  119. stepaudio2/flashcosyvoice/cli.py +424 -0
  120. stepaudio2/flashcosyvoice/config.py +80 -0
  121. stepaudio2/flashcosyvoice/cosyvoice2.py +160 -0
  122. stepaudio2/flashcosyvoice/cosyvoice3.py +1 -0
  123. stepaudio2/flashcosyvoice/engine/__init__.py +0 -0
  124. stepaudio2/flashcosyvoice/engine/block_manager.py +114 -0
  125. stepaudio2/flashcosyvoice/engine/llm_engine.py +125 -0
  126. stepaudio2/flashcosyvoice/engine/model_runner.py +310 -0
  127. stepaudio2/flashcosyvoice/engine/scheduler.py +77 -0
  128. stepaudio2/flashcosyvoice/engine/sequence.py +90 -0
  129. stepaudio2/flashcosyvoice/modules/__init__.py +0 -0
  130. stepaudio2/flashcosyvoice/modules/flow.py +198 -0
  131. stepaudio2/flashcosyvoice/modules/flow_components/__init__.py +0 -0
  132. stepaudio2/flashcosyvoice/modules/flow_components/estimator.py +974 -0
  133. stepaudio2/flashcosyvoice/modules/flow_components/upsample_encoder.py +998 -0
  134. stepaudio2/flashcosyvoice/modules/hifigan.py +249 -0
  135. stepaudio2/flashcosyvoice/modules/hifigan_components/__init__.py +0 -0
  136. stepaudio2/flashcosyvoice/modules/hifigan_components/layers.py +433 -0
  137. stepaudio2/flashcosyvoice/modules/qwen2.py +92 -0
  138. stepaudio2/flashcosyvoice/modules/qwen2_components/__init__.py +0 -0
  139. stepaudio2/flashcosyvoice/modules/qwen2_components/layers.py +616 -0
  140. stepaudio2/flashcosyvoice/modules/sampler.py +231 -0
  141. stepaudio2/flashcosyvoice/utils/__init__.py +0 -0
  142. stepaudio2/flashcosyvoice/utils/audio.py +77 -0
  143. stepaudio2/flashcosyvoice/utils/context.py +28 -0
  144. stepaudio2/flashcosyvoice/utils/loader.py +116 -0
  145. stepaudio2/flashcosyvoice/utils/memory.py +19 -0
  146. stepaudio2/stepaudio2.py +204 -0
  147. stepaudio2/token2wav.py +248 -0
  148. stepaudio2/utils.py +91 -0
matcha/utils/utils.py ADDED
@@ -0,0 +1,219 @@
1
+ import os
2
+ import sys
3
+ import warnings
4
+ from importlib.util import find_spec
5
+ from pathlib import Path
6
+ from typing import Any, Callable, Dict, Tuple
7
+
8
+ import gdown
9
+ import matplotlib.pyplot as plt
10
+ import numpy as np
11
+ import torch
12
+ import wget
13
+ from omegaconf import DictConfig
14
+
15
+ from matcha.utils import pylogger, rich_utils
16
+
17
+ log = pylogger.get_pylogger(__name__)
18
+
19
+
20
def extras(cfg: DictConfig) -> None:
    """Run the optional pre-task utilities selected under ``cfg.extras``.

    Each utility is switched on by a truthy flag in the ``extras`` section:

    - ``ignore_warnings``: install an "ignore" filter for Python warnings
    - ``enforce_tags``: delegate to ``rich_utils.enforce_tags``
    - ``print_config``: delegate to ``rich_utils.print_config_tree``

    When the ``extras`` section is missing or empty, a warning is logged and
    nothing else happens.

    :param cfg: A DictConfig object containing the config tree.
    """

    def enabled(option: str):
        # Read the flag from cfg.extras at the moment it is needed.
        return cfg.extras.get(option)

    # Nothing to do without an `extras` section.
    if not cfg.get("extras"):
        log.warning("Extras config not found! <cfg.extras=null>")
        return

    # Silence python warnings.
    if enabled("ignore_warnings"):
        log.info("Disabling python warnings! <cfg.extras.ignore_warnings=True>")
        warnings.filterwarnings("ignore")

    # Tag enforcement is handled by rich_utils.
    if enabled("enforce_tags"):
        log.info("Enforcing tags! <cfg.extras.enforce_tags=True>")
        rich_utils.enforce_tags(cfg, save_to_file=True)

    # Pretty-print the config tree through rich_utils.
    if enabled("print_config"):
        log.info("Printing config tree with Rich! <cfg.extras.print_config=True>")
        rich_utils.print_config_tree(cfg, resolve=True, save_to_file=True)
49
+
50
+
51
def task_wrapper(task_func: Callable) -> Callable:
    """Optional decorator that controls the failure behavior when executing the task function.

    This wrapper can be used to:
        - make sure loggers are closed even if the task function raises an exception (prevents multirun failure)
        - save the exception to a `.log` file
        - mark the run as failed with a dedicated file in the `logs/` folder (so we can find and rerun it later)
        - etc. (adjust depending on your needs)

    Example:
    ```
    @utils.task_wrapper
    def train(cfg: DictConfig) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        ...
        return metric_dict, object_dict
    ```

    The returned wrapper calls ``task_func(cfg=cfg)``, logs any exception and
    re-raises it, and always logs the output directory and finishes an active
    wandb run (when wandb is installed) before returning or propagating.

    :param task_func: The task function to be wrapped. It is called with the
        keyword argument ``cfg`` and must return a ``(metric_dict, object_dict)`` pair.

    :return: The wrapped task function, carrying the metadata (name, docstring, ...)
        of ``task_func``.
    """
    # Imported locally so the decorator does not depend on a file-level import.
    import functools

    @functools.wraps(task_func)  # keep the task's __name__/__doc__ on the wrapper
    def wrap(cfg: DictConfig) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        # execute the task
        try:
            metric_dict, object_dict = task_func(cfg=cfg)

        # things to do if exception occurs
        except Exception:
            # save exception to `.log` file
            log.exception("")

            # some hyperparameter combinations might be invalid or cause out-of-memory errors
            # so when using hparam search plugins like Optuna, you might want to disable
            # raising the below exception to avoid multirun failure
            # (bare `raise` re-raises the active exception with its traceback untouched)
            raise

        # things to always do after either success or exception
        finally:
            # display output dir path in terminal
            log.info(f"Output dir: {cfg.paths.output_dir}")

            # always close wandb run (even if exception occurs so multirun won't fail)
            if find_spec("wandb"):  # check if wandb is installed
                import wandb

                if wandb.run:
                    log.info("Closing wandb!")
                    wandb.finish()

        return metric_dict, object_dict

    return wrap
104
+
105
+
106
def get_metric_value(metric_dict: Dict[str, Any], metric_name: str) -> float:
    """Look up a logged metric and return it as a plain Python value.

    :param metric_dict: A dict containing metric values; the selected entry
        must provide an ``.item()`` method.
    :param metric_name: The name of the metric to retrieve. A falsy name
        (``None`` or ``""``) skips the lookup.
    :return: The result of ``.item()`` on the selected metric, or ``None`` when
        no metric name was given.
    :raises ValueError: If ``metric_name`` is not a key of ``metric_dict``.
    """
    if metric_name:
        if metric_name in metric_dict:
            value = metric_dict[metric_name].item()
            log.info(f"Retrieved metric value! <{metric_name}={value}>")
            return value

        # A name was requested but never logged.
        raise ValueError(
            f"Metric value not found! <metric_name={metric_name}>\n"
            "Make sure metric name logged in LightningModule is correct!\n"
            "Make sure `optimized_metric` name in `hparams_search` config is correct!"
        )

    log.info("Metric name is None! Skipping metric value retrieval...")
    return None
128
+
129
+
130
def intersperse(lst, item):
    """Return a new list with ``item`` before, between and after the elements of ``lst``.

    The result has ``2 * len(lst) + 1`` entries: ``item`` at every even index
    and the elements of ``lst``, in order, at the odd indices.
    """
    # Start from a list made only of the filler (blank) symbol ...
    padded = [item] * (2 * len(lst) + 1)
    # ... then drop each original element into its odd slot.
    for position, element in enumerate(lst):
        padded[2 * position + 1] = element
    return padded
135
+
136
+
137
def save_figure_to_numpy(fig):
    """Convert the rendered canvas of ``fig`` into a uint8 RGB array.

    The canvas must already have been drawn by the caller.

    :param fig: Object exposing ``fig.canvas`` with ``get_width_height()`` and
        either ``tostring_rgb()`` or ``buffer_rgba()``.
    :return: A writable ``numpy.uint8`` array of shape ``(height, width, 3)``.
    """
    canvas = fig.canvas
    if hasattr(canvas, "tostring_rgb"):
        width, height = canvas.get_width_height()
        # np.frombuffer replaces the deprecated binary mode of np.fromstring.
        pixels = np.frombuffer(canvas.tostring_rgb(), dtype=np.uint8)
        pixels = pixels.reshape((height, width, 3))
    else:
        # Canvases without tostring_rgb() expose RGBA data; drop the alpha channel.
        pixels = np.asarray(canvas.buffer_rgba(), dtype=np.uint8)[..., :3]
    # Copy so the result owns its memory and is writable, as np.fromstring's result was.
    return pixels.copy()
141
+
142
+
143
def plot_tensor(tensor):
    """Render ``tensor`` as an image plot with a colorbar and return the drawn canvas.

    The figure is created with the "default" Matplotlib style and a 12x3 figsize,
    drawn, converted with ``save_figure_to_numpy`` and then closed.

    :param tensor: Image-like data accepted by ``Axes.imshow``.
    :return: Whatever ``save_figure_to_numpy`` returns for the drawn figure.
    """
    plt.style.use("default")
    fig, ax = plt.subplots(figsize=(12, 3))
    try:
        im = ax.imshow(tensor, aspect="auto", origin="lower", interpolation="none")
        fig.colorbar(im, ax=ax)
        fig.tight_layout()
        fig.canvas.draw()
        return save_figure_to_numpy(fig)
    finally:
        # Close this specific figure even when plotting fails, so figures do not pile up.
        plt.close(fig)
153
+
154
+
155
def save_plot(tensor, savepath):
    """Render ``tensor`` as an image plot with a colorbar and write it to ``savepath``.

    The figure is created with the "default" Matplotlib style and a 12x3 figsize,
    and is closed afterwards.

    :param tensor: Image-like data accepted by ``Axes.imshow``.
    :param savepath: Destination passed to ``Figure.savefig``.
    """
    plt.style.use("default")
    fig, ax = plt.subplots(figsize=(12, 3))
    try:
        im = ax.imshow(tensor, aspect="auto", origin="lower", interpolation="none")
        fig.colorbar(im, ax=ax)
        fig.tight_layout()
        fig.canvas.draw()
        fig.savefig(savepath)
    finally:
        # Close this specific figure even when plotting or saving fails.
        plt.close(fig)
164
+
165
+
166
def to_numpy(tensor):
    """Convert ``tensor`` to a NumPy array.

    - ``numpy.ndarray``: returned unchanged (same object).
    - ``torch.Tensor``: detached, moved to CPU and converted.
    - ``list``: passed to ``numpy.array``.

    :raises TypeError: For any other input type.
    """
    if isinstance(tensor, np.ndarray):
        return tensor
    if isinstance(tensor, torch.Tensor):
        return tensor.detach().cpu().numpy()
    if isinstance(tensor, list):
        return np.array(tensor)
    raise TypeError("Unsupported type for conversion to numpy array")
175
+
176
+
177
def get_user_data_dir(appname="matcha_tts"):
    """Return (and create if needed) the per-user data directory for ``appname``.

    The base directory is chosen in this order:

    1. the ``MATCHA_HOME`` environment variable, when set;
    2. on Windows, the "Local AppData" shell folder read from the registry;
    3. on macOS, ``~/Library/Application Support/``;
    4. otherwise ``~/.local/share``.

    Args:
        appname (str): Name of application

    Returns:
        Path: path to user data directory (``<base>/<appname>``), which exists
        when this function returns.
    """

    matcha_home = os.environ.get("MATCHA_HOME")
    if matcha_home is not None:
        base_dir = Path(matcha_home).expanduser().resolve(strict=False)
    elif sys.platform == "win32":
        import winreg  # pylint: disable=import-outside-toplevel

        # Use the key as a context manager so the registry handle is closed promptly.
        with winreg.OpenKey(
            winreg.HKEY_CURRENT_USER,
            r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders",
        ) as key:
            dir_, _ = winreg.QueryValueEx(key, "Local AppData")
        base_dir = Path(dir_).resolve(strict=False)
    elif sys.platform == "darwin":
        base_dir = Path("~/Library/Application Support/").expanduser()
    else:
        base_dir = Path.home().joinpath(".local/share")

    final_path = base_dir.joinpath(appname)
    final_path.mkdir(parents=True, exist_ok=True)
    return final_path
206
+
207
+
208
def assert_model_downloaded(checkpoint_path, url, use_wget=True):
    """Download the checkpoint from ``url`` unless ``checkpoint_path`` already exists.

    The outcome of the existence check is both logged and printed.

    :param checkpoint_path: Location where the checkpoint is expected / stored.
    :param url: Source passed to the downloader.
    :param use_wget: Download with ``wget`` when true, otherwise with ``gdown``
        (called with ``fuzzy=True``).
    """
    if Path(checkpoint_path).exists():
        found_msg = f"[+] Model already present at {checkpoint_path}!"
        log.debug(found_msg)
        print(found_msg)
        return

    missing_msg = f"[-] Model not found at {checkpoint_path}! Will download it"
    log.info(missing_msg)
    print(missing_msg)

    # Both downloaders are handed the destination as a plain string.
    destination = str(checkpoint_path)
    if use_wget:
        wget.download(url=url, out=destination)
    else:
        gdown.download(url=url, output=destination, quiet=False, fuzzy=True)
minicpmo/__init__.py ADDED
@@ -0,0 +1,24 @@
1
"""MiniCPM-o top-level toolkit package `minicpmo`.

This package aggregates the speech / audio related sub-packages and reserves a
unified utils entry point:

- Top-level sub-packages:
  - cosyvoice
  - stepaudio2
  - matcha
  - s3tokenizer
- Utils entry point:
  - from minicpmo.utils import ...
"""

from .version import __version__

# Eager re-exports to allow:
#   from minicpmo import cosyvoice, stepaudio2, matcha
# without relying on lazy loading.
# NOTE(review): the docstring also lists s3tokenizer, but it is neither imported
# nor listed in __all__ here -- confirm whether that omission is intentional.
import cosyvoice as cosyvoice
import stepaudio2 as stepaudio2
import matcha as matcha

__all__ = ["__version__", "cosyvoice", "stepaudio2", "matcha"]
24
+