PyPI - dora-distil-whisper - Versions diffs - 0.3.8__py3-none-any.whl → 0.3.9__py3-none-any.whl - Mend

dora-distil-whisper 0.3.8__py3-none-any.whl → 0.3.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

dora_distil_whisper/__init__.py CHANGED Viewed

@@ -5,7 +5,7 @@ readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.m
 # Read the content of the README file
 try:
-    with open(readme_path, "r", encoding="utf-8") as f:
+    with open(readme_path, encoding="utf-8") as f:
         __doc__ = f.read()
 except FileNotFoundError:
     __doc__ = "README file not found."

dora_distil_whisper/main.py CHANGED Viewed

@@ -1,52 +1,62 @@
-import torch
-from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
-from dora import Node
-import pyarrow as pa
 import os
+import sys
 from pathlib import Path
+import pyarrow as pa
+import torch
+from dora import Node
 DEFAULT_PATH = "openai/whisper-large-v3-turbo"
-TARGET_LANGUAGE = os.getenv("TARGET_LANGUAGE", "chinese")
+TARGET_LANGUAGE = os.getenv("TARGET_LANGUAGE", "english")
 TRANSLATE = bool(os.getenv("TRANSLATE", "False") in ["True", "true"])
-MODEL_NAME_OR_PATH = os.getenv("MODEL_NAME_OR_PATH", DEFAULT_PATH)
+def load_model():
+    from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+    MODEL_NAME_OR_PATH = os.getenv("MODEL_NAME_OR_PATH", DEFAULT_PATH)
-if bool(os.getenv("USE_MODELSCOPE_HUB") in ["True", "true"]):
-    from modelscope import snapshot_download
+    if bool(os.getenv("USE_MODELSCOPE_HUB") in ["True", "true"]):
+        from modelscope import snapshot_download
-    if not Path(MODEL_NAME_OR_PATH).exists():
-        MODEL_NAME_OR_PATH = snapshot_download(MODEL_NAME_OR_PATH)
+        if not Path(MODEL_NAME_OR_PATH).exists():
+            MODEL_NAME_OR_PATH = snapshot_download(MODEL_NAME_OR_PATH)
-device = "cuda:0" if torch.cuda.is_available() else "cpu"
-torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+    device = "cuda:0" if torch.cuda.is_available() else "cpu"
+    torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+    model = AutoModelForSpeechSeq2Seq.from_pretrained(
+        MODEL_NAME_OR_PATH,
+        torch_dtype=torch_dtype,
+        low_cpu_mem_usage=True,
+        use_safetensors=True,
+    )
+    model.to(device)
-model = AutoModelForSpeechSeq2Seq.from_pretrained(
-    MODEL_NAME_OR_PATH,
-    torch_dtype=torch_dtype,
-    low_cpu_mem_usage=True,
-    use_safetensors=True,
-)
-model.to(device)
+    processor = AutoProcessor.from_pretrained(MODEL_NAME_OR_PATH)
+    pipe = pipeline(
+        "automatic-speech-recognition",
+        model=model,
+        tokenizer=processor.tokenizer,
+        feature_extractor=processor.feature_extractor,
+        max_new_tokens=400,
+        torch_dtype=torch_dtype,
+        device=device,
+    )
+    return pipe
-processor = AutoProcessor.from_pretrained(MODEL_NAME_OR_PATH)
-pipe = pipeline(
-    "automatic-speech-recognition",
-    model=model,
-    tokenizer=processor.tokenizer,
-    feature_extractor=processor.feature_extractor,
-    max_new_tokens=400,
-    torch_dtype=torch_dtype,
-    device=device,
-)
 BAD_SENTENCES = [
+    "",
+    " so",
+    " so so",
     "字幕",
     "字幕志愿",
     "中文字幕",
     "我",
     "你",
+    " you",
+    "!",
     "THANK YOU",
     " Thank you.",
     " www.microsoft.com",
@@ -60,11 +70,14 @@ BAD_SENTENCES = [
 def cut_repetition(text, min_repeat_length=4, max_repeat_length=50):
+    if len(text) == 0:
+        return text
     # Check if the text is primarily Chinese (you may need to adjust this threshold)
     if sum(1 for char in text if "\u4e00" <= char <= "\u9fff") / len(text) > 0.5:
         # Chinese text processing
         for repeat_length in range(
-            min_repeat_length, min(max_repeat_length, len(text) // 2)
+            min_repeat_length,
+            min(max_repeat_length, len(text) // 2),
         ):
             for i in range(len(text) - repeat_length * 2 + 1):
                 chunk1 = text[i : i + repeat_length]
@@ -76,7 +89,8 @@ def cut_repetition(text, min_repeat_length=4, max_repeat_length=50):
         # Non-Chinese (space-separated) text processing
         words = text.split()
         for repeat_length in range(
-            min_repeat_length, min(max_repeat_length, len(words) // 2)
+            min_repeat_length,
+            min(max_repeat_length, len(words) // 2),
         ):
             for i in range(len(words) - repeat_length * 2 + 1):
                 chunk1 = " ".join(words[i : i + repeat_length])
@@ -90,6 +104,11 @@ def cut_repetition(text, min_repeat_length=4, max_repeat_length=50):
 def main():
     node = Node()
+    # For macos use mlx:
+    if sys.platform != "darwin":
+        pipe = load_model()
     for event in node:
         if event["type"] == "INPUT":
             audio = event["value"].to_numpy()
@@ -100,10 +119,20 @@ def main():
                     "language": TARGET_LANGUAGE,
                 }
             )
-            result = pipe(
-                audio,
-                generate_kwargs=confg,
-            )
+            if sys.platform == "darwin":
+                import mlx_whisper
+                result = mlx_whisper.transcribe(
+                    audio,
+                    path_or_hf_repo="mlx-community/whisper-large-v3-turbo",
+                    append_punctuations=".",
+                )
+            else:
+                result = pipe(
+                    audio,
+                    generate_kwargs=confg,
+                )
             if result["text"] in BAD_SENTENCES:
                 continue
             text = cut_repetition(result["text"])

dora_distil_whisper-0.3.9.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,47 @@
+Metadata-Version: 2.2
+Name: dora-distil-whisper
+Version: 0.3.9
+Summary: Dora dora-distil-whisper
+Author-email: Haixuan Xavier Tao <tao.xavier@outlook.com>, Enzo Le Van <dev@enzo-le-van.fr>
+License: MIT
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+Requires-Dist: dora-rs>=0.3.6
+Requires-Dist: numpy<2.0.0
+Requires-Dist: pyarrow>=5.0.0
+Requires-Dist: transformers>=4.0.0
+Requires-Dist: accelerate>=0.29.2
+Requires-Dist: torch>=2.2.0
+Requires-Dist: modelscope>=1.18.1
+Requires-Dist: mlx-whisper>=0.4.1; sys_platform == "darwin"
+# Dora Whisper Node for transforming speech to text
+## YAML Specification
+This node is supposed to be used as follows:
+```yaml
+- id: dora-distil-whisper
+  build: pip install dora-distil-whisper
+  path: dora-distil-whisper
+  inputs:
+    input: dora-vad/audio
+  outputs:
+    - text
+  env:
+    TARGET_LANGUAGE: english
+```
+## Examples
+- speech to text
+  - github: https://github.com/dora-rs/dora/blob/main/examples/speech-to-text
+  - website: https://dora-rs.ai/docs/examples/stt
+- vision language model
+  - github: https://github.com/dora-rs/dora/blob/main/examples/vlm
+  - website: https://dora-rs.ai/docs/examples/vlm
+## License
+Dora-whisper's code and model weights are released under the MIT License

dora_distil_whisper-0.3.9.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,7 @@
+dora_distil_whisper/__init__.py,sha256=HuSK3dnyI9Pb5QAuaKFwQQ3J5SIZnLcKHPJO0norGzc,353
+dora_distil_whisper/main.py,sha256=MbT9nsEHxpyzcFzkDe4FjITzUeemh8LzCMmQSRL4xqo,4083
+dora_distil_whisper-0.3.9.dist-info/METADATA,sha256=Gxiyc_5VSjaQDduwawrH3VN2d0pKreJRC-_qaw7KKb4,1253
+dora_distil_whisper-0.3.9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+dora_distil_whisper-0.3.9.dist-info/entry_points.txt,sha256=c6QYCZs0YVR2uasYHES67JhOOvTm5QbcwGk-9IrG9oM,70
+dora_distil_whisper-0.3.9.dist-info/top_level.txt,sha256=h5QH64SWnqZA83bx740-NTxfQKdeiKTLAdGqhnwKhuQ,20
+dora_distil_whisper-0.3.9.dist-info/RECORD,,

{dora_distil_whisper-0.3.8.dist-info → dora_distil_whisper-0.3.9.dist-info}/WHEEL RENAMED Viewed

@@ -1,4 +1,5 @@
 Wheel-Version: 1.0
-Generator: poetry-core 1.9.1
+Generator: setuptools (75.8.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

dora_distil_whisper-0.3.9.dist-info/entry_points.txt ADDED Viewed

@@ -0,0 +1,2 @@
+[console_scripts]
+dora-distil-whisper = dora_distil_whisper.main:main

dora_distil_whisper-0.3.9.dist-info/top_level.txt ADDED Viewed

@@ -0,0 +1 @@
+dora_distil_whisper

dora_distil_whisper-0.3.8.dist-info/METADATA DELETED Viewed

@@ -1,32 +0,0 @@
-Metadata-Version: 2.1
-Name: dora-distil-whisper
-Version: 0.3.8
-Summary: Dora dora-distil-whisper
-Home-page: https://github.com/dora-rs/dora.git
-License: MIT
-Author: Haixuan Xavier Tao
-Author-email: tao.xavier@outlook.com
-Requires-Python: >=3.7,<4.0
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.7
-Classifier: Programming Language :: Python :: 3.8
-Classifier: Programming Language :: Python :: 3.9
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
-Classifier: Programming Language :: Python :: 3.13
-Requires-Dist: accelerate (>=0.29.2,<0.30.0)
-Requires-Dist: dora-rs (>=0.3.6,<0.4.0)
-Requires-Dist: modelscope (>=1.18.1,<2.0.0)
-Requires-Dist: numpy (<2.0.0)
-Requires-Dist: pyarrow (>=5.0.0)
-Requires-Dist: torch (>=2.2.0,<3.0.0)
-Requires-Dist: transformers (>=4.0.0,<5.0.0)
-Project-URL: Documentation, https://github.com/dora-rs/dora/blob/main/node-hub/dora-distil-whisper/README.md
-Description-Content-Type: text/markdown
-# Dora Node for transforming speech to text (English only)
-Check example at [examples/speech-to-text](examples/speech-to-text)

dora_distil_whisper-0.3.8.dist-info/RECORD DELETED Viewed

@@ -1,6 +0,0 @@
-dora_distil_whisper/__init__.py,sha256=Gy4qL4vCeTyA5HR1Yp3ioL4-ClJyW8oi_38CzMuMsBM,358
-dora_distil_whisper/main.py,sha256=-lMXHjnBw0tWnQXyeoKkrbSC4w6F6UyHjzY0GT1EENs,3398
-dora_distil_whisper-0.3.8.dist-info/METADATA,sha256=Hpv9jDKCjy9vesIZLCwnth1Iqf8uUvo2yKfbib6yG7g,1256
-dora_distil_whisper-0.3.8.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-dora_distil_whisper-0.3.8.dist-info/entry_points.txt,sha256=Q_8wNgkDYxgoKETJjM6ewXWcr_yzRUgsSeBd0uetuRs,69
-dora_distil_whisper-0.3.8.dist-info/RECORD,,

dora_distil_whisper-0.3.8.dist-info/entry_points.txt DELETED Viewed

@@ -1,3 +0,0 @@
-[console_scripts]
-dora-distil-whisper=dora_distil_whisper.main:main

dora-distil-whisper 0.3.8__py3-none-any.whl → 0.3.9__py3-none-any.whl

dora-distil-whisper 0.3.8__py3-none-any.whl → 0.3.9__py3-none-any.whl