PyPI - dora-distil-whisper - Versions diffs - 0.3.6__py3-none-any.whl → 0.3.7__py3-none-any.whl - Mend

dora-distil-whisper 0.3.6__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

dora_distil_whisper/main.py CHANGED Viewed

@@ -3,40 +3,108 @@ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
 from dora import Node
 import pyarrow as pa
 import os
+from pathlib import Path
-os.environ["TRANSFORMERS_OFFLINE"] = "1"
+DEFAULT_PATH = "openai/whisper-large-v3-turbo"
+TARGET_LANGUAGE = os.getenv("TARGET_LANGUAGE", "chinese")
+TRANSLATE = bool(os.getenv("TRANSLATE", "False") in ["True", "true"])
+MODEL_NAME_OR_PATH = os.getenv("MODEL_NAME_OR_PATH", DEFAULT_PATH)
+if bool(os.getenv("USE_MODELSCOPE_HUB") in ["True", "true"]):
+    from modelscope import snapshot_download
+    if not Path(MODEL_NAME_OR_PATH).exists():
+        MODEL_NAME_OR_PATH = snapshot_download(MODEL_NAME_OR_PATH)
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
-model_id = "distil-whisper/distil-large-v3"
 model = AutoModelForSpeechSeq2Seq.from_pretrained(
-    model_id,
+    MODEL_NAME_OR_PATH,
     torch_dtype=torch_dtype,
     low_cpu_mem_usage=True,
     use_safetensors=True,
-    local_files_only=True,
 )
 model.to(device)
-processor = AutoProcessor.from_pretrained(model_id)
+processor = AutoProcessor.from_pretrained(MODEL_NAME_OR_PATH)
 pipe = pipeline(
     "automatic-speech-recognition",
     model=model,
     tokenizer=processor.tokenizer,
     feature_extractor=processor.feature_extractor,
-    max_new_tokens=128,
+    max_new_tokens=400,
     torch_dtype=torch_dtype,
     device=device,
-    generate_kwargs={"language": "chinese"},
 )
+BAD_SENTENCES = [
+    "字幕",
+    "字幕志愿",
+    "中文字幕",
+    "我",
+    "你",
+    "THANK YOU",
+    " Thank you.",
+    " www.microsoft.com",
+    " The",
+    " BANG",
+    " Silence.",
+    " Sous-titrage Société Radio-Canada",
+    " Sous",
+    " Sous-",
+]
+def cut_repetition(text, min_repeat_length=4, max_repeat_length=50):
+    # Check if the text is primarily Chinese (you may need to adjust this threshold)
+    if sum(1 for char in text if "\u4e00" <= char <= "\u9fff") / len(text) > 0.5:
+        # Chinese text processing
+        for repeat_length in range(
+            min_repeat_length, min(max_repeat_length, len(text) // 2)
+        ):
+            for i in range(len(text) - repeat_length * 2 + 1):
+                chunk1 = text[i : i + repeat_length]
+                chunk2 = text[i + repeat_length : i + repeat_length * 2]
+                if chunk1 == chunk2:
+                    return text[: i + repeat_length]
+    else:
+        # Non-Chinese (space-separated) text processing
+        words = text.split()
+        for repeat_length in range(
+            min_repeat_length, min(max_repeat_length, len(words) // 2)
+        ):
+            for i in range(len(words) - repeat_length * 2 + 1):
+                chunk1 = " ".join(words[i : i + repeat_length])
+                chunk2 = " ".join(words[i + repeat_length : i + repeat_length * 2])
+                if chunk1 == chunk2:
+                    return " ".join(words[: i + repeat_length])
+    return text
 def main():
     node = Node()
     for event in node:
         if event["type"] == "INPUT":
             audio = event["value"].to_numpy()
-            result = pipe(audio)
-            node.send_output("text", pa.array([result["text"]]))
+            confg = (
+                {"language": TARGET_LANGUAGE, "task": "translate"}
+                if TRANSLATE
+                else {
+                    "language": TARGET_LANGUAGE,
+                }
+            )
+            result = pipe(
+                audio,
+                generate_kwargs=confg,
+            )
+            if result["text"] in BAD_SENTENCES:
+                continue
+            text = cut_repetition(result["text"])
+            node.send_output("text", pa.array([text]), {"language": TARGET_LANGUAGE})

{dora_distil_whisper-0.3.6.dist-info → dora_distil_whisper-0.3.7.dist-info}/METADATA RENAMED Viewed

@@ -1,30 +1,28 @@
 Metadata-Version: 2.1
 Name: dora-distil-whisper
-Version: 0.3.6
+Version: 0.3.7
 Summary: Dora dora-distil-whisper
 Home-page: https://github.com/dora-rs/dora.git
 License: MIT
 Author: Haixuan Xavier Tao
 Author-email: tao.xavier@outlook.com
+Requires-Python: >=3.7,<4.0
 Classifier: License :: OSI Approved :: MIT License
-Classifier: Programming Language :: Python :: 2
-Classifier: Programming Language :: Python :: 2.7
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.4
-Classifier: Programming Language :: Python :: 3.5
-Classifier: Programming Language :: Python :: 3.6
 Classifier: Programming Language :: Python :: 3.7
 Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
 Requires-Dist: accelerate (>=0.29.2,<0.30.0)
 Requires-Dist: dora-rs (>=0.3.6,<0.4.0)
+Requires-Dist: modelscope (>=1.18.1,<2.0.0)
 Requires-Dist: numpy (<2.0.0)
 Requires-Dist: pyarrow (>=5.0.0)
-Requires-Dist: torch (>=2.1.1,<3.0.0)
-Requires-Dist: transformers (>=4.0.0)
+Requires-Dist: torch (>=2.2.0,<3.0.0)
+Requires-Dist: transformers (>=4.0.0,<5.0.0)
 Project-URL: Documentation, https://github.com/dora-rs/dora/blob/main/node-hub/dora-distil-whisper/README.md
 Description-Content-Type: text/markdown

dora_distil_whisper-0.3.7.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,6 @@
+dora_distil_whisper/__init__.py,sha256=Gy4qL4vCeTyA5HR1Yp3ioL4-ClJyW8oi_38CzMuMsBM,358
+dora_distil_whisper/main.py,sha256=-lMXHjnBw0tWnQXyeoKkrbSC4w6F6UyHjzY0GT1EENs,3398
+dora_distil_whisper-0.3.7.dist-info/METADATA,sha256=A-OtHlIzvlzBBuX4E9Zg__hGw8RQbX7XQZSgIPdD9AU,1256
+dora_distil_whisper-0.3.7.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+dora_distil_whisper-0.3.7.dist-info/entry_points.txt,sha256=Q_8wNgkDYxgoKETJjM6ewXWcr_yzRUgsSeBd0uetuRs,69
+dora_distil_whisper-0.3.7.dist-info/RECORD,,

dora_distil_whisper-0.3.7.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: poetry-core 1.9.1
+Root-Is-Purelib: true
+Tag: py3-none-any

dora_distil_whisper-0.3.6.dist-info/RECORD DELETED Viewed

@@ -1,6 +0,0 @@
-dora_distil_whisper/__init__.py,sha256=Gy4qL4vCeTyA5HR1Yp3ioL4-ClJyW8oi_38CzMuMsBM,358
-dora_distil_whisper/main.py,sha256=xFwK_DhqA-2fWQKPxj6w48NvFVOUvUewcnVd3lnJ0Vc,1123
-dora_distil_whisper-0.3.6.dist-info/METADATA,sha256=gHvOSGNsTHz2djOivsrH59WB-1AE2voM1Nsag10ovlU,1374
-dora_distil_whisper-0.3.6.dist-info/WHEEL,sha256=IrRNNNJ-uuL1ggO5qMvT1GGhQVdQU54d6ZpYqEZfEWo,92
-dora_distil_whisper-0.3.6.dist-info/entry_points.txt,sha256=Q_8wNgkDYxgoKETJjM6ewXWcr_yzRUgsSeBd0uetuRs,69
-dora_distil_whisper-0.3.6.dist-info/RECORD,,

dora_distil_whisper-0.3.6.dist-info/WHEEL DELETED Viewed

@@ -1,4 +0,0 @@
-Wheel-Version: 1.0
-Generator: poetry-core 1.9.0
-Root-Is-Purelib: true
-Tag: py2.py3-none-any

{dora_distil_whisper-0.3.6.dist-info → dora_distil_whisper-0.3.7.dist-info}/entry_points.txt RENAMED Viewed

File without changes

dora-distil-whisper 0.3.6__py3-none-any.whl → 0.3.7__py3-none-any.whl

dora-distil-whisper 0.3.6__py3-none-any.whl → 0.3.7__py3-none-any.whl