dora-distil-whisper 0.3.6__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,40 +3,108 @@ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
3
3
  from dora import Node
4
4
  import pyarrow as pa
5
5
  import os
6
+ from pathlib import Path
6
7
 
7
- os.environ["TRANSFORMERS_OFFLINE"] = "1"
8
+ DEFAULT_PATH = "openai/whisper-large-v3-turbo"
9
+ TARGET_LANGUAGE = os.getenv("TARGET_LANGUAGE", "chinese")
10
+ TRANSLATE = bool(os.getenv("TRANSLATE", "False") in ["True", "true"])
11
+
12
+
13
+ MODEL_NAME_OR_PATH = os.getenv("MODEL_NAME_OR_PATH", DEFAULT_PATH)
14
+
15
+ if bool(os.getenv("USE_MODELSCOPE_HUB") in ["True", "true"]):
16
+ from modelscope import snapshot_download
17
+
18
+ if not Path(MODEL_NAME_OR_PATH).exists():
19
+ MODEL_NAME_OR_PATH = snapshot_download(MODEL_NAME_OR_PATH)
8
20
 
9
21
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
10
22
  torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
11
23
 
12
- model_id = "distil-whisper/distil-large-v3"
13
24
 
14
25
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
15
- model_id,
26
+ MODEL_NAME_OR_PATH,
16
27
  torch_dtype=torch_dtype,
17
28
  low_cpu_mem_usage=True,
18
29
  use_safetensors=True,
19
- local_files_only=True,
20
30
  )
21
31
  model.to(device)
22
32
 
23
- processor = AutoProcessor.from_pretrained(model_id)
33
+ processor = AutoProcessor.from_pretrained(MODEL_NAME_OR_PATH)
24
34
  pipe = pipeline(
25
35
  "automatic-speech-recognition",
26
36
  model=model,
27
37
  tokenizer=processor.tokenizer,
28
38
  feature_extractor=processor.feature_extractor,
29
- max_new_tokens=128,
39
+ max_new_tokens=400,
30
40
  torch_dtype=torch_dtype,
31
41
  device=device,
32
- generate_kwargs={"language": "chinese"},
33
42
  )
34
43
 
44
+ BAD_SENTENCES = [
45
+ "字幕",
46
+ "字幕志愿",
47
+ "中文字幕",
48
+ "我",
49
+ "你",
50
+ "THANK YOU",
51
+ " Thank you.",
52
+ " www.microsoft.com",
53
+ " The",
54
+ " BANG",
55
+ " Silence.",
56
+ " Sous-titrage Société Radio-Canada",
57
+ " Sous",
58
+ " Sous-",
59
+ ]
60
+
61
+
62
def cut_repetition(text, min_repeat_length=4, max_repeat_length=50):
    """Truncate *text* just after the first chunk that is immediately repeated.

    Speech-to-text output sometimes loops on the same phrase. The text is
    scanned for two identical, adjacent chunks; when one is found, everything
    after the first copy is dropped.

    Text in which more than half of the characters fall in the CJK Unified
    Ideographs range (U+4E00..U+9FFF) is compared character by character.
    Any other text is split on whitespace and compared word by word.

    Args:
        text: The transcription to clean up.
        min_repeat_length: Smallest chunk size (characters or words) that
            counts as a repetition. Values below 1 are treated as 1, because
            a zero-length chunk would trivially match everywhere.
        max_repeat_length: Largest chunk size (characters or words) that is
            tried, inclusive.

    Returns:
        The text up to and including the first copy of the repeated chunk, or
        *text* unchanged when no repetition is found. In the word-based
        branch a truncated result is re-joined with single spaces.
    """
    # An empty transcription has nothing to cut, and would otherwise divide
    # by zero in the ratio computation below.
    if not text:
        return text

    chinese_chars = sum(1 for char in text if "\u4e00" <= char <= "\u9fff")
    if chinese_chars / len(text) > 0.5:
        # Chinese text has no word separators: work on raw characters.
        units = text
        separator = ""
    else:
        # Space-separated languages: work on whole words.
        units = text.split()
        separator = " "

    # A chunk can be at most half of the text, since it has to fit twice.
    # The upper bound is inclusive so that a text made of exactly two copies
    # of a chunk is still detected.
    longest = min(max_repeat_length, len(units) // 2)
    for repeat_length in range(max(1, min_repeat_length), longest + 1):
        for start in range(len(units) - repeat_length * 2 + 1):
            middle = start + repeat_length
            if units[start:middle] == units[middle : middle + repeat_length]:
                return separator.join(units[:middle])

    return text
89
+
35
90
 
36
91
  def main():
37
92
  node = Node()
38
93
  for event in node:
39
94
  if event["type"] == "INPUT":
40
95
  audio = event["value"].to_numpy()
41
- result = pipe(audio)
42
- node.send_output("text", pa.array([result["text"]]))
96
+ confg = (
97
+ {"language": TARGET_LANGUAGE, "task": "translate"}
98
+ if TRANSLATE
99
+ else {
100
+ "language": TARGET_LANGUAGE,
101
+ }
102
+ )
103
+ result = pipe(
104
+ audio,
105
+ generate_kwargs=confg,
106
+ )
107
+ if result["text"] in BAD_SENTENCES:
108
+ continue
109
+ text = cut_repetition(result["text"])
110
+ node.send_output("text", pa.array([text]), {"language": TARGET_LANGUAGE})
@@ -1,30 +1,28 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dora-distil-whisper
3
- Version: 0.3.6
3
+ Version: 0.3.7
4
4
  Summary: Dora dora-distil-whisper
5
5
  Home-page: https://github.com/dora-rs/dora.git
6
6
  License: MIT
7
7
  Author: Haixuan Xavier Tao
8
8
  Author-email: tao.xavier@outlook.com
9
+ Requires-Python: >=3.7,<4.0
9
10
  Classifier: License :: OSI Approved :: MIT License
10
- Classifier: Programming Language :: Python :: 2
11
- Classifier: Programming Language :: Python :: 2.7
12
11
  Classifier: Programming Language :: Python :: 3
13
- Classifier: Programming Language :: Python :: 3.4
14
- Classifier: Programming Language :: Python :: 3.5
15
- Classifier: Programming Language :: Python :: 3.6
16
12
  Classifier: Programming Language :: Python :: 3.7
17
13
  Classifier: Programming Language :: Python :: 3.8
18
14
  Classifier: Programming Language :: Python :: 3.9
19
15
  Classifier: Programming Language :: Python :: 3.10
20
16
  Classifier: Programming Language :: Python :: 3.11
21
17
  Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
22
19
  Requires-Dist: accelerate (>=0.29.2,<0.30.0)
23
20
  Requires-Dist: dora-rs (>=0.3.6,<0.4.0)
21
+ Requires-Dist: modelscope (>=1.18.1,<2.0.0)
24
22
  Requires-Dist: numpy (<2.0.0)
25
23
  Requires-Dist: pyarrow (>=5.0.0)
26
- Requires-Dist: torch (>=2.1.1,<3.0.0)
27
- Requires-Dist: transformers (>=4.0.0)
24
+ Requires-Dist: torch (>=2.2.0,<3.0.0)
25
+ Requires-Dist: transformers (>=4.0.0,<5.0.0)
28
26
  Project-URL: Documentation, https://github.com/dora-rs/dora/blob/main/node-hub/dora-distil-whisper/README.md
29
27
  Description-Content-Type: text/markdown
30
28
 
@@ -0,0 +1,6 @@
1
+ dora_distil_whisper/__init__.py,sha256=Gy4qL4vCeTyA5HR1Yp3ioL4-ClJyW8oi_38CzMuMsBM,358
2
+ dora_distil_whisper/main.py,sha256=-lMXHjnBw0tWnQXyeoKkrbSC4w6F6UyHjzY0GT1EENs,3398
3
+ dora_distil_whisper-0.3.7.dist-info/METADATA,sha256=A-OtHlIzvlzBBuX4E9Zg__hGw8RQbX7XQZSgIPdD9AU,1256
4
+ dora_distil_whisper-0.3.7.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
5
+ dora_distil_whisper-0.3.7.dist-info/entry_points.txt,sha256=Q_8wNgkDYxgoKETJjM6ewXWcr_yzRUgsSeBd0uetuRs,69
6
+ dora_distil_whisper-0.3.7.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: poetry-core 1.9.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -1,6 +0,0 @@
1
- dora_distil_whisper/__init__.py,sha256=Gy4qL4vCeTyA5HR1Yp3ioL4-ClJyW8oi_38CzMuMsBM,358
2
- dora_distil_whisper/main.py,sha256=xFwK_DhqA-2fWQKPxj6w48NvFVOUvUewcnVd3lnJ0Vc,1123
3
- dora_distil_whisper-0.3.6.dist-info/METADATA,sha256=gHvOSGNsTHz2djOivsrH59WB-1AE2voM1Nsag10ovlU,1374
4
- dora_distil_whisper-0.3.6.dist-info/WHEEL,sha256=IrRNNNJ-uuL1ggO5qMvT1GGhQVdQU54d6ZpYqEZfEWo,92
5
- dora_distil_whisper-0.3.6.dist-info/entry_points.txt,sha256=Q_8wNgkDYxgoKETJjM6ewXWcr_yzRUgsSeBd0uetuRs,69
6
- dora_distil_whisper-0.3.6.dist-info/RECORD,,
@@ -1,4 +0,0 @@
1
- Wheel-Version: 1.0
2
- Generator: poetry-core 1.9.0
3
- Root-Is-Purelib: true
4
- Tag: py2.py3-none-any