dora-distil-whisper 0.3.6__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dora_distil_whisper/main.py +77 -9
- {dora_distil_whisper-0.3.6.dist-info → dora_distil_whisper-0.3.7.dist-info}/METADATA +6 -8
- dora_distil_whisper-0.3.7.dist-info/RECORD +6 -0
- dora_distil_whisper-0.3.7.dist-info/WHEEL +4 -0
- dora_distil_whisper-0.3.6.dist-info/RECORD +0 -6
- dora_distil_whisper-0.3.6.dist-info/WHEEL +0 -4
- {dora_distil_whisper-0.3.6.dist-info → dora_distil_whisper-0.3.7.dist-info}/entry_points.txt +0 -0
dora_distil_whisper/main.py
CHANGED
@@ -3,40 +3,108 @@ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
|
|
3
3
|
from dora import Node
|
4
4
|
import pyarrow as pa
|
5
5
|
import os
|
6
|
+
from pathlib import Path
|
6
7
|
|
7
|
-
|
8
|
+
DEFAULT_PATH = "openai/whisper-large-v3-turbo"
|
9
|
+
TARGET_LANGUAGE = os.getenv("TARGET_LANGUAGE", "chinese")
|
10
|
+
TRANSLATE = bool(os.getenv("TRANSLATE", "False") in ["True", "true"])
|
11
|
+
|
12
|
+
|
13
|
+
MODEL_NAME_OR_PATH = os.getenv("MODEL_NAME_OR_PATH", DEFAULT_PATH)
|
14
|
+
|
15
|
+
if bool(os.getenv("USE_MODELSCOPE_HUB") in ["True", "true"]):
|
16
|
+
from modelscope import snapshot_download
|
17
|
+
|
18
|
+
if not Path(MODEL_NAME_OR_PATH).exists():
|
19
|
+
MODEL_NAME_OR_PATH = snapshot_download(MODEL_NAME_OR_PATH)
|
8
20
|
|
9
21
|
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
10
22
|
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
|
11
23
|
|
12
|
-
model_id = "distil-whisper/distil-large-v3"
|
13
24
|
|
14
25
|
model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
15
|
-
|
26
|
+
MODEL_NAME_OR_PATH,
|
16
27
|
torch_dtype=torch_dtype,
|
17
28
|
low_cpu_mem_usage=True,
|
18
29
|
use_safetensors=True,
|
19
|
-
local_files_only=True,
|
20
30
|
)
|
21
31
|
model.to(device)
|
22
32
|
|
23
|
-
processor = AutoProcessor.from_pretrained(model_id)
|
33
|
+
processor = AutoProcessor.from_pretrained(MODEL_NAME_OR_PATH)
|
24
34
|
pipe = pipeline(
|
25
35
|
"automatic-speech-recognition",
|
26
36
|
model=model,
|
27
37
|
tokenizer=processor.tokenizer,
|
28
38
|
feature_extractor=processor.feature_extractor,
|
29
|
-
max_new_tokens=
|
39
|
+
max_new_tokens=400,
|
30
40
|
torch_dtype=torch_dtype,
|
31
41
|
device=device,
|
32
|
-
generate_kwargs={"language": "chinese"},
|
33
42
|
)
|
34
43
|
|
44
|
+
BAD_SENTENCES = [
|
45
|
+
"字幕",
|
46
|
+
"字幕志愿",
|
47
|
+
"中文字幕",
|
48
|
+
"我",
|
49
|
+
"你",
|
50
|
+
"THANK YOU",
|
51
|
+
" Thank you.",
|
52
|
+
" www.microsoft.com",
|
53
|
+
" The",
|
54
|
+
" BANG",
|
55
|
+
" Silence.",
|
56
|
+
" Sous-titrage Société Radio-Canada",
|
57
|
+
" Sous",
|
58
|
+
" Sous-",
|
59
|
+
]
|
60
|
+
|
61
|
+
|
62
|
+
def cut_repetition(text, min_repeat_length=4, max_repeat_length=50):
|
63
|
+
# Check if the text is primarily Chinese (you may need to adjust this threshold)
|
64
|
+
if sum(1 for char in text if "\u4e00" <= char <= "\u9fff") / len(text) > 0.5:
|
65
|
+
# Chinese text processing
|
66
|
+
for repeat_length in range(
|
67
|
+
min_repeat_length, min(max_repeat_length, len(text) // 2)
|
68
|
+
):
|
69
|
+
for i in range(len(text) - repeat_length * 2 + 1):
|
70
|
+
chunk1 = text[i : i + repeat_length]
|
71
|
+
chunk2 = text[i + repeat_length : i + repeat_length * 2]
|
72
|
+
|
73
|
+
if chunk1 == chunk2:
|
74
|
+
return text[: i + repeat_length]
|
75
|
+
else:
|
76
|
+
# Non-Chinese (space-separated) text processing
|
77
|
+
words = text.split()
|
78
|
+
for repeat_length in range(
|
79
|
+
min_repeat_length, min(max_repeat_length, len(words) // 2)
|
80
|
+
):
|
81
|
+
for i in range(len(words) - repeat_length * 2 + 1):
|
82
|
+
chunk1 = " ".join(words[i : i + repeat_length])
|
83
|
+
chunk2 = " ".join(words[i + repeat_length : i + repeat_length * 2])
|
84
|
+
|
85
|
+
if chunk1 == chunk2:
|
86
|
+
return " ".join(words[: i + repeat_length])
|
87
|
+
|
88
|
+
return text
|
89
|
+
|
35
90
|
|
36
91
|
def main():
|
37
92
|
node = Node()
|
38
93
|
for event in node:
|
39
94
|
if event["type"] == "INPUT":
|
40
95
|
audio = event["value"].to_numpy()
|
41
|
-
|
42
|
-
|
96
|
+
confg = (
|
97
|
+
{"language": TARGET_LANGUAGE, "task": "translate"}
|
98
|
+
if TRANSLATE
|
99
|
+
else {
|
100
|
+
"language": TARGET_LANGUAGE,
|
101
|
+
}
|
102
|
+
)
|
103
|
+
result = pipe(
|
104
|
+
audio,
|
105
|
+
generate_kwargs=confg,
|
106
|
+
)
|
107
|
+
if result["text"] in BAD_SENTENCES:
|
108
|
+
continue
|
109
|
+
text = cut_repetition(result["text"])
|
110
|
+
node.send_output("text", pa.array([text]), {"language": TARGET_LANGUAGE})
|
@@ -1,30 +1,28 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: dora-distil-whisper
|
3
|
-
Version: 0.3.6
|
3
|
+
Version: 0.3.7
|
4
4
|
Summary: Dora dora-distil-whisper
|
5
5
|
Home-page: https://github.com/dora-rs/dora.git
|
6
6
|
License: MIT
|
7
7
|
Author: Haixuan Xavier Tao
|
8
8
|
Author-email: tao.xavier@outlook.com
|
9
|
+
Requires-Python: >=3.7,<4.0
|
9
10
|
Classifier: License :: OSI Approved :: MIT License
|
10
|
-
Classifier: Programming Language :: Python :: 2
|
11
|
-
Classifier: Programming Language :: Python :: 2.7
|
12
11
|
Classifier: Programming Language :: Python :: 3
|
13
|
-
Classifier: Programming Language :: Python :: 3.4
|
14
|
-
Classifier: Programming Language :: Python :: 3.5
|
15
|
-
Classifier: Programming Language :: Python :: 3.6
|
16
12
|
Classifier: Programming Language :: Python :: 3.7
|
17
13
|
Classifier: Programming Language :: Python :: 3.8
|
18
14
|
Classifier: Programming Language :: Python :: 3.9
|
19
15
|
Classifier: Programming Language :: Python :: 3.10
|
20
16
|
Classifier: Programming Language :: Python :: 3.11
|
21
17
|
Classifier: Programming Language :: Python :: 3.12
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
22
19
|
Requires-Dist: accelerate (>=0.29.2,<0.30.0)
|
23
20
|
Requires-Dist: dora-rs (>=0.3.6,<0.4.0)
|
21
|
+
Requires-Dist: modelscope (>=1.18.1,<2.0.0)
|
24
22
|
Requires-Dist: numpy (<2.0.0)
|
25
23
|
Requires-Dist: pyarrow (>=5.0.0)
|
26
|
-
Requires-Dist: torch (>=2.
|
27
|
-
Requires-Dist: transformers (>=4.0.0)
|
24
|
+
Requires-Dist: torch (>=2.2.0,<3.0.0)
|
25
|
+
Requires-Dist: transformers (>=4.0.0,<5.0.0)
|
28
26
|
Project-URL: Documentation, https://github.com/dora-rs/dora/blob/main/node-hub/dora-distil-whisper/README.md
|
29
27
|
Description-Content-Type: text/markdown
|
30
28
|
|
@@ -0,0 +1,6 @@
|
|
1
|
+
dora_distil_whisper/__init__.py,sha256=Gy4qL4vCeTyA5HR1Yp3ioL4-ClJyW8oi_38CzMuMsBM,358
|
2
|
+
dora_distil_whisper/main.py,sha256=-lMXHjnBw0tWnQXyeoKkrbSC4w6F6UyHjzY0GT1EENs,3398
|
3
|
+
dora_distil_whisper-0.3.7.dist-info/METADATA,sha256=A-OtHlIzvlzBBuX4E9Zg__hGw8RQbX7XQZSgIPdD9AU,1256
|
4
|
+
dora_distil_whisper-0.3.7.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
5
|
+
dora_distil_whisper-0.3.7.dist-info/entry_points.txt,sha256=Q_8wNgkDYxgoKETJjM6ewXWcr_yzRUgsSeBd0uetuRs,69
|
6
|
+
dora_distil_whisper-0.3.7.dist-info/RECORD,,
|
@@ -1,6 +0,0 @@
|
|
1
|
-
dora_distil_whisper/__init__.py,sha256=Gy4qL4vCeTyA5HR1Yp3ioL4-ClJyW8oi_38CzMuMsBM,358
|
2
|
-
dora_distil_whisper/main.py,sha256=xFwK_DhqA-2fWQKPxj6w48NvFVOUvUewcnVd3lnJ0Vc,1123
|
3
|
-
dora_distil_whisper-0.3.6.dist-info/METADATA,sha256=gHvOSGNsTHz2djOivsrH59WB-1AE2voM1Nsag10ovlU,1374
|
4
|
-
dora_distil_whisper-0.3.6.dist-info/WHEEL,sha256=IrRNNNJ-uuL1ggO5qMvT1GGhQVdQU54d6ZpYqEZfEWo,92
|
5
|
-
dora_distil_whisper-0.3.6.dist-info/entry_points.txt,sha256=Q_8wNgkDYxgoKETJjM6ewXWcr_yzRUgsSeBd0uetuRs,69
|
6
|
-
dora_distil_whisper-0.3.6.dist-info/RECORD,,
|
{dora_distil_whisper-0.3.6.dist-info → dora_distil_whisper-0.3.7.dist-info}/entry_points.txt
RENAMED
File without changes
|