phonexia-enhanced-speech-to-text-built-on-whisper-client 1.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,33 @@
1
+ Metadata-Version: 2.1
2
+ Name: phonexia-enhanced-speech-to-text-built-on-whisper-client
3
+ Version: 1.2.1
4
+ Summary: Client for communication with Phonexia Enhanced Speech To Text Built On Whisper microservice.
5
+ Keywords: grpc,transcription,STT,ASR,speech to text,speech,language,microservice
6
+ Author: Phonexia
7
+ Author-email: info@phonexia.com
8
+ Requires-Python: >=3.8,<4.0
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.8
11
+ Classifier: Programming Language :: Python :: 3.9
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Requires-Dist: grpcio (>=1.54.0,<2.0.0)
16
+ Requires-Dist: phonexia-grpc (>=1.0.0,<2.0.0)
17
+ Project-URL: Homepage, https://phonexia.com
18
+ Project-URL: Issues, https://phonexia.atlassian.net/servicedesk/customer/portal/15/group/20/create/40
19
+ Project-URL: protofiles, https://github.com/phonexia/protofiles
20
+ Description-Content-Type: text/markdown
21
+
22
+
23
+ ![](https://www.phonexia.com/wp-content/uploads/PHX_logotype_basic_2016_positive_transparent_RGB.png)
24
+
25
+ # Phonexia enhanced speech to text built on whisper client
26
+
27
+ This module contains a client for communication with [enhanced speech to text built on whisper](https://hub.docker.com/r/phonexia/enhanced-speech-to-text-built-on-whisper/) developed by [Phonexia](https://phonexia.com).
28
+
29
+ To use this client, you will first need a running instance of a *Phonexia enhanced speech to text built on whisper microservice*. If you don't have a running instance yet, don't hesitate to [contact our sales department](mailto:info@phonexia.com).
30
+
31
+ You can learn more about the enhanced speech to text built on whisper technology [here](https://docs.cloud.phonexia.com/docs/category/enhanced-speech-to-text-built-on-whisper).
32
+
33
+ On [this page](https://docs.cloud.phonexia.com/docs/products/speech-platform-4/grpc/api/phonexia/grpc/technologies/enhanced_speech_to_text_built_on_whisper/v1/enhanced_speech_to_text_built_on_whisper.proto), you will find a *gRPC API* reference for the *enhanced speech to text built on whisper microservice*.
@@ -0,0 +1,252 @@
1
+ import argparse
2
+ import logging
3
+ import os
4
+ from enum import Enum
5
+ from typing import Iterator, Optional
6
+
7
+ import google.protobuf.duration_pb2
8
+ import grpc
9
+ import phonexia.grpc.technologies.enhanced_speech_to_text_built_on_whisper.v1.enhanced_speech_to_text_built_on_whisper_pb2_grpc as stt_grpc
10
+ from phonexia.grpc.common.core_pb2 import Audio, TimeRange
11
+ from phonexia.grpc.technologies.enhanced_speech_to_text_built_on_whisper.v1.enhanced_speech_to_text_built_on_whisper_pb2 import (
12
+ TranscribeConfig,
13
+ TranscribeRequest,
14
+ TranslateConfig,
15
+ TranslateRequest,
16
+ )
17
+
18
# Maximum number of bytes read from the input file for each streamed request.
CHUNK_SIZE = 32000


class Task(Enum):
    """Operation the client can request for a recording."""

    transcribe = "transcribe"
    translate = "translate"

    def __str__(self) -> str:
        """Return the member's underlying string value."""
        return str(self.value)
27
+
28
+
29
def time_to_duration(time: Optional[float]) -> Optional[google.protobuf.duration_pb2.Duration]:
    """Convert a time offset in seconds to a protobuf ``Duration``.

    The annotations use ``Optional[...]`` instead of ``X | None`` because they are
    evaluated when the function is defined, and ``|`` between types only works on
    Python 3.10+, while this package declares support for Python 3.8+.

    Args:
        time: Offset in seconds, or ``None`` when no offset was given.

    Returns:
        A ``Duration`` holding the whole seconds and the fractional part as
        nanoseconds, or ``None`` if ``time`` is ``None``.
    """
    if time is None:
        return None

    seconds = int(time)
    # Round instead of truncating: in binary floating point 2.3 - 2 is slightly less
    # than 0.3, which truncation would turn into 299999999 ns.
    nanos = round((time - seconds) * 1e9)
    # Rounding may yield a full second worth of nanoseconds; carry it into seconds so
    # that nanos stays within the range accepted by Duration.
    if abs(nanos) >= 1_000_000_000:
        seconds += 1 if nanos > 0 else -1
        nanos = 0

    return google.protobuf.duration_pb2.Duration(seconds=seconds, nanos=nanos)
36
+
37
+
38
def transcribe_request_iterator(
    file: str,
    specified_language: Optional[str],
    start: Optional[float],
    end: Optional[float],
    enable_language_switching: bool = False,
) -> Iterator[TranscribeRequest]:
    """Yield ``TranscribeRequest`` messages carrying the file content chunk by chunk.

    The file is read in binary mode in pieces of at most ``CHUNK_SIZE`` bytes. Only
    the first yielded request carries the time range and the transcription config;
    every following request passes ``None`` for both.

    Args:
        file: Path of the audio file to stream.
        specified_language: Language passed to ``TranscribeConfig``; may be ``None``.
        start: Start offset in seconds for the time range, or ``None``.
        end: End offset in seconds for the time range, or ``None``.
        enable_language_switching: Forwarded to ``TranscribeConfig``.
    """
    first_time_range = TimeRange(start=time_to_duration(start), end=time_to_duration(end))
    first_config = TranscribeConfig(
        language=specified_language,
        enable_language_switching=enable_language_switching,
    )

    with open(file, "rb") as audio_file:
        # iter() with a sentinel stops as soon as read() returns empty bytes (EOF).
        for chunk in iter(lambda: audio_file.read(CHUNK_SIZE), b""):
            audio = Audio(content=chunk, time_range=first_time_range)
            yield TranscribeRequest(audio=audio, config=first_config)
            # Time range and config are sent once, with the first chunk only.
            first_time_range = None
            first_config = None
57
+
58
+
59
def translate_request_iterator(
    file: str,
    specified_language: Optional[str],
    start: Optional[float],
    end: Optional[float],
    enable_language_switching: bool = False,
) -> Iterator[TranslateRequest]:
    """Yield ``TranslateRequest`` messages carrying the file content chunk by chunk.

    The file is read in binary mode in pieces of at most ``CHUNK_SIZE`` bytes. Only
    the first yielded request carries the time range and the translation config;
    every following request passes ``None`` for both.

    Args:
        file: Path of the audio file to stream.
        specified_language: Source language passed to ``TranslateConfig``; may be ``None``.
        start: Start offset in seconds for the time range, or ``None``.
        end: End offset in seconds for the time range, or ``None``.
        enable_language_switching: Forwarded to ``TranslateConfig``.
    """
    first_time_range = TimeRange(start=time_to_duration(start), end=time_to_duration(end))
    first_config = TranslateConfig(
        source_language=specified_language,
        enable_language_switching=enable_language_switching,
    )

    with open(file, "rb") as audio_file:
        # iter() with a sentinel stops as soon as read() returns empty bytes (EOF).
        for chunk in iter(lambda: audio_file.read(CHUNK_SIZE), b""):
            audio = Audio(content=chunk, time_range=first_time_range)
            yield TranslateRequest(audio=audio, config=first_config)
            # Time range and config are sent once, with the first chunk only.
            first_time_range = None
            first_config = None
76
+
77
+
78
def transcribe(
    channel: grpc.Channel,
    file: str,
    language: Optional[str],
    start: Optional[float],
    end: Optional[float],
    metadata: Optional[list],
    task: Task,
    enable_language_switching: bool = False,
):
    """Run the selected task on an audio file and print the returned segments.

    The file is streamed to the service through ``SpeechToTextStub`` and every
    segment of each response is printed to stdout, followed by the processed audio
    length whenever a response carries that field. Segments whose detected source
    language differs from the source language actually used produce a warning; the
    distinct warnings are printed once at the end, in the order they first occurred.

    Args:
        channel: gRPC channel used to create the stub.
        file: Path of the audio file to send.
        language: Language to enforce, or ``None``.
        start: Start offset in seconds, or ``None``.
        end: End offset in seconds, or ``None``.
        metadata: Call metadata passed to the RPC, or ``None``.
        task: Whether to call ``Transcribe`` or ``Translate``.
        enable_language_switching: Forwarded to the request config.

    Raises:
        RuntimeError: If ``task`` is neither ``Task.transcribe`` nor ``Task.translate``.

    Errors raised by the RPC itself are not handled here and propagate to the caller.
    """
    stub = stt_grpc.SpeechToTextStub(channel)
    if task == Task.transcribe:
        response = stub.Transcribe(
            transcribe_request_iterator(
                file=file,
                specified_language=language,
                start=start,
                end=end,
                enable_language_switching=enable_language_switching,
            ),
            metadata=metadata,
        )
    elif task == Task.translate:
        response = stub.Translate(
            translate_request_iterator(
                file=file,
                specified_language=language,
                start=start,
                end=end,
                enable_language_switching=enable_language_switching,
            ),
            metadata=metadata,
        )
    else:
        raise RuntimeError("Unknown task")

    warning_messages = []
    for _response in response:
        for segment in _response.result.one_best.segments:
            print(
                f"[{segment.start_time.ToJsonString()} -> {segment.end_time.ToJsonString()} "
                + (
                    f"{segment.language}"
                    if task == Task.transcribe
                    else f"{segment.source_language} -> {segment.language}"
                )
                + f"] {segment.text}"
            )
            if segment.source_language != segment.detected_source_language:
                warning_messages.append(
                    f"Language '{segment.detected_source_language}' was detected, but the license does not support this language. "
                    f"Instead the segment was {'transcribed' if task == Task.transcribe else 'translated'} with the "
                    + (
                        f"closest available source language '{segment.source_language}'"
                        if language is None
                        else f"language '{language}' that was enforced by argument '--language'"
                    )
                    + ". For more info on this problem don't hesitate to contact Phonexia."
                )
        if _response.HasField("processed_audio_length"):
            print(f"Processed audio length: {_response.processed_audio_length.ToJsonString()}")

    # dict.fromkeys de-duplicates while keeping first-seen order; a set would make the
    # order of the printed warnings vary between runs.
    unique_warnings = list(dict.fromkeys(warning_messages))
    if unique_warnings:
        print()
        for warning in unique_warnings:
            print(f"WARNING: {warning}")
145
+
146
+
147
def _parse_metadata(item: str) -> tuple:
    """Parse one ``key=value`` command-line item into a ``(key, value)`` pair.

    Only the first ``=`` separates key from value, so values may contain ``=``.
    """
    key, separator, value = item.partition("=")
    if not separator or not key:
        raise argparse.ArgumentTypeError(f"expected key=value, got '{item}'")
    return key, value


def main():
    """Command-line entry point: parse arguments, connect and run the selected task.

    Raises:
        ValueError: If ``--start`` is negative, ``--end`` is not positive, or
            ``--start`` is not smaller than ``--end``.
        SystemExit: With status 1 if the input file does not exist or the request
            fails (the error is logged first).
    """
    parser = argparse.ArgumentParser(
        description=(
            "Enhanced Speech to Text Built on Whisper gRPC client. Transcribes input audio into segments"
            " with timestamps."
        )
    )

    parser.add_argument(
        "-H",
        "--host",
        type=str,
        default="localhost:8080",
        help="Server address, default: localhost:8080",
    )
    parser.add_argument(
        "-l",
        "--log_level",
        type=str,
        default="error",
        choices=["critical", "error", "warning", "info", "debug"],
    )
    parser.add_argument(
        "--metadata",
        metavar="key=value",
        nargs="+",
        type=_parse_metadata,
        help="Custom client metadata",
    )
    parser.add_argument("--use_ssl", action="store_true", help="Use SSL connection")
    parser.add_argument("--start", type=float, help="Audio start time")
    parser.add_argument("--end", type=float, help="Audio end time")

    parser.add_argument(
        "--language",
        type=str,
        default=None,
        help=(
            "Force transcription to specified language, if not set, language is detected"
            " automatically"
        ),
    )
    parser.add_argument(
        "--task",
        type=Task,
        default=Task.transcribe,
        choices=list(Task),
        help="Select whether to transcribe or translate the recording",
    )
    parser.add_argument(
        "--enable-language-switching",
        action="store_true",
        help="Enable dynamic language switching during transcription, with the language being detected approximately every 30 seconds",
    )
    parser.add_argument("file", type=str, help="Path to input file")

    args = parser.parse_args()

    if args.start is not None and args.start < 0:
        raise ValueError("Parameter 'start' must be a non-negative float.")

    if args.end is not None and args.end <= 0:
        raise ValueError("Parameter 'end' must be a positive float.")

    if args.start is not None and args.end is not None and args.start >= args.end:
        raise ValueError("Parameter 'end' must be larger than 'start'.")

    logging.basicConfig(
        level=args.log_level.upper(),
        format="[%(asctime)s.%(msecs)03d] [%(levelname)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    if not os.path.isfile(args.file):
        logging.error("no such file %s", args.file)
        # SystemExit is raised directly: the exit() builtin is injected by the site
        # module and is not guaranteed to exist in every interpreter setup.
        raise SystemExit(1)

    try:
        logging.info("Connecting to %s", args.host)
        channel = (
            grpc.secure_channel(target=args.host, credentials=grpc.ssl_channel_credentials())
            if args.use_ssl
            else grpc.insecure_channel(target=args.host)
        )

        # Using the channel as a context manager closes it when the call finishes,
        # including when it fails.
        with channel:
            transcribe(
                channel=channel,
                file=args.file,
                language=args.language,
                start=args.start,
                end=args.end,
                metadata=args.metadata,
                task=args.task,
                enable_language_switching=args.enable_language_switching,
            )

    except grpc.RpcError:
        logging.exception("RPC failed")
        raise SystemExit(1)
    except Exception:
        logging.exception("Unknown error")
        raise SystemExit(1)


if __name__ == "__main__":
    main()
@@ -0,0 +1,12 @@
1
+
2
+ ![](https://www.phonexia.com/wp-content/uploads/PHX_logotype_basic_2016_positive_transparent_RGB.png)
3
+
4
+ # Phonexia enhanced speech to text built on whisper client
5
+
6
+ This module contains a client for communication with [enhanced speech to text built on whisper](https://hub.docker.com/r/phonexia/enhanced-speech-to-text-built-on-whisper/) developed by [Phonexia](https://phonexia.com).
7
+
8
+ To use this client, you will first need a running instance of a *Phonexia enhanced speech to text built on whisper microservice*. If you don't have a running instance yet, don't hesitate to [contact our sales department](mailto:info@phonexia.com).
9
+
10
+ You can learn more about the enhanced speech to text built on whisper technology [here](https://docs.cloud.phonexia.com/docs/category/enhanced-speech-to-text-built-on-whisper).
11
+
12
+ On [this page](https://docs.cloud.phonexia.com/docs/products/speech-platform-4/grpc/api/phonexia/grpc/technologies/enhanced_speech_to_text_built_on_whisper/v1/enhanced_speech_to_text_built_on_whisper.proto), you will find a *gRPC API* reference for the *enhanced speech to text built on whisper microservice*.
@@ -0,0 +1,92 @@
1
+ [tool.poetry]
2
+ name = "phonexia-enhanced-speech-to-text-built-on-whisper-client"
3
+ version = "1.2.1"
4
+ description = "Client for communication with Phonexia Enhanced Speech To Text Built On Whisper microservice."
5
+ readme = "pypi-README.md"
6
+ keywords = ["grpc", "transcription", "STT", "ASR", "speech to text", "speech", "language", "microservice"]
7
+ authors = ["Phonexia <info@phonexia.com>"]
8
+
9
+ [tool.poetry.urls]
10
+ Homepage = "https://phonexia.com"
11
+ Issues = "https://phonexia.atlassian.net/servicedesk/customer/portal/15/group/20/create/40"
12
+ protofiles = "https://github.com/phonexia/protofiles"
13
+
14
+ [tool.poetry.scripts]
15
+ enhanced_speech_to_text_built_on_whisper_client = 'phonexia_enhanced_speech_to_text_built_on_whisper_client:main'
16
+
17
+ [tool.poetry.dependencies]
18
+ python = ">=3.8,<4.0"
19
+ grpcio = "^1.54.0"
20
+ phonexia-grpc = {version="^1.0.0", source="pypi"}
21
+
22
+ [tool.poetry.group.dev.dependencies]
23
+ pytest = "^8.0.0"
24
+ pytest-cov = "^5.0.0"
25
+ pytest-env = "^1.0.0"
26
+ pytest-random-order = "^1.1.0"
27
+ black = "^24.0.0"
28
+ ruff = "^0.4.0"
29
+
30
+ [[tool.poetry.source]]
31
+ name = "gitlab"
32
+ url = "https://gitlab.cloud.phonexia.com/api/v4/groups/39/-/packages/pypi/simple"
33
+ priority = "primary"
34
+
35
+
36
+ [[tool.poetry.source]]
37
+ name = "PyPI"
38
+ priority = "default"
39
+
40
+ [build-system]
41
+ requires = ["poetry-core>=1.0.0"]
42
+ build-backend = "poetry.core.masonry.api"
43
+
44
+ [tool.black]
45
+ line-length = 100
46
+ target-version = ['py38']
47
+ preview = true
48
+
49
+ [tool.ruff]
50
+ target-version = "py38"
51
+ line-length = 100
52
+ fix = true
53
+ select = [
54
+ # flake8-2020
55
+ "YTT",
56
+ # flake8-bandit
57
+ "S",
58
+ # flake8-bugbear
59
+ "B",
60
+ # flake8-builtins
61
+ "A",
62
+ # flake8-comprehensions
63
+ "C4",
64
+ # flake8-debugger
65
+ "T10",
66
+ # flake8-simplify
67
+ "SIM",
68
+ # isort
69
+ "I",
70
+ # mccabe
71
+ "C90",
72
+ # pycodestyle
73
+ "E", "W",
74
+ # pyflakes
75
+ "F",
76
+ # pygrep-hooks
77
+ "PGH",
78
+ # pyupgrade
79
+ "UP",
80
+ # ruff
81
+ "RUF",
82
+ # tryceratops
83
+ "TRY",
84
+ ]
85
+ ignore = [
86
+ # LineTooLong
87
+ "E501",
88
+ # DoNotAssignLambda
89
+ "E731",
90
+ # RaiseVanillaArgs aka Avoid specifying long messages outside the exception class
91
+ "TRY003",
92
+ ]