phonexia-enhanced-speech-to-text-built-on-whisper-client 1.3.0__tar.gz → 1.5.0__tar.gz

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of each package version as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phonexia-enhanced-speech-to-text-built-on-whisper-client
3
- Version: 1.3.0
3
+ Version: 1.5.0
4
4
  Summary: Client for communication with Phonexia Enhanced Speech To Text Built On Whisper microservice.
5
5
  Keywords: grpc,transcription,STT,ASR,speech to text,speech,language,microservice
6
6
  Author: Phonexia
@@ -13,7 +13,10 @@ Classifier: Programming Language :: Python :: 3.10
13
13
  Classifier: Programming Language :: Python :: 3.11
14
14
  Classifier: Programming Language :: Python :: 3.12
15
15
  Requires-Dist: grpcio (>=1.54.0,<2.0.0)
16
+ Requires-Dist: numpy (<2.0.0) ; python_version < "3.12"
17
+ Requires-Dist: numpy (>=2.0.0) ; python_version >= "3.12"
16
18
  Requires-Dist: phonexia-grpc (>=2.0.0,<3.0.0)
19
+ Requires-Dist: soundfile (>=0.12.1,<0.13.0)
17
20
  Project-URL: Homepage, https://phonexia.com
18
21
  Project-URL: Issues, https://phonexia.atlassian.net/servicedesk/customer/portal/15/group/20/create/40
19
22
  Project-URL: protofiles, https://github.com/phonexia/protofiles
@@ -2,15 +2,16 @@ import argparse
2
2
  import json
3
3
  import logging
4
4
  import os
5
+ from datetime import datetime
5
6
  from enum import Enum
6
7
  from typing import Iterator, Optional
7
8
 
8
9
  import google.protobuf.duration_pb2
9
10
  import grpc
10
- from google.protobuf.json_format import MessageToDict
11
-
12
11
  import phonexia.grpc.technologies.enhanced_speech_to_text_built_on_whisper.v1.enhanced_speech_to_text_built_on_whisper_pb2_grpc as stt_grpc
13
- from phonexia.grpc.common.core_pb2 import Audio, TimeRange
12
+ import soundfile
13
+ from google.protobuf.json_format import MessageToDict
14
+ from phonexia.grpc.common.core_pb2 import Audio, RawAudioConfig, TimeRange
14
15
  from phonexia.grpc.technologies.enhanced_speech_to_text_built_on_whisper.v1.enhanced_speech_to_text_built_on_whisper_pb2 import (
15
16
  TranscribeConfig,
16
17
  TranscribeRequest,
@@ -44,19 +45,42 @@ def transcribe_request_iterator(
44
45
  start: Optional[float],
45
46
  end: Optional[float],
46
47
  enable_language_switching: bool = False,
48
+ use_raw_audio: bool = False,
47
49
  ) -> Iterator[TranscribeRequest]:
48
50
  time_range = TimeRange(start=time_to_duration(start), end=time_to_duration(end))
49
51
  config = TranscribeConfig(
50
52
  language=specified_language, enable_language_switching=enable_language_switching
51
53
  )
52
54
 
53
- with open(file, "rb") as f:
54
- while chunk := f.read(CHUNK_SIZE):
55
- yield TranscribeRequest(
56
- audio=Audio(content=chunk, time_range=time_range), config=config
55
+ if use_raw_audio:
56
+ with soundfile.SoundFile(file) as r:
57
+ raw_audio_config = RawAudioConfig(
58
+ channels=r.channels,
59
+ sample_rate_hertz=r.samplerate,
60
+ encoding=RawAudioConfig.AudioEncoding.PCM16,
57
61
  )
58
- time_range = None
59
- config = None
62
+
63
+ for data in r.blocks(blocksize=r.samplerate, dtype="int16"):
64
+ logging.debug("Sending chunk of size %d samples", len(data))
65
+ yield TranscribeRequest(
66
+ audio=Audio(
67
+ content=data.flatten().tobytes(),
68
+ time_range=time_range,
69
+ raw_audio_config=raw_audio_config,
70
+ ),
71
+ config=config,
72
+ )
73
+ time_range = None
74
+ raw_audio_config = None
75
+ config = None
76
+ else:
77
+ with open(file, "rb") as f:
78
+ while chunk := f.read(CHUNK_SIZE):
79
+ yield TranscribeRequest(
80
+ audio=Audio(content=chunk, time_range=time_range), config=config
81
+ )
82
+ time_range = None
83
+ config = None
60
84
 
61
85
 
62
86
  def translate_request_iterator(
@@ -87,6 +111,7 @@ def transcribe(
87
111
  metadata: Optional[list],
88
112
  task: Task,
89
113
  enable_language_switching: bool = False,
114
+ use_raw_audio: bool = False,
90
115
  ):
91
116
  stub = stt_grpc.SpeechToTextStub(channel)
92
117
  if task == Task.transcribe:
@@ -97,6 +122,7 @@ def transcribe(
97
122
  start=start,
98
123
  end=end,
99
124
  enable_language_switching=enable_language_switching,
125
+ use_raw_audio=use_raw_audio,
100
126
  ),
101
127
  metadata=metadata,
102
128
  )
@@ -197,6 +223,7 @@ def main():
197
223
  help="Enable dynamic language switching during transcription, with the language being detected approximately every 30 seconds",
198
224
  )
199
225
  parser.add_argument("file", type=str, help="Path to input file")
226
+ parser.add_argument("--use_raw_audio", action="store_true", help="Send a raw audio in")
200
227
 
201
228
  args = parser.parse_args()
202
229
 
@@ -227,6 +254,8 @@ def main():
227
254
  else grpc.insecure_channel(target=args.host)
228
255
  )
229
256
 
257
+ start_time = datetime.now()
258
+
230
259
  transcribe(
231
260
  channel=channel,
232
261
  file=args.file,
@@ -236,8 +265,11 @@ def main():
236
265
  metadata=args.metadata,
237
266
  task=args.task,
238
267
  enable_language_switching=args.enable_language_switching,
268
+ use_raw_audio=args.use_raw_audio,
239
269
  )
240
270
 
271
+ logging.debug(f"Elapsed time {(datetime.now() - start_time)}")
272
+
241
273
  except grpc.RpcError:
242
274
  logging.exception("RPC failed")
243
275
  exit(1)
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "phonexia-enhanced-speech-to-text-built-on-whisper-client"
3
- version = "1.3.0"
3
+ version = "1.5.0"
4
4
  description = "Client for communication with Phonexia Enhanced Speech To Text Built On Whisper microservice."
5
5
  readme = "pypi-README.md"
6
6
  keywords = ["grpc", "transcription", "STT", "ASR", "speech to text", "speech", "language", "microservice"]
@@ -18,6 +18,11 @@ enhanced_speech_to_text_built_on_whisper_client = 'phonexia_enhanced_speech_to_t
18
18
  python = ">=3.8,<4.0"
19
19
  grpcio = "^1.54.0"
20
20
  phonexia-grpc = {version="^2.0.0", source="pypi"}
21
+ soundfile = "^0.12.1"
22
+ numpy = [
23
+ { version = "<2.0.0", markers = "python_version < '3.12'" },
24
+ { version = ">=2.0.0", markers = "python_version >= '3.12'" }
25
+ ]
21
26
 
22
27
  [tool.poetry.group.dev.dependencies]
23
28
  pytest = "^8.0.0"
@@ -28,14 +33,12 @@ black = "^24.0.0"
28
33
  ruff = "^0.4.0"
29
34
 
30
35
  [[tool.poetry.source]]
31
- name = "gitlab"
32
- url = "https://gitlab.cloud.phonexia.com/api/v4/groups/39/-/packages/pypi/simple"
36
+ name = "PyPI"
33
37
  priority = "primary"
34
38
 
35
-
36
39
  [[tool.poetry.source]]
37
- name = "PyPI"
38
- priority = "default"
40
+ name = "gitlab"
41
+ url = "https://gitlab.cloud.phonexia.com/api/v4/groups/39/-/packages/pypi/simple"
39
42
 
40
43
  [build-system]
41
44
  requires = ["poetry-core>=1.0.0"]