PyPI - phonexia-enhanced-speech-to-text-built-on-whisper-client - Versions diffs - 1.3.0__tar.gz → 1.5.0__tar.gz - Mend

phonexia-enhanced-speech-to-text-built-on-whisper-client 1.3.0.tar.gz → 1.5.0.tar.gz

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (4) hide show

{phonexia_enhanced_speech_to_text_built_on_whisper_client-1.3.0 → phonexia_enhanced_speech_to_text_built_on_whisper_client-1.5.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: phonexia-enhanced-speech-to-text-built-on-whisper-client
-Version: 1.3.0
+Version: 1.5.0
 Summary: Client for communication with Phonexia Enhanced Speech To Text Built On Whisper microservice.
 Keywords: grpc,transcription,STT,ASR,speech to text,speech,language,microservice
 Author: Phonexia
@@ -13,7 +13,10 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Requires-Dist: grpcio (>=1.54.0,<2.0.0)
+Requires-Dist: numpy (<2.0.0) ; python_version < "3.12"
+Requires-Dist: numpy (>=2.0.0) ; python_version >= "3.12"
 Requires-Dist: phonexia-grpc (>=2.0.0,<3.0.0)
+Requires-Dist: soundfile (>=0.12.1,<0.13.0)
 Project-URL: Homepage, https://phonexia.com
 Project-URL: Issues, https://phonexia.atlassian.net/servicedesk/customer/portal/15/group/20/create/40
 Project-URL: protofiles, https://github.com/phonexia/protofiles

{phonexia_enhanced_speech_to_text_built_on_whisper_client-1.3.0 → phonexia_enhanced_speech_to_text_built_on_whisper_client-1.5.0}/phonexia_enhanced_speech_to_text_built_on_whisper_client.py RENAMED Viewed

@@ -2,15 +2,16 @@ import argparse
 import json
 import logging
 import os
+from datetime import datetime
 from enum import Enum
 from typing import Iterator, Optional
 import google.protobuf.duration_pb2
 import grpc
-from google.protobuf.json_format import MessageToDict
 import phonexia.grpc.technologies.enhanced_speech_to_text_built_on_whisper.v1.enhanced_speech_to_text_built_on_whisper_pb2_grpc as stt_grpc
-from phonexia.grpc.common.core_pb2 import Audio, TimeRange
+import soundfile
+from google.protobuf.json_format import MessageToDict
+from phonexia.grpc.common.core_pb2 import Audio, RawAudioConfig, TimeRange
 from phonexia.grpc.technologies.enhanced_speech_to_text_built_on_whisper.v1.enhanced_speech_to_text_built_on_whisper_pb2 import (
     TranscribeConfig,
     TranscribeRequest,
@@ -44,19 +45,42 @@ def transcribe_request_iterator(
     start: Optional[float],
     end: Optional[float],
     enable_language_switching: bool = False,
+    use_raw_audio: bool = False,
 ) -> Iterator[TranscribeRequest]:
     time_range = TimeRange(start=time_to_duration(start), end=time_to_duration(end))
     config = TranscribeConfig(
         language=specified_language, enable_language_switching=enable_language_switching
     )
-    with open(file, "rb") as f:
-        while chunk := f.read(CHUNK_SIZE):
-            yield TranscribeRequest(
-                audio=Audio(content=chunk, time_range=time_range), config=config
+    if use_raw_audio:
+        with soundfile.SoundFile(file) as r:
+            raw_audio_config = RawAudioConfig(
+                channels=r.channels,
+                sample_rate_hertz=r.samplerate,
+                encoding=RawAudioConfig.AudioEncoding.PCM16,
             )
-            time_range = None
-            config = None
+            for data in r.blocks(blocksize=r.samplerate, dtype="int16"):
+                logging.debug("Sending chunk of size %d samples", len(data))
+                yield TranscribeRequest(
+                    audio=Audio(
+                        content=data.flatten().tobytes(),
+                        time_range=time_range,
+                        raw_audio_config=raw_audio_config,
+                    ),
+                    config=config,
+                )
+                time_range = None
+                raw_audio_config = None
+                config = None
+    else:
+        with open(file, "rb") as f:
+            while chunk := f.read(CHUNK_SIZE):
+                yield TranscribeRequest(
+                    audio=Audio(content=chunk, time_range=time_range), config=config
+                )
+                time_range = None
+                config = None
 def translate_request_iterator(
@@ -87,6 +111,7 @@ def transcribe(
     metadata: Optional[list],
     task: Task,
     enable_language_switching: bool = False,
+    use_raw_audio: bool = False,
 ):
     stub = stt_grpc.SpeechToTextStub(channel)
     if task == Task.transcribe:
@@ -97,6 +122,7 @@ def transcribe(
                 start=start,
                 end=end,
                 enable_language_switching=enable_language_switching,
+                use_raw_audio=use_raw_audio,
             ),
             metadata=metadata,
         )
@@ -197,6 +223,7 @@ def main():
         help="Enable dynamic language switching during transcription, with the language being detected approximately every 30 seconds",
     )
     parser.add_argument("file", type=str, help="Path to input file")
+    parser.add_argument("--use_raw_audio", action="store_true", help="Send a raw audio in")
     args = parser.parse_args()
@@ -227,6 +254,8 @@ def main():
             else grpc.insecure_channel(target=args.host)
         )
+        start_time = datetime.now()
         transcribe(
             channel=channel,
             file=args.file,
@@ -236,8 +265,11 @@ def main():
             metadata=args.metadata,
             task=args.task,
             enable_language_switching=args.enable_language_switching,
+            use_raw_audio=args.use_raw_audio,
         )
+        logging.debug(f"Elapsed time {(datetime.now() - start_time)}")
     except grpc.RpcError:
         logging.exception("RPC failed")
         exit(1)

{phonexia_enhanced_speech_to_text_built_on_whisper_client-1.3.0 → phonexia_enhanced_speech_to_text_built_on_whisper_client-1.5.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "phonexia-enhanced-speech-to-text-built-on-whisper-client"
-version = "1.3.0"
+version = "1.5.0"
 description = "Client for communication with Phonexia Enhanced Speech To Text Built On Whisper microservice."
 readme = "pypi-README.md"
 keywords = ["grpc", "transcription", "STT", "ASR", "speech to text", "speech", "language", "microservice"]
@@ -18,6 +18,11 @@ enhanced_speech_to_text_built_on_whisper_client = 'phonexia_enhanced_speech_to_t
 python = ">=3.8,<4.0"
 grpcio = "^1.54.0"
 phonexia-grpc = {version="^2.0.0", source="pypi"}
+soundfile = "^0.12.1"
+numpy = [
+    { version = "<2.0.0", markers = "python_version < '3.12'" },
+    { version = ">=2.0.0", markers = "python_version >= '3.12'" }
+]
 [tool.poetry.group.dev.dependencies]
 pytest = "^8.0.0"
@@ -28,14 +33,12 @@ black = "^24.0.0"
 ruff = "^0.4.0"
 [[tool.poetry.source]]
-name = "gitlab"
-url = "https://gitlab.cloud.phonexia.com/api/v4/groups/39/-/packages/pypi/simple"
+name = "PyPI"
 priority = "primary"
 [[tool.poetry.source]]
-name = "PyPI"
-priority = "default"
+name = "gitlab"
+url = "https://gitlab.cloud.phonexia.com/api/v4/groups/39/-/packages/pypi/simple"
 [build-system]
 requires = ["poetry-core>=1.0.0"]

{phonexia_enhanced_speech_to_text_built_on_whisper_client-1.3.0 → phonexia_enhanced_speech_to_text_built_on_whisper_client-1.5.0}/pypi-README.md RENAMED Viewed

File without changes

phonexia-enhanced-speech-to-text-built-on-whisper-client 1.3.0__tar.gz → 1.5.0__tar.gz

phonexia-enhanced-speech-to-text-built-on-whisper-client 1.3.0.tar.gz → 1.5.0.tar.gz