PyPI - phonexia-enhanced-speech-to-text-built-on-whisper-client - Versions diffs - 1.10.0__py3-none-any.whl → 2.0.0__py3-none-any.whl - Mend

phonexia-enhanced-speech-to-text-built-on-whisper-client 1.10.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

{phonexia_enhanced_speech_to_text_built_on_whisper_client-1.10.0.dist-info → phonexia_enhanced_speech_to_text_built_on_whisper_client-2.0.0.dist-info}/METADATA RENAMED Viewed

@@ -1,27 +1,18 @@
-Metadata-Version: 2.3
+Metadata-Version: 2.4
 Name: phonexia-enhanced-speech-to-text-built-on-whisper-client
-Version: 1.10.0
-Summary: Client for communication with Phonexia Enhanced Speech To Text Built On Whisper microservice.
+Version: 2.0.0
+Summary: Audio Quality Estimation Client
+Author-email: Phonexia <info@phonexia.com>
 Keywords: grpc,transcription,STT,ASR,speech to text,speech,language,microservice
-Author: Phonexia
-Author-email: info@phonexia.com
-Requires-Python: >=3.9,<4.0
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.9
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
-Classifier: Programming Language :: Python :: 3.13
-Requires-Dist: grpcio (>=1.54.0,<2.0.0)
-Requires-Dist: numpy (<2.0.0) ; python_version < "3.12"
-Requires-Dist: numpy (>=2.0.0) ; python_version >= "3.12"
-Requires-Dist: phonexia-grpc (>=2.0.0,<3.0.0)
-Requires-Dist: protobuf (>=5.0.0,<6.0.0)
-Requires-Dist: soundfile (>=0.13.0,<0.14.0)
-Project-URL: Homepage, https://phonexia.com
-Project-URL: Issues, https://phonexia.atlassian.net/servicedesk/customer/portal/15/group/20/create/40
-Project-URL: protofiles, https://github.com/phonexia/protofiles
+Requires-Python: >=3.9
 Description-Content-Type: text/markdown
+Requires-Dist: more-itertools>=10.6.0
+Requires-Dist: phonexia-grpc>=2.26.0
+Requires-Dist: numpy>=2.0.0; python_version >= "3.12"
+Requires-Dist: numpy<2.0.0; python_version < "3.12"
+Requires-Dist: typer>=0.16.0
+Requires-Dist: soundfile>=0.13.0
+Requires-Dist: py-ubjson>=0.16.1
 ![](https://www.phonexia.com/wp-content/uploads/PHX_logotype_basic_2016_positive_transparent_RGB.png)

phonexia_enhanced_speech_to_text_built_on_whisper_client-2.0.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,6 @@
+phonexia_enhanced_speech_to_text_built_on_whisper_client.py,sha256=xkhDL3LCyMw0QuZL3-ZeVa9Fx2AxKjgVlOtsZvKqujE,17202
+phonexia_enhanced_speech_to_text_built_on_whisper_client-2.0.0.dist-info/METADATA,sha256=isJHGgPYELCcAi1HYO0GGDsHE0veXaXs5KHRf43nXZU,1748
+phonexia_enhanced_speech_to_text_built_on_whisper_client-2.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+phonexia_enhanced_speech_to_text_built_on_whisper_client-2.0.0.dist-info/entry_points.txt,sha256=3t-DG5W0VDbM-4oqWylIKsJ5f-IDo8eJtvB8fpF9tOk,129
+phonexia_enhanced_speech_to_text_built_on_whisper_client-2.0.0.dist-info/top_level.txt,sha256=UrRc-bXR5jArOtIsymBnXsH9Z1wEWmGu8hB0n1A32Q8,57
+phonexia_enhanced_speech_to_text_built_on_whisper_client-2.0.0.dist-info/RECORD,,

{phonexia_enhanced_speech_to_text_built_on_whisper_client-1.10.0.dist-info → phonexia_enhanced_speech_to_text_built_on_whisper_client-2.0.0.dist-info}/WHEEL RENAMED Viewed

@@ -1,4 +1,5 @@
 Wheel-Version: 1.0
-Generator: poetry-core 2.1.2
+Generator: setuptools (80.9.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

phonexia_enhanced_speech_to_text_built_on_whisper_client-2.0.0.dist-info/entry_points.txt ADDED Viewed

@@ -0,0 +1,2 @@
+[console_scripts]
+enhanced_speech_to_text_built_on_whisper_client = phonexia_enhanced_speech_to_text_built_on_whisper_client:app

phonexia_enhanced_speech_to_text_built_on_whisper_client-2.0.0.dist-info/top_level.txt ADDED Viewed

@@ -0,0 +1 @@
+phonexia_enhanced_speech_to_text_built_on_whisper_client

phonexia_enhanced_speech_to_text_built_on_whisper_client.py CHANGED Viewed

@@ -1,15 +1,17 @@
-import argparse
 import json
 import logging
-import os
+import re
+from collections.abc import Iterator
 from datetime import datetime
 from enum import Enum
-from typing import Iterator, Optional
+from typing import Annotated, BinaryIO, Optional, TextIO
-import google.protobuf.duration_pb2
 import grpc
+import numpy as np
 import phonexia.grpc.technologies.enhanced_speech_to_text_built_on_whisper.v1.enhanced_speech_to_text_built_on_whisper_pb2_grpc as stt_grpc
 import soundfile
+import typer
+from google.protobuf.duration_pb2 import Duration
 from google.protobuf.json_format import MessageToDict
 from phonexia.grpc.common.core_pb2 import Audio, RawAudioConfig, TimeRange
 from phonexia.grpc.technologies.enhanced_speech_to_text_built_on_whisper.v1.enhanced_speech_to_text_built_on_whisper_pb2 import (
@@ -22,25 +24,25 @@ from phonexia.grpc.technologies.enhanced_speech_to_text_built_on_whisper.v1.enha
 CHUNK_SIZE = 32000
-class Task(Enum):
-    transcribe = "transcribe"
-    translate = "translate"
+class LogLevel(str, Enum):
+    CRITICAL = "critical"
+    ERROR = "error"
+    WARNING = "warning"
+    INFO = "info"
+    DEBUG = "debug"
-    def __str__(self):
-        return self.value
-def time_to_duration(time: float) -> Optional[google.protobuf.duration_pb2.Duration]:
+def time_to_duration(time: Optional[float]) -> Optional[Duration]:
     if time is None:
         return None
-    duration = google.protobuf.duration_pb2.Duration()
+    duration = Duration()
     duration.seconds = int(time)
     duration.nanos = int((time - duration.seconds) * 1e9)
     return duration
 def transcribe_request_iterator(
-    file: str,
+    file: BinaryIO,
     specified_language: Optional[str],
     start: Optional[float],
     end: Optional[float],
@@ -63,11 +65,15 @@ def transcribe_request_iterator(
                 encoding=RawAudioConfig.AudioEncoding.PCM16,
             )
-            for data in r.blocks(blocksize=r.samplerate, dtype="int16"):
+            for data in r.blocks(blocksize=r.samplerate, dtype="float32"):
                 logging.debug("Sending chunk of size %d samples", len(data))
+                int16_info = np.iinfo(np.int16)
+                data_scaled = np.clip(
+                    data * (int16_info.max + 1), int16_info.min, int16_info.max
+                ).astype("int16")
                 yield TranscribeRequest(
                     audio=Audio(
-                        content=data.flatten().tobytes(),
+                        content=data_scaled.flatten().tobytes(),
                         time_range=time_range,
                         raw_audio_config=raw_audio_config,
                     ),
@@ -77,22 +83,22 @@ def transcribe_request_iterator(
                 raw_audio_config = None
                 config = None
     else:
-        with open(file, "rb") as f:
-            while chunk := f.read(CHUNK_SIZE):
-                yield TranscribeRequest(
-                    audio=Audio(content=chunk, time_range=time_range), config=config
-                )
-                time_range = None
-                config = None
+        while chunk := file.read(CHUNK_SIZE):
+            yield TranscribeRequest(
+                audio=Audio(content=chunk, time_range=time_range), config=config
+            )
+            time_range = None
+            config = None
 def translate_request_iterator(
-    file: str,
+    file: BinaryIO,
     specified_language: Optional[str],
     start: Optional[float],
     end: Optional[float],
     enable_language_switching: bool = False,
     enable_word_segmentation: bool = False,
+    use_raw_audio: bool = False,
 ) -> Iterator[TranslateRequest]:
     time_range = TimeRange(start=time_to_duration(start), end=time_to_duration(end))
     config = TranslateConfig(
@@ -101,197 +107,418 @@ def translate_request_iterator(
         enable_word_segmentation=enable_word_segmentation,
     )
-    with open(file, "rb") as f:
-        while chunk := f.read(CHUNK_SIZE):
+    if use_raw_audio:
+        with soundfile.SoundFile(file) as r:
+            raw_audio_config = RawAudioConfig(
+                channels=r.channels,
+                sample_rate_hertz=r.samplerate,
+                encoding=RawAudioConfig.AudioEncoding.PCM16,
+            )
+            for data in r.blocks(blocksize=r.samplerate, dtype="float32"):
+                logging.debug("Sending chunk of size %d samples", len(data))
+                int16_info = np.iinfo(np.int16)
+                data_scaled = np.clip(
+                    data * (int16_info.max + 1), int16_info.min, int16_info.max
+                ).astype("int16")
+                yield TranslateRequest(
+                    audio=Audio(
+                        content=data_scaled.flatten().tobytes(),
+                        time_range=time_range,
+                        raw_audio_config=raw_audio_config,
+                    ),
+                    config=config,
+                )
+                time_range = None
+                raw_audio_config = None
+                config = None
+    else:
+        while chunk := file.read(CHUNK_SIZE):
             yield TranslateRequest(audio=Audio(content=chunk, time_range=time_range), config=config)
             time_range = None
             config = None
-def transcribe(
+def write_result(
+    audio_path: str,
+    responses: list,
+    output: TextIO,
+    language: Optional[str],
+):
+    logging.info(f"{audio_path!s} -> {output.name}")
+    # Aggregate all responses
+    response_dict = None
+    for _response in responses:
+        if not response_dict:
+            response_dict = MessageToDict(
+                message=_response,
+                always_print_fields_with_no_presence=True,
+                preserving_proto_field_name=True,
+            )
+        else:
+            response_dict["result"]["one_best"]["segments"] += \
+                MessageToDict(
+                    message=_response,
+                    always_print_fields_with_no_presence=True,
+                    preserving_proto_field_name=True,
+                )["result"]["one_best"]["segments"]  # fmt: skip
+    json.dump(response_dict, output, indent=2, ensure_ascii=False)
+def translate_impl(
     channel: grpc.Channel,
-    file: str,
+    file: BinaryIO,
+    output: TextIO,
     language: Optional[str],
     start: Optional[float],
     end: Optional[float],
     metadata: Optional[list],
-    task: Task,
     enable_language_switching: bool = False,
     enable_word_segmentation: bool = False,
     use_raw_audio: bool = False,
 ):
+    logging.info("Processing audio file with translate")
     stub = stt_grpc.SpeechToTextStub(channel)
-    if task == Task.transcribe:
-        response = stub.Transcribe(
-            transcribe_request_iterator(
-                file=file,
-                specified_language=language,
-                start=start,
-                end=end,
-                enable_language_switching=enable_language_switching,
-                enable_word_segmentation=enable_word_segmentation,
-                use_raw_audio=use_raw_audio,
-            ),
-            metadata=metadata,
-        )
-    elif task == Task.translate:
-        response = stub.Translate(
-            translate_request_iterator(
-                file=file,
-                specified_language=language,
-                start=start,
-                end=end,
-                enable_language_switching=enable_language_switching,
-                enable_word_segmentation=enable_word_segmentation,
-            ),
-            metadata=metadata,
-        )
-    else:
-        raise RuntimeError("Unknown task")
+    response = stub.Translate(
+        translate_request_iterator(
+            file=file,
+            specified_language=language,
+            start=start,
+            end=end,
+            enable_language_switching=enable_language_switching,
+            enable_word_segmentation=enable_word_segmentation,
+            use_raw_audio=use_raw_audio,
+        ),
+        metadata=metadata,
+    )
+    # Collect all responses
+    responses = list(response)
+    write_result(file.name, responses, output, language)
-    info_message = []
-    response_dict = None
-    for _response in response:
-        if not response_dict:
-            response_dict = MessageToDict(_response)
-        else:
-            response_dict["result"]["oneBest"]["segments"] += \
-                MessageToDict(_response)["result"]["oneBest"]["segments"]  # fmt: skip
-        for segment in _response.result.one_best.segments:
-            if segment.source_language != segment.detected_source_language:
-                info_message.append(
-                    f"Language '{segment.detected_source_language}' was detected in the audio, but instead "
-                    f"the segment was {'transcribed' if task == Task.transcribe else 'translated'} with the "
-                    + (
-                        f"closest available source language '{segment.source_language}'"
-                        if language is None
-                        else f"language '{language}' that was enforced by the '--language' argument"
-                    )
-                )
-    print(json.dumps(response_dict, indent=2, ensure_ascii=False))
-    info_message = set(info_message)
-    if len(info_message) > 0:
-        for msg in info_message:
-            logging.info(msg)
+def transcribe_impl(
+    channel: grpc.Channel,
+    file: BinaryIO,
+    output: TextIO,
+    language: Optional[str],
+    start: Optional[float],
+    end: Optional[float],
+    metadata: Optional[list],
+    enable_language_switching: bool = False,
+    enable_word_segmentation: bool = False,
+    use_raw_audio: bool = False,
+):
+    logging.info("Processing audio file with transcribe")
+    stub = stt_grpc.SpeechToTextStub(channel)
+    response = stub.Transcribe(
+        transcribe_request_iterator(
+            file=file,
+            specified_language=language,
+            start=start,
+            end=end,
+            enable_language_switching=enable_language_switching,
+            enable_word_segmentation=enable_word_segmentation,
+            use_raw_audio=use_raw_audio,
+        ),
+        metadata=metadata,
+    )
+    # Collect all responses
+    responses = list(response)
+    write_result(file.name, responses, output, language)
-def main():
-    parser = argparse.ArgumentParser(
-        description=(
-            "Enhanced Speech to Text Built on Whisper gRPC client. Transcribes input audio into segments"
-            " with timestamps."
+# Helper functions
+def _parse_time_range(time_range: str) -> tuple[Optional[float], Optional[float]]:
+    if time_range is None:
+        return None, None
+    if len(time_range) == 0:
+        raise typer.BadParameter("Parameter 'time_range' must be of the form '[START]:[END]'.")
+    # Regex pattern to match [START]:[END] format where START and END are positive floats
+    pattern = r"^(\d+(?:\.\d+)?)?:(\d+(?:\.\d+)?)?$"
+    match = re.match(pattern, time_range.strip())
+    if not match:
+        raise typer.BadParameter(
+            "Parameter 'time_range' must be of the form '[START]:[END]' where START and END are positive float numbers."
         )
-    )
-    parser.add_argument(
-        "-H",
-        "--host",
-        type=str,
-        default="localhost:8080",
-        help="Server address, default: localhost:8080",
-    )
-    parser.add_argument(
-        "-l",
-        "--log_level",
-        type=str,
-        default="error",
-        choices=["critical", "error", "warning", "info", "debug"],
-    )
-    parser.add_argument(
-        "--metadata",
-        metavar="key=value",
-        nargs="+",
-        type=lambda x: tuple(x.split("=")),
-        help="Custom client metadata",
-    )
-    parser.add_argument("--use_ssl", action="store_true", help="Use SSL connection")
-    parser.add_argument("--start", type=float, help="Audio start time")
-    parser.add_argument("--end", type=float, help="Audio end time")
-    parser.add_argument(
-        "--language",
-        type=str,
-        default=None,
-        help=(
-            "Force transcription to specified language, if not set, language is detected"
-            " automatically"
-        ),
-    )
-    parser.add_argument(
-        "--task",
-        type=Task,
-        default=Task.transcribe,
-        choices=list(Task),
-        help="Select whether to transcribe or translate the recording",
-    )
-    parser.add_argument(
-        "--enable-language-switching",
-        action="store_true",
-        help="Enable dynamic language switching during transcription, with the language being detected approximately every 30 seconds",
-    )
-    parser.add_argument(
-        "--enable-word-segmentation",
-        action="store_true",
-        help="Enable word-level transcription. Note: Enabling this option may increase processing time",
-    )
-    parser.add_argument("file", type=str, help="Path to input file")
-    parser.add_argument("--use_raw_audio", action="store_true", help="Send a raw audio in")
+    # Parse START and END from regex groups
+    start_str = match.group(1)
+    end_str = match.group(2)
-    args = parser.parse_args()
+    start = float(start_str) if start_str is not None else None
+    end = float(end_str) if end_str is not None else None
-    if args.start is not None and args.start < 0:
-        raise ValueError("Parameter 'start' must be a non-negative float.")
+    if start is not None and end is not None and start >= end:
+        raise typer.BadParameter("Parameter 'end' must be larger than 'start'.")
-    if args.end is not None and args.end <= 0:
-        raise ValueError("Parameter 'end' must be a positive float.")
+    return (None if start == 0.0 else start, end)
-    if args.start is not None and args.end is not None and args.start >= args.end:
-        raise ValueError("Parameter 'end' must be larger than 'start'.")
-    logging.basicConfig(
-        level=args.log_level.upper(),
-        format="[%(asctime)s.%(msecs)03d] [%(levelname)s] %(message)s",
-        datefmt="%Y-%m-%d %H:%M:%S",
-    )
+def _parse_metadata_callback(
+    ctx: typer.Context, metadata_list: Optional[list[str]]
+) -> list[tuple[str, str]]:
+    if ctx.resilient_parsing or metadata_list is None:
+        return []
+    params = []
+    for item in metadata_list:
+        t = tuple(item.split("=", 1))
+        if len(t) != 2:
+            raise typer.BadParameter(f"Metadata must be in format 'KEY=VALUE': {item}")
+        params.append(t)
+    return params
-    if not os.path.isfile(args.file):
-        logging.error(f"no such file {args.file}")
-        exit(1)
+app = typer.Typer(context_settings={"help_option_names": ["-h", "--help"]}, no_args_is_help=True)
+@app.command()
+def translate(
+    ctx: typer.Context,
+    input_file: Annotated[
+        typer.FileBinaryRead,
+        typer.Argument(
+            help="Input audio file path.",
+        ),
+    ] = "-",
+    time_range: Annotated[
+        Optional[str],
+        typer.Option(
+            "-t",
+            "--time-range",
+            callback=_parse_time_range,
+            metavar="[START]:[END]",
+            help=(
+                "Time range in seconds using format [START]:[END] where START and END are positive float numbers. "
+                "START can be omitted to process from beginning, END can be omitted to process to the end of the recording. "
+                "Examples: --time-range :10 (0 to 10), --time-range 10.1: (10.1 to end), --time-range 5:10 (5 to 10)."
+            ),
+        ),
+    ] = None,
+    language: Annotated[
+        Optional[str],
+        typer.Option(
+            "--language",
+            help=(
+                "Force transcription to specified language, if not set, language is detected "
+                "automatically."
+            ),
+        ),
+    ] = None,
+    enable_language_switching: Annotated[
+        bool,
+        typer.Option(
+            "--enable-language-switching",
+            help="Enable dynamic language switching during transcription, with the language being detected approximately every 30 seconds.",
+        ),
+    ] = False,
+    enable_word_segmentation: Annotated[
+        bool,
+        typer.Option(
+            "--enable-word-segmentation",
+            help="Enable word-level transcription. Note: Enabling this option may increase processing time.",
+        ),
+    ] = False,
+    use_raw_audio: Annotated[
+        bool,
+        typer.Option(
+            "--use-raw-audio",
+            help="Send raw audio in chunks. Enables continuous audio processing with less server memory usage.",
+        ),
+    ] = False,
+    output: Annotated[
+        typer.FileTextWrite,
+        typer.Option(
+            "--output", "-o", help="Output file path. If omitted, prints to stdout.", lazy=False
+        ),
+    ] = "-",
+) -> None:
+    """Translates input audio into segments with timestamps."""
     try:
-        logging.info(f"Connecting to {args.host}")
-        channel = (
-            grpc.secure_channel(target=args.host, credentials=grpc.ssl_channel_credentials())
-            if args.use_ssl
-            else grpc.insecure_channel(target=args.host)
-        )
+        logging.info(f"Connecting to {ctx.obj['host']}")
+        with (
+            grpc.insecure_channel(target=ctx.obj["host"])
+            if ctx.obj["plaintext"]
+            else grpc.secure_channel(
+                target=ctx.obj["host"], credentials=grpc.ssl_channel_credentials()
+            )
+        ) as channel:
+            start_time = datetime.now()
+            translate_impl(
+                channel=channel,
+                file=input_file,
+                output=output,
+                language=language,
+                start=time_range[0],
+                end=time_range[1],
+                metadata=ctx.obj["metadata"],
+                enable_language_switching=enable_language_switching,
+                enable_word_segmentation=enable_word_segmentation,
+                use_raw_audio=use_raw_audio,
+            )
-        start_time = datetime.now()
-        transcribe(
-            channel=channel,
-            file=args.file,
-            language=args.language,
-            start=args.start,
-            end=args.end,
-            metadata=args.metadata,
-            task=args.task,
-            enable_language_switching=args.enable_language_switching,
-            enable_word_segmentation=args.enable_word_segmentation,
-            use_raw_audio=args.use_raw_audio,
-        )
+            logging.debug(f"Elapsed time {(datetime.now() - start_time)}")
+    except grpc.RpcError:
+        logging.exception("RPC failed")
+        raise typer.Exit(code=1) from None
+    except (typer.Exit, typer.BadParameter):
+        raise
+    except Exception:
+        logging.exception("Unknown error")
+        raise typer.Exit(code=2) from None
+@app.command()
+def transcribe(
+    ctx: typer.Context,
+    input_file: Annotated[
+        typer.FileBinaryRead,
+        typer.Argument(
+            help="Input audio file path.",
+        ),
+    ] = "-",
+    time_range: Annotated[
+        Optional[str],
+        typer.Option(
+            "-t",
+            "--time-range",
+            callback=_parse_time_range,
+            metavar="[START]:[END]",
+            help=(
+                "Time range in seconds using format [START]:[END] where START and END are positive float numbers. "
+                "START can be omitted to process from beginning, END can be omitted to process to the end of the recording. "
+                "Examples: --time-range :10 (0 to 10), --time-range 10.1: (10.1 to end), --time-range 5:10 (5 to 10)."
+            ),
+        ),
+    ] = None,
+    language: Annotated[
+        Optional[str],
+        typer.Option(
+            "--language",
+            help=(
+                "Force transcription to specified language, if not set, language is detected "
+                "automatically."
+            ),
+        ),
+    ] = None,
+    enable_language_switching: Annotated[
+        bool,
+        typer.Option(
+            "--enable-language-switching",
+            help="Enable dynamic language switching during transcription, with the language being detected approximately every 30 seconds.",
+        ),
+    ] = False,
+    enable_word_segmentation: Annotated[
+        bool,
+        typer.Option(
+            "--enable-word-segmentation",
+            help="Enable word-level transcription. Note: Enabling this option may increase processing time.",
+        ),
+    ] = False,
+    use_raw_audio: Annotated[
+        bool,
+        typer.Option(
+            "--use-raw-audio",
+            help="Send raw audio in chunks. Enables continuous audio processing with less server memory usage.",
+        ),
+    ] = False,
+    output: Annotated[
+        typer.FileTextWrite,
+        typer.Option(
+            "--output", "-o", help="Output file path. If omitted, prints to stdout.", lazy=False
+        ),
+    ] = "-",
+) -> None:
+    """Transcribes input audio into segments with timestamps."""
+    try:
+        logging.info(f"Connecting to {ctx.obj['host']}")
+        with (
+            grpc.insecure_channel(target=ctx.obj["host"])
+            if ctx.obj["plaintext"]
+            else grpc.secure_channel(
+                target=ctx.obj["host"], credentials=grpc.ssl_channel_credentials()
+            )
+        ) as channel:
+            start_time = datetime.now()
+            transcribe_impl(
+                channel=channel,
+                file=input_file,
+                output=output,
+                language=language,
+                start=time_range[0],
+                end=time_range[1],
+                metadata=ctx.obj["metadata"],
+                enable_language_switching=enable_language_switching,
+                enable_word_segmentation=enable_word_segmentation,
+                use_raw_audio=use_raw_audio,
+            )
-        logging.debug(f"Elapsed time {(datetime.now() - start_time)}")
+            logging.debug(f"Elapsed time {(datetime.now() - start_time)}")
     except grpc.RpcError:
         logging.exception("RPC failed")
-        exit(1)
+        raise typer.Exit(code=1) from None
+    except (typer.Exit, typer.BadParameter):
+        raise
     except Exception:
         logging.exception("Unknown error")
-        exit(1)
+        raise typer.Exit(code=2) from None
+@app.callback()
+def cli(
+    ctx: typer.Context,
+    host: Annotated[
+        str,
+        typer.Option("--host", "-H", help="Server address (host:port)."),
+    ] = "localhost:8080",
+    log_level: Annotated[
+        LogLevel, typer.Option("--log-level", "-l", help="Logging level.")
+    ] = LogLevel.ERROR,
+    metadata: Annotated[
+        list[str],
+        typer.Option(
+            "--metadata",
+            metavar="key=value",
+            help="Custom client metadata.",
+            show_default=False,
+            callback=_parse_metadata_callback,
+        ),
+    ] = [],
+    plaintext: Annotated[
+        bool,
+        typer.Option(
+            "--plaintext", help="Use plain-text HTTP/2 when connecting to server (no TLS)."
+        ),
+    ] = False,
+) -> None:
+    """Enhanced Speech to Text Built on Whisper gRPC client."""
+    ctx.obj = {
+        "host": host,
+        "metadata": metadata,
+        "log_level": log_level,
+        "plaintext": plaintext,
+    }
+    logging.basicConfig(
+        level=log_level.value.upper(),
+        format="[%(asctime)s.%(msecs)03d] [%(levelname)s] %(message)s",
+        datefmt="%Y-%m-%d %H:%M:%S",
+    )
 if __name__ == "__main__":
-    main()
+    app()

phonexia_enhanced_speech_to_text_built_on_whisper_client-1.10.0.dist-info/RECORD DELETED Viewed

@@ -1,5 +0,0 @@
-phonexia_enhanced_speech_to_text_built_on_whisper_client.py,sha256=acZwljE4vRaKtf71f6Cm75c81iYhrmq59FoQIz5k0kI,9928
-phonexia_enhanced_speech_to_text_built_on_whisper_client-1.10.0.dist-info/METADATA,sha256=c9vqgI_gYaz7EFROo2Sq_36MUEp0rxDKRSRnRiKhsVs,2343
-phonexia_enhanced_speech_to_text_built_on_whisper_client-1.10.0.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
-phonexia_enhanced_speech_to_text_built_on_whisper_client-1.10.0.dist-info/entry_points.txt,sha256=RZ7mWDaVGagDYxjXloW7ndadXlJVwg9Xov0gqvPTqHs,129
-phonexia_enhanced_speech_to_text_built_on_whisper_client-1.10.0.dist-info/RECORD,,

phonexia_enhanced_speech_to_text_built_on_whisper_client-1.10.0.dist-info/entry_points.txt DELETED Viewed

@@ -1,3 +0,0 @@
-[console_scripts]
-enhanced_speech_to_text_built_on_whisper_client=phonexia_enhanced_speech_to_text_built_on_whisper_client:main

phonexia-enhanced-speech-to-text-built-on-whisper-client 1.10.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

phonexia-enhanced-speech-to-text-built-on-whisper-client 1.10.0__py3-none-any.whl → 2.0.0__py3-none-any.whl