PyPI - gl-speech-sdk - Versions diffs - 0.0.1b1__py3-none-any.whl - Mend

gl-speech-sdk 0.0.1b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

gl_speech_sdk/__init__.py +69 -0
gl_speech_sdk/client.py +86 -0
gl_speech_sdk/models.py +409 -0
gl_speech_sdk/py.typed +0 -0
gl_speech_sdk/stt.py +456 -0
gl_speech_sdk/tts.py +449 -0
gl_speech_sdk/webhooks.py +551 -0
gl_speech_sdk-0.0.1b1.dist-info/METADATA +417 -0
gl_speech_sdk-0.0.1b1.dist-info/RECORD +11 -0
gl_speech_sdk-0.0.1b1.dist-info/WHEEL +4 -0
gl_speech_sdk-0.0.1b1.dist-info/licenses/LICENSE +21 -0

gl_speech_sdk/__init__.py ADDED Viewed

@@ -0,0 +1,69 @@
+"""GL Speech Python client library for interacting with the Prosa Speech API."""
+from gl_speech_sdk.client import SpeechClient
+from gl_speech_sdk.models import (
+    AudioFormat,
+    DeliveryTicket,
+    JobStatus,
+    STTConfig,
+    STTJobRequest,
+    STTJobsListResponse,
+    STTRequest,
+    STTResponse,
+    STTStatusResponse,
+    TTSConfig,
+    TTSJobRequest,
+    TTSJobsListResponse,
+    TTSRequest,
+    TTSResponse,
+    TTSStatusResponse,
+    VoiceGender,
+    WebhookDelivery,
+    WebhookEndpoint,
+    WebhookEndpointCreate,
+    WebhookEndpointListing,
+    WebhookEndpointUpdate,
+    WebhookEvent,
+    WebhookEventListing,
+    WebhookEventType,
+)
+from gl_speech_sdk.stt import SpeechToText
+from gl_speech_sdk.tts import TextToSpeech
+from gl_speech_sdk.webhooks import Webhooks
+# Public API of the package: the names bound by `from gl_speech_sdk import *`.
+# Every name imported above is listed here, grouped by role.
+__all__ = [
+    # Client
+    "SpeechClient",
+    # API Classes
+    "SpeechToText",
+    "TextToSpeech",
+    "Webhooks",
+    # Enums
+    "JobStatus",
+    "AudioFormat",
+    "VoiceGender",
+    "WebhookEventType",
+    # STT Models
+    "STTConfig",
+    "STTRequest",
+    "STTJobRequest",
+    "STTResponse",
+    "STTStatusResponse",
+    "STTJobsListResponse",
+    # TTS Models
+    "TTSConfig",
+    "TTSRequest",
+    "TTSJobRequest",
+    "TTSResponse",
+    "TTSStatusResponse",
+    "TTSJobsListResponse",
+    # Webhook Models
+    "WebhookEndpoint",
+    "WebhookEndpointCreate",
+    "WebhookEndpointUpdate",
+    "WebhookEndpointListing",
+    "WebhookEvent",
+    "WebhookEventListing",
+    "WebhookDelivery",
+    "DeliveryTicket",
+]

gl_speech_sdk/client.py ADDED Viewed

@@ -0,0 +1,86 @@
+"""GL Speech Python client library for interacting with the Prosa Speech API.
+This library provides a simple interface to interact with the Prosa Speech API,
+supporting Speech-to-Text (STT), Text-to-Speech (TTS), and Webhook operations.
+Example:
+    >>> client = SpeechClient(api_key="your-api-key")
+    >>> # Transcribe audio
+    >>> result = client.stt.transcribe(
+    ...     data="<base64-encoded-audio>",
+    ...     model="stt-general",
+    ...     wait=True
+    ... )
+    >>> print(result.result)
+    >>>
+    >>> # Synthesize speech
+    >>> result = client.tts.synthesize(
+    ...     text="Hello, world!",
+    ...     model="tts-dimas-formal",
+    ...     wait=True
+    ... )
+    >>> print(result.result)
+Authors:
+    GDP Labs
+References:
+    https://docs2.prosa.ai/
+"""
+import os
+from gl_speech_sdk.stt import SpeechToText
+from gl_speech_sdk.tts import TextToSpeech
+from gl_speech_sdk.webhooks import Webhooks
+# Fallback API root used when neither the `base_url` argument nor the
+# GLSPEECH_BASE_URL environment variable is set.
+# The URL must end with a slash; without the trailing slash, the base path will be incorrect.
+DEFAULT_BASE_URL = "https://api.prosa.ai/v2/speech/"
+class SpeechClient:
+    """Prosa Speech API Client.
+    Holds the connection settings and exposes one helper object per API area
+    (``stt``, ``tts`` and ``webhooks``), each constructed with this client.
+    Attributes:
+        api_key (str): API key for authentication
+        base_url (str): Base URL for the Prosa Speech API, always ending with "/"
+        timeout (float): Request timeout in seconds
+        default_headers (dict[str, str]): Default headers to include in all requests
+        stt (SpeechToText): SpeechToText instance for speech-to-text operations
+        tts (TextToSpeech): TextToSpeech instance for text-to-speech operations
+        webhooks (Webhooks): Webhooks instance for webhook management operations
+    """
+    def __init__(
+        self,
+        api_key: str | None = None,
+        base_url: str | None = None,
+        timeout: float = 60.0,
+        default_headers: dict[str, str] | None = None,
+    ) -> None:
+        """
+        Initialize SpeechClient.
+        Args:
+            api_key (str | None): API key for authentication. If not provided,
+                will try to get from GLSPEECH_API_KEY environment variable
+            base_url (str | None): Base URL for the Prosa Speech API. If not provided,
+                will try to get from GLSPEECH_BASE_URL environment variable,
+                otherwise uses default. A missing trailing slash is added.
+            timeout (float): Request timeout in seconds
+            default_headers (dict[str, str] | None): Default headers to include in all requests.
+                These will be merged with any extra_headers provided to individual methods.
+        Raises:
+            ValueError: If no API key is passed and GLSPEECH_API_KEY is unset or empty.
+        """
+        self.api_key = api_key or os.getenv("GLSPEECH_API_KEY")
+        if not self.api_key:
+            raise ValueError(
+                "API key is required. Provide it via 'api_key' parameter or "
+                "'GLSPEECH_API_KEY' environment variable."
+            )
+        resolved_url = base_url or os.getenv("GLSPEECH_BASE_URL") or DEFAULT_BASE_URL
+        # The base path is only correct when the URL ends with a slash, so apply
+        # the same rule to URLs coming from the caller or the environment.
+        if not resolved_url.endswith("/"):
+            resolved_url += "/"
+        self.base_url = resolved_url
+        self.timeout = timeout
+        self.default_headers = default_headers or {}
+        # Each API helper receives the client itself, so these are created last,
+        # after all settings above have been assigned.
+        self.stt = SpeechToText(self)
+        self.tts = TextToSpeech(self)
+        self.webhooks = Webhooks(self)

gl_speech_sdk/models.py ADDED Viewed

@@ -0,0 +1,409 @@
+"""Data models for the GL Speech Python client.
+This module contains Pydantic models for request and response data structures
+used in the GL Speech Python client library.
+Example:
+    >>> request = STTJobRequest(
+    ...     config=STTConfig(model="stt-general", wait=True),
+    ...     request=STTRequest(data="<base64-encoded-audio>"),
+    ... )
+    >>> data = request.model_dump(exclude_none=True)
+Authors:
+    GDP Labs
+References:
+    https://docs2.prosa.ai/
+"""
+from enum import Enum
+from typing import Any
+from pydantic import BaseModel, ConfigDict, Field, model_validator
+# =============================================================================
+# Common Enums
+# =============================================================================
+class JobStatus(str, Enum):
+    """Status of a job's overall progress.
+    Members also subclass ``str``, so each one compares equal to its raw
+    string value (for example ``JobStatus.COMPLETE == "complete"``).
+    """
+    COMPLETE = "complete"
+    CREATED = "created"
+    QUEUED = "queued"
+    IN_PROGRESS = "in_progress"
+    FAILED = "failed"
+    CANCELLED = "cancelled"
+class AudioFormat(str, Enum):
+    """Supported audio formats for TTS.
+    Each member is string-valued; the value is the lowercase format name.
+    """
+    OPUS = "opus"
+    MP3 = "mp3"
+    WAV = "wav"
+class VoiceGender(str, Enum):
+    """Voice gender for TTS models.
+    Each member is string-valued (``"male"`` or ``"female"``).
+    """
+    MALE = "male"
+    FEMALE = "female"
+class WebhookEventType(str, Enum):
+    """Supported webhook event types.
+    These are the known event types emitted by the Prosa Speech API.
+    Use these constants when filtering webhook events.
+    Each member's value is the dotted event name, ``<service>.job.<outcome>``.
+    Example:
+        >>> endpoint = client.webhooks.create_endpoint(
+        ...     url="https://example.com/webhook",
+        ...     event_filters=[WebhookEventType.TTS_JOB_COMPLETED.value]
+        ... )
+    """
+    STT_JOB_COMPLETED = "stt.job.completed"
+    STT_JOB_FAILED = "stt.job.failed"
+    TTS_JOB_COMPLETED = "tts.job.completed"
+    TTS_JOB_FAILED = "tts.job.failed"
+# =============================================================================
+# STT (Speech-to-Text) Models
+# =============================================================================
+class STTConfig(BaseModel):
+    """Configuration for STT job execution.
+    Only ``model`` is required; every other option has a default.
+    When the model is dumped with ``by_alias=True``, ``model`` is emitted
+    under the key ``engine``.
+    """
+    # protected_namespaces=() switches off Pydantic's reserved "model_" prefix check.
+    # NOTE(review): populate_by_name concerns validation aliases, while only a
+    # serialization alias is declared below - confirm it is still needed.
+    model_config = ConfigDict(protected_namespaces=(), populate_by_name=True)
+    # API accepts both "model" and "engine", but some endpoints require "engine"
+    model: str = Field(serialization_alias="engine")
+    wait: bool | None = False
+    speaker_count: int | None = 1
+    include_filler: bool | None = False
+    include_partial_results: bool | None = False
+    auto_punctuation: bool | None = False
+    enable_spoken_numerals: bool | None = False
+    enable_speech_insights: bool | None = False
+    enable_voice_insights: bool | None = False
+    enable_conversation_analytics: bool | None = False
+    conversation_analytics_prompt: str | None = "basic"
+class STTRequest(BaseModel):
+    """Audio payload and metadata submitted with an STT job.
+    Every field is optional on its own, but validation fails unless at least
+    one of ``data`` or ``uri`` is non-empty.
+    """
+    label: str | None = None
+    data: str | None = None
+    uri: str | None = None
+    duration: float | None = None
+    mime_type: str | None = None
+    sample_rate: int | None = None
+    channels: int | None = None
+    @model_validator(mode="after")
+    def check_data_or_uri(self) -> "STTRequest":
+        """Reject a request that carries neither ``data`` nor ``uri``.
+        Returns:
+            STTRequest: The validated instance itself.
+        Raises:
+            ValueError: If both ``data`` and ``uri`` are missing or empty.
+        """
+        if self.data or self.uri:
+            return self
+        raise ValueError("One of 'data' or 'uri' is required")
+class STTJobRequest(BaseModel):
+    """Complete STT job request including config and request payload.
+    Both parts are required: ``config`` (an ``STTConfig``) and ``request``
+    (an ``STTRequest``).
+    """
+    # protected_namespaces=() switches off Pydantic's reserved "model_" prefix check.
+    model_config = ConfigDict(protected_namespaces=())
+    config: STTConfig
+    request: STTRequest
+class TranscriptionResult(BaseModel):
+    """Speech segment transcribed from the audio.
+    ``transcript``, ``final``, ``time_start`` and ``time_end`` are required;
+    ``channel`` defaults to 0 and ``speaker_tag`` to 1.
+    """
+    transcript: str
+    final: bool
+    # NOTE(review): time unit is not stated in this module - presumably seconds; confirm.
+    time_start: float
+    time_end: float
+    channel: int | None = 0
+    speaker_tag: int | None = 1
+class STTResult(BaseModel):
+    """Result of an STT job.
+    Every field is optional and defaults to ``None``. ``data`` holds the
+    transcribed segments as ``TranscriptionResult`` items when present.
+    """
+    data: list[TranscriptionResult] | None = None
+    path: str | None = None
+    latency: float | None = None
+    speaker_count: int | None = None
+    # The three analytics payloads are kept as untyped dictionaries.
+    speech_insights: dict[str, Any] | None = None
+    voice_insights: dict[str, Any] | None = None
+    conversation_analytics: dict[str, Any] | None = None
+    error: str | None = None
+    message: str | None = None
+class STTProgress(BaseModel):
+    """Progress of the STT job.
+    ``total`` is required; ``details`` is an optional mapping of names to
+    float values.
+    """
+    # NOTE(review): scale of `total` (fraction vs. percentage) is not shown here - confirm.
+    total: float
+    details: dict[str, float] | None = None
+class ASRModel(BaseModel):
+    """ASR model information.
+    All fields are required except ``label``, which defaults to ``None``.
+    """
+    name: str
+    label: str | None = None
+    language: str
+    domain: str
+    acoustic: str
+    samplerate: int
+class STTResponse(BaseModel):
+    """Response from STT API.
+    ``job_id``, ``status``, ``created_at`` and ``modified_at`` are required
+    strings; the nested ``request``, ``result``, ``job_config``, ``progress``
+    and ``model`` sections are optional and default to ``None``.
+    """
+    job_id: str
+    # Plain string: the value is not validated against the JobStatus enum.
+    status: str
+    created_at: str
+    modified_at: str
+    request: STTRequest | None = None
+    result: STTResult | None = None
+    job_config: STTConfig | None = None
+    progress: STTProgress | None = None
+    model: ASRModel | None = None
+class STTStatusResponse(BaseModel):
+    """Status response from STT API.
+    Carries only the four required string fields of a job, without the
+    request, result or configuration sections.
+    """
+    job_id: str
+    # Plain string: the value is not validated against the JobStatus enum.
+    status: str
+    created_at: str
+    modified_at: str
+class STTJobsListResponse(BaseModel):
+    """List response from STT jobs API.
+    ``length`` and ``data`` are required; ``data`` is a list of
+    ``STTResponse`` items. ``pagination`` is an optional untyped dictionary.
+    """
+    pagination: dict[str, Any] | None = None
+    length: int
+    data: list[STTResponse]
+# =============================================================================
+# TTS (Text-to-Speech) Models
+# =============================================================================
+class TextWithTimestamp(BaseModel):
+    """Text segment with start and end timestamps.
+    All three fields are required.
+    """
+    text: str
+    # NOTE(review): time unit is not stated in this module - presumably seconds; confirm.
+    start: float
+    end: float
+class TTSConfig(BaseModel):
+    """Configuration for TTS job execution.
+    Only ``model`` is required. Defaults: ``wait`` False, ``pitch`` 0.0,
+    ``tempo`` 1.0, ``audio_format`` "opus", ``sample_rate`` None.
+    """
+    # protected_namespaces=() switches off Pydantic's reserved "model_" prefix check.
+    model_config = ConfigDict(protected_namespaces=())
+    model: str
+    wait: bool | None = False
+    pitch: float | None = 0.0
+    tempo: float | None = 1.0
+    # Plain string: the value is not validated against the AudioFormat enum.
+    audio_format: str | None = "opus"
+    sample_rate: int | None = None
+class TTSRequest(BaseModel):
+    """Text input submitted with a TTS job.
+    Exactly one of ``text`` or ``ssml`` must be non-empty; ``label`` is optional.
+    """
+    label: str | None = None
+    text: str | None = None
+    ssml: str | None = None
+    @model_validator(mode="after")
+    def check_text_or_ssml(self) -> "TTSRequest":
+        """Enforce that exactly one of ``text`` and ``ssml`` is supplied.
+        Returns:
+            TTSRequest: The validated instance itself.
+        Raises:
+            ValueError: If neither field is set, or if both are set.
+        """
+        has_text = bool(self.text)
+        has_ssml = bool(self.ssml)
+        if not (has_text or has_ssml):
+            raise ValueError("One of 'text' or 'ssml' is required")
+        if has_text and has_ssml:
+            raise ValueError("Only one of 'text' or 'ssml' can be provided")
+        return self
+class TTSJobRequest(BaseModel):
+    """Complete TTS job request including config and request payload.
+    Both parts are required: ``config`` (a ``TTSConfig``) and ``request``
+    (a ``TTSRequest``).
+    """
+    # protected_namespaces=() switches off Pydantic's reserved "model_" prefix check.
+    model_config = ConfigDict(protected_namespaces=())
+    config: TTSConfig
+    request: TTSRequest
+class TTSResult(BaseModel):
+    """Result of a TTS job.
+    Every field is optional and defaults to ``None``. ``texts`` holds
+    ``TextWithTimestamp`` items when present.
+    """
+    # NOTE(review): `data` is presumably the encoded audio and `path` its location - confirm.
+    data: str | None = None
+    path: str | None = None
+    format: str | None = None
+    duration: float | None = None
+    samplerate: int | None = None
+    channels: int | None = None
+    texts: list[TextWithTimestamp] | None = None
+    latency: float | None = None
+    error: str | None = None
+class TTSModel(BaseModel):
+    """TTS model information.
+    All fields are required except ``label``, which defaults to ``None``.
+    """
+    name: str
+    label: str | None = None
+    language: str
+    domain: str
+    voice: str
+    # Plain string: the value is not validated against the VoiceGender enum.
+    gender: str
+    channels: int
+    samplerate: int
+class TTSResponse(BaseModel):
+    """Response from TTS API.
+    ``job_id``, ``status``, ``created_at`` and ``modified_at`` are required
+    strings; the nested ``request``, ``result``, ``job_config`` and ``model``
+    sections are optional and default to ``None``.
+    """
+    job_id: str
+    # Plain string: the value is not validated against the JobStatus enum.
+    status: str
+    created_at: str
+    modified_at: str
+    request: TTSRequest | None = None
+    result: TTSResult | None = None
+    job_config: TTSConfig | None = None
+    model: TTSModel | None = None
+class TTSStatusResponse(BaseModel):
+    """Status response from TTS API.
+    Carries only the four required string fields of a job, without the
+    request, result or configuration sections.
+    """
+    job_id: str
+    # Plain string: the value is not validated against the JobStatus enum.
+    status: str
+    created_at: str
+    modified_at: str
+class TTSJobsListResponse(BaseModel):
+    """List response from TTS jobs API.
+    ``length`` and ``data`` are required; ``data`` is a list of
+    ``TTSResponse`` items. ``pagination`` is an optional untyped dictionary.
+    """
+    pagination: dict[str, Any] | None = None
+    length: int
+    data: list[TTSResponse]
+# =============================================================================
+# Webhook Models
+# =============================================================================
+class WebhookEndpointCreate(BaseModel):
+    """Request payload for creating a webhook endpoint.
+    Only ``url`` is required. ``ssl_verification`` defaults to ``True``;
+    ``event_filters`` and ``secret_key`` default to ``None``.
+    """
+    url: str
+    # Event names as plain strings (see WebhookEventType for the known values).
+    event_filters: list[str] | None = None
+    ssl_verification: bool | None = True
+    secret_key: str | None = None
+class WebhookEndpointUpdate(BaseModel):
+    """Request payload for updating a webhook endpoint.
+    No field is required. ``url`` and ``event_filters`` default to ``None``,
+    while ``ssl_verification`` defaults to ``True``.
+    """
+    url: str | None = None
+    event_filters: list[str] | None = None
+    # NOTE(review): because the default is True rather than None, this field
+    # survives `model_dump(exclude_none=True)` even when the caller did not set
+    # it - confirm a partial update cannot re-enable verification unintentionally.
+    ssl_verification: bool | None = True
+class WebhookSecret(BaseModel):
+    """Webhook secret information.
+    ``id`` and ``key`` are required; ``expired_at`` is optional and defaults
+    to ``None``.
+    """
+    id: str
+    key: str
+    expired_at: str | None = None
+class WebhookEndpoint(BaseModel):
+    """Webhook endpoint information.
+    ``id``, ``url`` and ``ssl_verification`` are required; ``secrets`` (a list
+    of ``WebhookSecret``) and ``event_filters`` are optional.
+    """
+    id: str
+    url: str
+    secrets: list[WebhookSecret] | None = None
+    event_filters: list[str] | None = None
+    ssl_verification: bool
+class WebhookEndpointListing(BaseModel):
+    """Webhook endpoint listing information (without secrets).
+    Same fields as ``WebhookEndpoint`` minus ``secrets``; ``id``, ``url`` and
+    ``ssl_verification`` are required.
+    """
+    id: str
+    url: str
+    event_filters: list[str] | None = None
+    ssl_verification: bool
+class WebhookRotationPeriod(BaseModel):
+    """Webhook secret rotation period.
+    Expressed as ``days`` plus ``hours``; the defaults are 3 days and 0 hours.
+    """
+    days: int | None = 3
+    hours: int | None = 0
+class WebhookRotation(BaseModel):
+    """Request payload for rotating webhook secrets.
+    Wraps a single required ``rotation_period`` (a ``WebhookRotationPeriod``).
+    """
+    rotation_period: WebhookRotationPeriod
+class WebhookEvent(BaseModel):
+    """Webhook event information.
+    All four fields are required; ``data`` is an untyped dictionary.
+    """
+    id: str
+    # Plain string: the value is not validated against the WebhookEventType enum.
+    event_type: str
+    data: dict[str, Any]
+    created_at: str
+class WebhookEventListing(BaseModel):
+    """Webhook event listing information (without data).
+    Same fields as ``WebhookEvent`` minus ``data``; all three are required.
+    """
+    id: str
+    # Plain string: the value is not validated against the WebhookEventType enum.
+    event_type: str
+    created_at: str
+class WebhookDelivery(BaseModel):
+    """Webhook delivery information.
+    Records one delivery: the identifiers involved, the request that was
+    sent and the response that came back. Every field is required.
+    """
+    delivery_id: str
+    event_id: str
+    endpoint_id: str
+    delivery: str
+    # Request side: headers and body are untyped dictionaries.
+    request_method: str
+    request_headers: dict[str, Any]
+    request_body: dict[str, Any]
+    # Response side: the body is kept as a raw string.
+    response_status: int
+    response_headers: dict[str, Any]
+    response_body: str
+    # NOTE(review): unit of elapsed_time is not stated in this module - confirm.
+    elapsed_time: float
+class DeliveryTicket(BaseModel):
+    """Delivery ticket response.
+    All three string fields (``delivery_tag``, ``endpoint_id``, ``url``) are
+    required.
+    """
+    delivery_tag: str
+    endpoint_id: str
+    url: str

gl_speech_sdk/py.typed ADDED Viewed

File without changes