PyPI - gl-speech-sdk - Versions diffs - 0.0.1b1__py3-none-any.whl - Mend

gl-speech-sdk 0.0.1b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

gl_speech_sdk/__init__.py +69 -0
gl_speech_sdk/client.py +86 -0
gl_speech_sdk/models.py +409 -0
gl_speech_sdk/py.typed +0 -0
gl_speech_sdk/stt.py +456 -0
gl_speech_sdk/tts.py +449 -0
gl_speech_sdk/webhooks.py +551 -0
gl_speech_sdk-0.0.1b1.dist-info/METADATA +417 -0
gl_speech_sdk-0.0.1b1.dist-info/RECORD +11 -0
gl_speech_sdk-0.0.1b1.dist-info/WHEEL +4 -0
gl_speech_sdk-0.0.1b1.dist-info/licenses/LICENSE +21 -0

gl_speech_sdk-0.0.1b1.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,417 @@
+Metadata-Version: 2.4
+Name: gl-speech-sdk
+Version: 0.0.1b1
+Summary: GL Speech Python Client - Language binding SDK for Prosa Speech API
+Author-email: GDP Labs <jobs@gdplabs.id>
+License-File: LICENSE
+Requires-Python: >=3.13
+Requires-Dist: httpx>=0.28.1
+Requires-Dist: pydantic>=2.0.0
+Description-Content-Type: text/markdown
+# GL Speech SDK
+A Python SDK for interacting with the Prosa Speech API, providing speech-to-text (STT), text-to-speech (TTS), and webhook management capabilities.
+## Installation
+```bash
+pip install gl-speech-sdk
+```
+Or with uv:
+```bash
+uv add gl-speech-sdk
+```
+## Quick Start
+```python
+from gl_speech_sdk import SpeechClient
+# Initialize the client
+client = SpeechClient(api_key="your-api-key")
+# Speech-to-Text
+result = client.stt.transcribe(
+    data="<base64-encoded-audio>",
+    model="stt-general",
+    wait=True
+)
+print(result.result)
+# Text-to-Speech
+result = client.tts.synthesize(
+    text="Hello, world!",
+    model="tts-dimas-formal",
+    wait=True
+)
+print(result.result)
+```
+## Configuration
+### Environment Variables
+The SDK supports the following environment variables:
+- `GLSPEECH_API_KEY`: API key for authentication
+- `GLSPEECH_BASE_URL`: Base URL for the API (default: `https://api.prosa.ai/v2/speech/`)
+### Client Initialization
+```python
+from gl_speech_sdk import SpeechClient
+# Using explicit parameters
+client = SpeechClient(
+    api_key="your-api-key",
+    base_url="https://api.prosa.ai/v2/speech/",
+    timeout=60.0,
+    default_headers={"X-Custom-Header": "value"}
+)
+# Using environment variables
+import os
+os.environ["GLSPEECH_API_KEY"] = "your-api-key"
+client = SpeechClient()
+```
+## Speech-to-Text (STT)
+### List Available Models
+```python
+models = client.stt.list_models()
+for model in models:
+    print(f"{model['name']}: {model['label']}")
+```
+### Transcribe Audio
+```python
+# Synchronous (wait for result)
+result = client.stt.transcribe(
+    model="stt-general",
+    wait=True,
+    data="<base64-encoded-audio>",
+    label="My audio file"
+)
+print(result.result)
+# Asynchronous (get job_id, poll later)
+result = client.stt.transcribe(
+    model="stt-general",
+    wait=False,
+    uri="https://example.com/audio.wav"
+)
+job_id = result.job_id
+# Check status
+status = client.stt.get_status(job_id)
+print(f"Status: {status.status}, Progress: {status.progress}")
+# Get result when complete
+result = client.stt.get_job(job_id)
+print(result.result)
+```
+### Advanced Configuration
+```python
+result = client.stt.transcribe(
+    model="stt-general",
+    wait=True,
+    data="<base64-encoded-audio>",
+    speaker_count=2,           # Expected number of speakers
+    include_filler=True,       # Include filler words
+    auto_punctuation=True,     # Auto-add punctuation
+    enable_spoken_numerals=True,  # Convert "one" to "1"
+    enable_speech_insights=True,  # Speech analytics
+    enable_voice_insights=True,   # Voice analytics
+)
+```
+### List and Manage Jobs
+```python
+# List jobs with filters
+jobs = client.stt.list_jobs(
+    page=1,
+    per_page=10,
+    from_date="2024-01-01",
+    until_date="2024-01-31",
+    query_text="hello"
+)
+# Archive a job
+client.stt.archive(job_id)
+```
+## Text-to-Speech (TTS)
+### List Available Models
+```python
+models = client.tts.list_models()
+for model in models:
+    print(f"{model['name']}: {model['voice']} ({model['gender']})")
+```
+### Synthesize Speech
+```python
+# Synchronous (wait for result)
+result = client.tts.synthesize(
+    text="Hello, world!",
+    model="tts-dimas-formal",
+    wait=True
+)
+audio_data = result.result["data"]  # Base64-encoded audio
+# Get as signed URL instead
+result = client.tts.synthesize(
+    text="Hello, world!",
+    model="tts-dimas-formal",
+    wait=True,
+    as_signed_url=True
+)
+audio_url = result.result["path"]
+# Asynchronous
+result = client.tts.synthesize(
+    text="Long text content...",
+    model="tts-dimas-formal",
+    wait=False
+)
+job_id = result.job_id
+# Get result when complete (call again later if the job is still running)
+result = client.tts.get_job(job_id, as_signed_url=True)
+```
+### Advanced Configuration
+```python
+result = client.tts.synthesize(
+    text="Hello, world!",
+    model="tts-dimas-formal",
+    wait=True,
+    pitch=0.5,           # Pitch adjustment (-1.0 to 1.0)
+    tempo=1.2,           # Speed adjustment (0.5 to 2.0)
+    audio_format="mp3",  # "opus", "mp3", or "wav"
+    label="My synthesis"
+)
+```
+### List and Manage Jobs
+```python
+# List jobs
+jobs = client.tts.list_jobs(page=1, per_page=10)
+# Count jobs
+count = client.tts.count_jobs(from_date="2024-01-01")
+# Get job status
+status = client.tts.get_status(job_id)
+# Archive a job
+client.tts.archive(job_id)
+```
+## Webhook Management
+### Create a Webhook Endpoint
+```python
+endpoint = client.webhooks.create_endpoint(
+    url="https://your-server.com/webhook",
+    event_filters=["stt.jobs.completed", "tts.jobs.completed"],
+    ssl_verification=True
+)
+print(f"Endpoint ID: {endpoint.id}")
+print(f"Secret Key: {endpoint.secrets[0].key}")
+```
+### List Endpoints
+```python
+endpoints = client.webhooks.list_endpoints()
+for ep in endpoints:
+    print(f"{ep.id}: {ep.url}")
+```
+### Update and Delete Endpoints
+```python
+# Update
+endpoint = client.webhooks.update_endpoint(
+    endpoint_id="endpoint-123",
+    url="https://your-server.com/new-webhook",
+    event_filters=[]
+)
+# Delete
+client.webhooks.delete_endpoint("endpoint-123")
+```
+### Rotate Secrets
+```python
+endpoint = client.webhooks.rotate_secret(
+    endpoint_id="endpoint-123",
+    days=3,  # Old secret valid for 3 days
+    hours=0
+)
+```
+### Event Management
+```python
+# List events
+events = client.webhooks.list_events(
+    from_date="2024-01-01",
+    to_date="2024-01-31"
+)
+# Get event details
+event = client.webhooks.get_event("event-123")
+print(event.data)
+```
+### Delivery Management
+```python
+# List deliveries
+deliveries = client.webhooks.list_deliveries("endpoint-123")
+# Replay a delivery
+ticket = client.webhooks.replay_delivery("delivery-123")
+# Replay all failed deliveries
+tickets = client.webhooks.replay_failed_deliveries("endpoint-123")
+# Test endpoint
+ticket = client.webhooks.test_endpoint("endpoint-123")
+```
+## Error Handling
+```python
+import httpx
+from gl_speech_sdk import SpeechClient
+client = SpeechClient(api_key="your-api-key")
+try:
+    result = client.stt.transcribe(data="invalid")
+except httpx.HTTPStatusError as e:
+    print(f"HTTP Error: {e.response.status_code}")
+    print(f"Response: {e.response.text}")
+except ValueError as e:
+    print(f"Validation Error: {e}")
+```
+## Development
+### Install Development Dependencies
+```bash
+uv sync --group dev
+```
+### Running Unit Tests
+The SDK uses `pytest` for unit testing. All test files are located in the `tests/` directory.
+#### Run All Tests
+```bash
+uv run pytest
+```
+#### Run Tests with Coverage Report
+```bash
+uv run pytest --cov=gl_speech_sdk --cov-report=term-missing
+```
+This will show:
+- Test coverage percentage
+- Which lines are covered/missing
+- Coverage report in the terminal
+#### Run Specific Test Files
+```bash
+# Run only client tests
+uv run pytest tests/test_client.py
+# Run only STT tests
+uv run pytest tests/test_stt.py
+# Run only TTS tests
+uv run pytest tests/test_tts.py
+# Run only webhook tests
+uv run pytest tests/test_webhooks.py
+```
+#### Run Specific Test Functions
+```bash
+# Run a specific test function
+uv run pytest tests/test_client.py::test_client_initialization_required_only
+# Run tests matching a pattern
+uv run pytest -k "test_client"
+```
+#### Run Tests with Verbose Output
+```bash
+# Show detailed output for each test
+uv run pytest -v
+# Show even more detail, including output from print statements
+uv run pytest -v -s
+```
+#### Run Tests in Parallel (if pytest-xdist is installed)
+```bash
+uv run pytest -n auto
+```
+#### Other Useful Options
+```bash
+# Stop on first failure
+uv run pytest -x
+# Show local variables on failure
+uv run pytest -l
+# Run tests matching a keyword
+uv run pytest -k "stt"  # Run all STT-related tests
+```
+### Run Linting
+```bash
+uv run ruff check .
+uv run ruff format .
+```
+## API Reference
+- [Prosa STT API Documentation](https://docs2.prosa.ai/speech/stt/rest/api/)
+- [Prosa TTS API Documentation](https://docs2.prosa.ai/speech/tts/rest/api/)
+- [Prosa Webhook API Documentation](https://docs2.prosa.ai/speech/webhook/rest/api/)
+## License
+MIT License - see LICENSE file for details.

gl_speech_sdk-0.0.1b1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,11 @@
+gl_speech_sdk/__init__.py,sha256=ENmGVx7-IUl5aOB2um-aFLlRQ00-9MQ_RcPzoroQhEM,1508
+gl_speech_sdk/client.py,sha256=dn_zhDwQTAFa9llrFVaT3IlE7uFZ3H4saWlBwYOGIl8,3075
+gl_speech_sdk/models.py,sha256=SGqZJsD35LBo5mxu0rfIPTUdZ9I0E1hf_dEgleG0Yls,9812
+gl_speech_sdk/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+gl_speech_sdk/stt.py,sha256=IgEAI-1ghhWzsONaI-zOzUurQ-XGRFbAwuBzq9DAqVo,16420
+gl_speech_sdk/tts.py,sha256=lfeldRiD3CxH4LMKTPuFxmu7rLL4Mgg2kj_EZo_y5Uk,14680
+gl_speech_sdk/webhooks.py,sha256=_DJUHcdHAtvpB1sSww5kSaUAESsyegcI-VkO1cRr1TU,18966
+gl_speech_sdk-0.0.1b1.dist-info/METADATA,sha256=Dz7CTCk0C_etW104lefkHWg95TYpigdUKcMrozI2sV0,8323
+gl_speech_sdk-0.0.1b1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+gl_speech_sdk-0.0.1b1.dist-info/licenses/LICENSE,sha256=GLqGo4o1hC8CcufoToJGtPfKsvPmLsVIk2x97xDBD3I,1065
+gl_speech_sdk-0.0.1b1.dist-info/RECORD,,

gl_speech_sdk-0.0.1b1.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.28.0
+Root-Is-Purelib: true
+Tag: py3-none-any

gl_speech_sdk-0.0.1b1.dist-info/licenses/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2025 GDP Labs
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.