livellm 1.2.0__tar.gz → 1.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: livellm
- Version: 1.2.0
+ Version: 1.3.0
  Summary: Python client for the LiveLLM Server
  Project-URL: Homepage, https://github.com/qalby-tech/livellm-client-py
  Project-URL: Repository, https://github.com/qalby-tech/livellm-client-py
@@ -17,6 +17,7 @@ Classifier: Typing :: Typed
  Requires-Python: >=3.10
  Requires-Dist: httpx>=0.27.0
  Requires-Dist: pydantic>=2.0.0
+ Requires-Dist: websockets>=15.0.1
  Provides-Extra: testing
  Requires-Dist: pytest-asyncio>=0.21.0; extra == 'testing'
  Requires-Dist: pytest-cov>=4.1.0; extra == 'testing'
@@ -32,12 +33,13 @@ Python client library for the LiveLLM Server - a unified proxy for AI agent, aud

  ## Features

- - 🚀 **Async-first** - Built on httpx for high-performance operations
+ - 🚀 **Async-first** - Built on httpx and websockets for high-performance operations
  - 🔒 **Type-safe** - Full type hints and Pydantic validation
  - 🎯 **Multi-provider** - OpenAI, Google, Anthropic, Groq, ElevenLabs
  - 🔄 **Streaming** - Real-time streaming for agent and audio
  - 🛠️ **Flexible API** - Use request objects or keyword arguments
  - 🎙️ **Audio services** - Text-to-speech and transcription
+ - 🎤 **Real-Time Transcription** - WebSocket-based live audio transcription with bidirectional streaming
  - ⚡ **Fallback strategies** - Sequential and parallel handling
  - 🧹 **Auto cleanup** - Context managers and garbage collection

@@ -326,6 +328,123 @@ transcription = await client.transcribe(
  )
  ```

+ ### Real-Time Transcription (WebSocket)
+
+ The realtime transcription API is available either **directly** via `TranscriptionWsClient` or **through** `LivellmClient.realtime.transcription`.
+
+ #### Using `TranscriptionWsClient` directly
+
+ ```python
+ import asyncio
+ from livellm import TranscriptionWsClient
+ from livellm.models import (
+     TranscriptionInitWsRequest,
+     TranscriptionAudioChunkWsRequest,
+     SpeakMimeType,
+ )
+
+ async def transcribe_live_direct():
+     base_url = "ws://localhost:8000"  # WebSocket base URL
+
+     async with TranscriptionWsClient(base_url, timeout=30) as client:
+         # Define audio source (file, microphone, stream, etc.)
+         async def audio_source():
+             with open("audio.pcm", "rb") as f:
+                 while chunk := f.read(4096):
+                     yield TranscriptionAudioChunkWsRequest(audio=chunk)
+                     await asyncio.sleep(0.1)  # Simulate real-time
+
+         # Initialize transcription session
+         init_request = TranscriptionInitWsRequest(
+             provider_uid="openai",
+             model="gpt-4o-mini-transcribe",
+             language="en",  # or "auto" for detection
+             input_sample_rate=24000,
+             input_audio_format=SpeakMimeType.PCM,
+             gen_config={},
+         )
+
+         # Stream audio and receive transcriptions
+         async for response in client.start_session(init_request, audio_source()):
+             print(f"Transcription: {response.transcription}")
+             if response.is_end:
+                 print("Transcription complete!")
+                 break
+
+ asyncio.run(transcribe_live_direct())
+ ```
+
+ #### Using `LivellmClient.realtime.transcription` (and running agents while listening)
+
+ ```python
+ import asyncio
+ from livellm import LivellmClient
+ from livellm.models import (
+     TextMessage,
+     TranscriptionInitWsRequest,
+     TranscriptionAudioChunkWsRequest,
+     SpeakMimeType,
+ )
+
+ async def transcribe_and_chat():
+     # Central HTTP client; .realtime and .transcription expose WebSocket APIs
+     client = LivellmClient(base_url="http://localhost:8000", timeout=30)
+
+     async with client.realtime as realtime:
+         async with realtime.transcription as t_client:
+             async def audio_source():
+                 with open("audio.pcm", "rb") as f:
+                     while chunk := f.read(4096):
+                         yield TranscriptionAudioChunkWsRequest(audio=chunk)
+                         await asyncio.sleep(0.1)
+
+             init_request = TranscriptionInitWsRequest(
+                 provider_uid="openai",
+                 model="gpt-4o-mini-transcribe",
+                 language="en",
+                 input_sample_rate=24000,
+                 input_audio_format=SpeakMimeType.PCM,
+                 gen_config={},
+             )
+
+             # Listen for transcriptions and, for each chunk, run an agent request
+             async for resp in t_client.start_session(init_request, audio_source()):
+                 print("User said:", resp.transcription)
+
+                 # You can call agent_run (or speak, etc.) while the transcription stream is active
+                 agent_response = await realtime.agent_run(
+                     provider_uid="openai",
+                     model="gpt-4",
+                     messages=[
+                         TextMessage(role="user", content=resp.transcription),
+                     ],
+                     temperature=0.7,
+                 )
+                 print("Agent:", agent_response.output)
+
+                 if resp.is_end:
+                     print("Transcription session complete")
+                     break
+
+ asyncio.run(transcribe_and_chat())
+ ```
+
+ **Supported Audio Formats:**
+ - **PCM**: 16-bit uncompressed (recommended)
+ - **μ-law**: 8-bit telephony format (North America/Japan)
+ - **A-law**: 8-bit telephony format (Europe/rest of world)
+
+ **Use Cases:**
+ - 🎙️ Voice assistants and chatbots
+ - 📝 Live captioning and subtitles
+ - 🎤 Meeting transcription
+ - 🗣️ Voice commands and control
+
+ **See also:**
+ - [TRANSCRIPTION_CLIENT.md](TRANSCRIPTION_CLIENT.md) - Complete transcription guide
+ - [example_transcription.py](example_transcription.py) - Python examples
+ - [example_transcription_browser.html](example_transcription_browser.html) - Browser demo
+
  ### Fallback Strategies

  Handle failures automatically with sequential or parallel fallback:
@@ -418,6 +537,12 @@ response = await client.ping()
  - `speak_stream(request | **kwargs)` - Text-to-speech (streaming)
  - `transcribe(request | **kwargs)` - Speech-to-text

+ **Real-Time Transcription (TranscriptionWsClient)**
+ - `connect()` - Establish WebSocket connection
+ - `disconnect()` - Close WebSocket connection
+ - `start_session(init_request, audio_source)` - Start bidirectional streaming transcription
+ - `async with client:` - Auto connection management (recommended)
+
  **Cleanup**
  - `cleanup()` - Release resources
  - `async with client:` - Auto cleanup (recommended)
@@ -437,6 +562,8 @@ response = await client.ping()
  - `AgentRequest(provider_uid, model, messages, tools?, gen_config?)`
  - `SpeakRequest(provider_uid, model, text, voice, mime_type, sample_rate, gen_config?)`
  - `TranscribeRequest(provider_uid, file, model, language?, gen_config?)`
+ - `TranscriptionInitWsRequest(provider_uid, model, language?, input_sample_rate?, input_audio_format?, gen_config?)`
+ - `TranscriptionAudioChunkWsRequest(audio)` - Audio chunk for streaming

  **Tools**
  - `WebSearchInput(kind=ToolKind.WEB_SEARCH, search_context_size)`
@@ -450,6 +577,7 @@ response = await client.ping()
  **Responses**
  - `AgentResponse(output, usage{input_tokens, output_tokens}, ...)`
  - `TranscribeResponse(text, language)`
+ - `TranscriptionWsResponse(transcription, is_end)` - Real-time transcription result

  ## Error Handling

@@ -486,6 +614,15 @@ mypy livellm
  - Python 3.10+
  - httpx >= 0.27.0
  - pydantic >= 2.0.0
+ - websockets >= 15.0.1
+
+ ## Documentation
+
+ - [README.md](README.md) - Main documentation (you are here)
+ - [TRANSCRIPTION_CLIENT.md](TRANSCRIPTION_CLIENT.md) - Complete real-time transcription guide
+ - [CLIENT_EXAMPLES.md](CLIENT_EXAMPLES.md) - Usage examples for all features
+ - [example_transcription.py](example_transcription.py) - Python transcription examples
+ - [example_transcription_browser.html](example_transcription_browser.html) - Browser demo

  ## Links

@@ -7,12 +7,13 @@ Python client library for the LiveLLM Server - a unified proxy for AI agent, aud

  ## Features

- - 🚀 **Async-first** - Built on httpx for high-performance operations
+ - 🚀 **Async-first** - Built on httpx and websockets for high-performance operations
  - 🔒 **Type-safe** - Full type hints and Pydantic validation
  - 🎯 **Multi-provider** - OpenAI, Google, Anthropic, Groq, ElevenLabs
  - 🔄 **Streaming** - Real-time streaming for agent and audio
  - 🛠️ **Flexible API** - Use request objects or keyword arguments
  - 🎙️ **Audio services** - Text-to-speech and transcription
+ - 🎤 **Real-Time Transcription** - WebSocket-based live audio transcription with bidirectional streaming
  - ⚡ **Fallback strategies** - Sequential and parallel handling
  - 🧹 **Auto cleanup** - Context managers and garbage collection

@@ -301,6 +302,123 @@ transcription = await client.transcribe(
  )
  ```

+ ### Real-Time Transcription (WebSocket)
+
+ The realtime transcription API is available either **directly** via `TranscriptionWsClient` or **through** `LivellmClient.realtime.transcription`.
+
+ #### Using `TranscriptionWsClient` directly
+
+ ```python
+ import asyncio
+ from livellm import TranscriptionWsClient
+ from livellm.models import (
+     TranscriptionInitWsRequest,
+     TranscriptionAudioChunkWsRequest,
+     SpeakMimeType,
+ )
+
+ async def transcribe_live_direct():
+     base_url = "ws://localhost:8000"  # WebSocket base URL
+
+     async with TranscriptionWsClient(base_url, timeout=30) as client:
+         # Define audio source (file, microphone, stream, etc.)
+         async def audio_source():
+             with open("audio.pcm", "rb") as f:
+                 while chunk := f.read(4096):
+                     yield TranscriptionAudioChunkWsRequest(audio=chunk)
+                     await asyncio.sleep(0.1)  # Simulate real-time
+
+         # Initialize transcription session
+         init_request = TranscriptionInitWsRequest(
+             provider_uid="openai",
+             model="gpt-4o-mini-transcribe",
+             language="en",  # or "auto" for detection
+             input_sample_rate=24000,
+             input_audio_format=SpeakMimeType.PCM,
+             gen_config={},
+         )
+
+         # Stream audio and receive transcriptions
+         async for response in client.start_session(init_request, audio_source()):
+             print(f"Transcription: {response.transcription}")
+             if response.is_end:
+                 print("Transcription complete!")
+                 break
+
+ asyncio.run(transcribe_live_direct())
+ ```
+
+ #### Using `LivellmClient.realtime.transcription` (and running agents while listening)
+
+ ```python
+ import asyncio
+ from livellm import LivellmClient
+ from livellm.models import (
+     TextMessage,
+     TranscriptionInitWsRequest,
+     TranscriptionAudioChunkWsRequest,
+     SpeakMimeType,
+ )
+
+ async def transcribe_and_chat():
+     # Central HTTP client; .realtime and .transcription expose WebSocket APIs
+     client = LivellmClient(base_url="http://localhost:8000", timeout=30)
+
+     async with client.realtime as realtime:
+         async with realtime.transcription as t_client:
+             async def audio_source():
+                 with open("audio.pcm", "rb") as f:
+                     while chunk := f.read(4096):
+                         yield TranscriptionAudioChunkWsRequest(audio=chunk)
+                         await asyncio.sleep(0.1)
+
+             init_request = TranscriptionInitWsRequest(
+                 provider_uid="openai",
+                 model="gpt-4o-mini-transcribe",
+                 language="en",
+                 input_sample_rate=24000,
+                 input_audio_format=SpeakMimeType.PCM,
+                 gen_config={},
+             )
+
+             # Listen for transcriptions and, for each chunk, run an agent request
+             async for resp in t_client.start_session(init_request, audio_source()):
+                 print("User said:", resp.transcription)
+
+                 # You can call agent_run (or speak, etc.) while the transcription stream is active
+                 agent_response = await realtime.agent_run(
+                     provider_uid="openai",
+                     model="gpt-4",
+                     messages=[
+                         TextMessage(role="user", content=resp.transcription),
+                     ],
+                     temperature=0.7,
+                 )
+                 print("Agent:", agent_response.output)
+
+                 if resp.is_end:
+                     print("Transcription session complete")
+                     break
+
+ asyncio.run(transcribe_and_chat())
+ ```
+
+ **Supported Audio Formats:**
+ - **PCM**: 16-bit uncompressed (recommended)
+ - **μ-law**: 8-bit telephony format (North America/Japan)
+ - **A-law**: 8-bit telephony format (Europe/rest of world)
+
+ **Use Cases:**
+ - 🎙️ Voice assistants and chatbots
+ - 📝 Live captioning and subtitles
+ - 🎤 Meeting transcription
+ - 🗣️ Voice commands and control
+
+ **See also:**
+ - [TRANSCRIPTION_CLIENT.md](TRANSCRIPTION_CLIENT.md) - Complete transcription guide
+ - [example_transcription.py](example_transcription.py) - Python examples
+ - [example_transcription_browser.html](example_transcription_browser.html) - Browser demo
+
  ### Fallback Strategies

  Handle failures automatically with sequential or parallel fallback:
@@ -393,6 +511,12 @@ response = await client.ping()
  - `speak_stream(request | **kwargs)` - Text-to-speech (streaming)
  - `transcribe(request | **kwargs)` - Speech-to-text

+ **Real-Time Transcription (TranscriptionWsClient)**
+ - `connect()` - Establish WebSocket connection
+ - `disconnect()` - Close WebSocket connection
+ - `start_session(init_request, audio_source)` - Start bidirectional streaming transcription
+ - `async with client:` - Auto connection management (recommended)
+
  **Cleanup**
  - `cleanup()` - Release resources
  - `async with client:` - Auto cleanup (recommended)
@@ -412,6 +536,8 @@ response = await client.ping()
  - `AgentRequest(provider_uid, model, messages, tools?, gen_config?)`
  - `SpeakRequest(provider_uid, model, text, voice, mime_type, sample_rate, gen_config?)`
  - `TranscribeRequest(provider_uid, file, model, language?, gen_config?)`
+ - `TranscriptionInitWsRequest(provider_uid, model, language?, input_sample_rate?, input_audio_format?, gen_config?)`
+ - `TranscriptionAudioChunkWsRequest(audio)` - Audio chunk for streaming

  **Tools**
  - `WebSearchInput(kind=ToolKind.WEB_SEARCH, search_context_size)`
@@ -425,6 +551,7 @@ response = await client.ping()
  **Responses**
  - `AgentResponse(output, usage{input_tokens, output_tokens}, ...)`
  - `TranscribeResponse(text, language)`
+ - `TranscriptionWsResponse(transcription, is_end)` - Real-time transcription result

  ## Error Handling

@@ -461,6 +588,15 @@ mypy livellm
  - Python 3.10+
  - httpx >= 0.27.0
  - pydantic >= 2.0.0
+ - websockets >= 15.0.1
+
+ ## Documentation
+
+ - [README.md](README.md) - Main documentation (you are here)
+ - [TRANSCRIPTION_CLIENT.md](TRANSCRIPTION_CLIENT.md) - Complete real-time transcription guide
+ - [CLIENT_EXAMPLES.md](CLIENT_EXAMPLES.md) - Usage examples for all features
+ - [example_transcription.py](example_transcription.py) - Python transcription examples
+ - [example_transcription_browser.html](example_transcription_browser.html) - Browser demo

  ## Links

@@ -1,15 +1,19 @@
  """LiveLLM Client - Python client for the LiveLLM Proxy and Realtime APIs."""

- from .livellm import LivellmClient
+ from .livellm import LivellmClient, LivellmWsClient, BaseLivellmClient
+ from .transcripton import TranscriptionWsClient
  from . import models

- __version__ = "1.1.0"
+ __version__ = "1.2.0"

  __all__ = [
      # Version
      "__version__",
      # Classes
      "LivellmClient",
+     "LivellmWsClient",
+     "BaseLivellmClient",
+     "TranscriptionWsClient",
      # Models
      *models.__all__,
  ]
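This `__init__.py` hunk adds three top-level exports. Two details are as released, not editorial errors: the internal module is spelled `.transcripton`, and `__version__` is bumped only to "1.2.0" even though the package metadata says 1.3.0. A short sketch of the resulting import surface; only the names come from the diff, and the constructor arguments mirror the README examples rather than a documented signature.

```python
# New 1.3.0 import surface, as implied by the __init__.py hunk above.
from livellm import (
    LivellmClient,          # HTTP client (existing)
    LivellmWsClient,        # WebSocket client (new export)
    BaseLivellmClient,      # shared base class (new export)
    TranscriptionWsClient,  # realtime transcription client (new export)
)

import asyncio

async def main() -> None:
    # Same base_url/timeout arguments as the README examples;
    # requires a running LiveLLM server to actually connect.
    async with TranscriptionWsClient("ws://localhost:8000", timeout=30) as client:
        ...  # client.start_session(init_request, audio_source())

asyncio.run(main())
```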