livekit-plugins-google 0.10.0__py3-none-any.whl → 0.10.2__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -75,6 +75,7 @@ class InputTranscription:
75
75
  @dataclass
76
76
  class Capabilities:
77
77
  supports_truncate: bool
78
+ input_audio_sample_rate: int | None = None
78
79
 
79
80
 
80
81
  @dataclass
@@ -156,6 +157,7 @@ class RealtimeModel:
156
157
  super().__init__()
157
158
  self._capabilities = Capabilities(
158
159
  supports_truncate=False,
160
+ input_audio_sample_rate=16000,
159
161
  )
160
162
  self._model = model
161
163
  self._loop = loop or asyncio.get_event_loop()
@@ -307,8 +309,6 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
307
309
  self._init_sync_task = asyncio.create_task(asyncio.sleep(0))
308
310
  self._send_ch = utils.aio.Chan[ClientEvents]()
309
311
  self._active_response_id = None
310
- if chat_ctx:
311
- self.generate_reply(chat_ctx)
312
312
 
313
313
  async def aclose(self) -> None:
314
314
  if self._send_ch.closed:
@@ -336,25 +336,6 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
336
336
  def _queue_msg(self, msg: ClientEvents) -> None:
337
337
  self._send_ch.send_nowait(msg)
338
338
 
339
- def generate_reply(
340
- self,
341
- ctx: llm.ChatContext | llm.ChatMessage,
342
- turn_complete: bool = True,
343
- ) -> None:
344
- if isinstance(ctx, llm.ChatMessage) and isinstance(ctx.content, str):
345
- new_chat_ctx = llm.ChatContext()
346
- new_chat_ctx.append(text=ctx.content, role=ctx.role)
347
- elif isinstance(ctx, llm.ChatContext):
348
- new_chat_ctx = ctx
349
- else:
350
- raise ValueError("Invalid chat context")
351
- turns, _ = _build_gemini_ctx(new_chat_ctx, id(self))
352
- client_content = LiveClientContent(
353
- turn_complete=turn_complete,
354
- turns=turns,
355
- )
356
- self._queue_msg(client_content)
357
-
358
339
  def chat_ctx_copy(self) -> llm.ChatContext:
359
340
  return self._chat_ctx.copy()
360
341
 
@@ -370,7 +351,16 @@ class GeminiRealtimeSession(utils.EventEmitter[EventTypes]):
370
351
  "cancel_existing", "cancel_new", "keep_both"
371
352
  ] = "keep_both",
372
353
  ) -> None:
373
- raise NotImplementedError("create_response is not supported yet")
354
+ turns, _ = _build_gemini_ctx(self._chat_ctx, id(self))
355
+ ctx = [self._opts.instructions] + turns if self._opts.instructions else turns
356
+
357
+ if not ctx:
358
+ logger.warning(
359
+ "gemini-realtime-session: No chat context to send, sending dummy content."
360
+ )
361
+ ctx = [Content(parts=[Part(text=".")])]
362
+
363
+ self._queue_msg(LiveClientContent(turns=ctx, turn_complete=True))
374
364
 
375
365
  def commit_audio_buffer(self) -> None:
376
366
  raise NotImplementedError("commit_audio_buffer is not supported yet")
@@ -56,6 +56,7 @@ class TranscriberSession(utils.EventEmitter[EventTypes]):
56
56
  super().__init__()
57
57
  self._client = client
58
58
  self._model = model
59
+ self._needed_sr = 16000
59
60
  self._closed = False
60
61
  system_instructions = types.Content(
61
62
  parts=[types.Part(text=SYSTEM_INSTRUCTIONS)]
@@ -72,18 +73,37 @@ class TranscriberSession(utils.EventEmitter[EventTypes]):
72
73
  self._main_task(), name="gemini-realtime-transcriber"
73
74
  )
74
75
  self._send_ch = utils.aio.Chan[ClientEvents]()
76
+ self._resampler: rtc.AudioResampler | None = None
75
77
  self._active_response_id = None
76
78
 
77
79
  def _push_audio(self, frame: rtc.AudioFrame) -> None:
78
80
  if self._closed:
79
81
  return
80
- self._queue_msg(
81
- types.LiveClientRealtimeInput(
82
- media_chunks=[
83
- types.Blob(data=frame.data.tobytes(), mime_type="audio/pcm")
84
- ]
82
+ if frame.sample_rate != self._needed_sr:
83
+ if not self._resampler:
84
+ self._resampler = rtc.AudioResampler(
85
+ frame.sample_rate,
86
+ self._needed_sr,
87
+ quality=rtc.AudioResamplerQuality.HIGH,
88
+ )
89
+
90
+ if self._resampler:
91
+ for f in self._resampler.push(frame):
92
+ self._queue_msg(
93
+ types.LiveClientRealtimeInput(
94
+ media_chunks=[
95
+ types.Blob(data=f.data.tobytes(), mime_type="audio/pcm")
96
+ ]
97
+ )
98
+ )
99
+ else:
100
+ self._queue_msg(
101
+ types.LiveClientRealtimeInput(
102
+ media_chunks=[
103
+ types.Blob(data=frame.data.tobytes(), mime_type="audio/pcm")
104
+ ]
105
+ )
85
106
  )
86
- )
87
107
 
88
108
  def _queue_msg(self, msg: ClientEvents) -> None:
89
109
  if not self._closed:
@@ -108,8 +108,8 @@ class LLM(llm.LLM):
108
108
  self._api_key = api_key or os.environ.get("GOOGLE_API_KEY", None)
109
109
  _gac = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
110
110
  if _gac is None:
111
- raise ValueError(
112
- "`GOOGLE_APPLICATION_CREDENTIALS` environment variable is not set. please set it to the path of the service account key file."
111
+ logger.warning(
112
+ "`GOOGLE_APPLICATION_CREDENTIALS` environment variable is not set. please set it to the path of the service account key file. Otherwise, use any of the other Google Cloud auth methods."
113
113
  )
114
114
 
115
115
  if vertexai:
@@ -89,9 +89,9 @@ class STT(stt.STT):
89
89
  detect_language: bool = True,
90
90
  interim_results: bool = True,
91
91
  punctuate: bool = True,
92
- spoken_punctuation: bool = True,
93
- model: SpeechModels = "long",
94
- location: str = "global",
92
+ spoken_punctuation: bool = False,
93
+ model: SpeechModels = "chirp_2",
94
+ location: str = "us-central1",
95
95
  sample_rate: int = 16000,
96
96
  credentials_info: dict | None = None,
97
97
  credentials_file: str | None = None,
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "0.10.0"
15
+ __version__ = "0.10.2"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: livekit-plugins-google
3
- Version: 0.10.0
3
+ Version: 0.10.2
4
4
  Summary: Agent Framework plugin for services from Google Cloud
5
5
  Home-page: https://github.com/livekit/agents
6
6
  License: Apache-2.0
@@ -23,7 +23,7 @@ Requires-Dist: google-auth<3,>=2
23
23
  Requires-Dist: google-cloud-speech<3,>=2
24
24
  Requires-Dist: google-cloud-texttospeech<3,>=2
25
25
  Requires-Dist: google-genai==0.5.0
26
- Requires-Dist: livekit-agents>=0.12.3
26
+ Requires-Dist: livekit-agents>=0.12.11
27
27
  Dynamic: classifier
28
28
  Dynamic: description
29
29
  Dynamic: description-content-type
@@ -1,18 +1,18 @@
1
1
  livekit/plugins/google/__init__.py,sha256=e_kSlFNmKhyyeliz7f4WOKc_Y0-y39QjO5nCWuguhss,1171
2
2
  livekit/plugins/google/_utils.py,sha256=mjsqblhGMgAZ2MNPisAVkNsqq4gfO6vvprEKzAGoVwE,7248
3
- livekit/plugins/google/llm.py,sha256=vL8iyRqWVPT0wCDeXTlybytlyJ-J-VolVQYqP-ZVlb0,16388
3
+ livekit/plugins/google/llm.py,sha256=o9EJBv3rS5vKRq7m5YjSSqOxtH6pPekxRS_lra35hzk,16445
4
4
  livekit/plugins/google/log.py,sha256=GI3YWN5YzrafnUccljzPRS_ZALkMNk1i21IRnTl2vNA,69
5
5
  livekit/plugins/google/models.py,sha256=w_qmOk5y86vjtszDiGpP9p0ctjQeaB8-UzqprxgpvCY,1407
6
6
  livekit/plugins/google/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- livekit/plugins/google/stt.py,sha256=E5kXPbicH4FEXBjyBzfqQWA-nPhKkojzcc-cbtWdmNs,21088
7
+ livekit/plugins/google/stt.py,sha256=FA6Lpeb8QvRXLzkQ7cjsoMxHdtEGwHWkpN_TKqAdKAQ,21097
8
8
  livekit/plugins/google/tts.py,sha256=95qXCigVQYWNbcN3pIKBpIah4b31U_MWtXv5Ji0AMc4,9229
9
- livekit/plugins/google/version.py,sha256=sAL7xgP18DEksjwYUwabcCgRgKAAGXSWs6xp7NgcxoU,601
9
+ livekit/plugins/google/version.py,sha256=jklx55q_NtxoIUiYD5AFOO11S_Jij8P491Y8nkw-VZk,601
10
10
  livekit/plugins/google/beta/__init__.py,sha256=AxRYc7NGG62Tv1MmcZVCDHNvlhbC86hM-_yP01Qb28k,47
11
11
  livekit/plugins/google/beta/realtime/__init__.py,sha256=sGTn6JFNyA30QUXBZ_BV3l2eHpGAzR35ByXxg77vWNU,205
12
12
  livekit/plugins/google/beta/realtime/api_proto.py,sha256=9EhmwgeIgKDqdSijv5Q9pgx7UhAakK02ZDwbnUsra_o,657
13
- livekit/plugins/google/beta/realtime/realtime_api.py,sha256=vCjDQZvHS749Gf-QOLo-RaW4HlQHlzuArd3IlN5xMmY,21459
14
- livekit/plugins/google/beta/realtime/transcriber.py,sha256=3TaYbtvPWHkxKlDSZSMLWBbR7KewBRg3HcdIxuGhl9c,5880
15
- livekit_plugins_google-0.10.0.dist-info/METADATA,sha256=lsA9pwlWHE-q-9x3HKn2EeJ7ZdcpjxzEtYs1wRH5axE,2057
16
- livekit_plugins_google-0.10.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
17
- livekit_plugins_google-0.10.0.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
18
- livekit_plugins_google-0.10.0.dist-info/RECORD,,
13
+ livekit/plugins/google/beta/realtime/realtime_api.py,sha256=OwNoPmmomMtRkmYw-g2u7hIYpeIrSSNky7FlcHBVyFQ,21150
14
+ livekit/plugins/google/beta/realtime/transcriber.py,sha256=JnZ75NyiOLkpvQ5N2nDniumDKcrjiq_tlryiLbuBoDM,6658
15
+ livekit_plugins_google-0.10.2.dist-info/METADATA,sha256=dTBdAuYpGyCFVJNw0c8upUEdaFgdodWwrm1bB3a4Xp4,2058
16
+ livekit_plugins_google-0.10.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
17
+ livekit_plugins_google-0.10.2.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
18
+ livekit_plugins_google-0.10.2.dist-info/RECORD,,