dv-pipecat-ai 0.0.75.dev883__py3-none-any.whl → 0.0.82.dev19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dv-pipecat-ai might be problematic.

Files changed (121)
  1. {dv_pipecat_ai-0.0.75.dev883.dist-info → dv_pipecat_ai-0.0.82.dev19.dist-info}/METADATA +8 -3
  2. {dv_pipecat_ai-0.0.75.dev883.dist-info → dv_pipecat_ai-0.0.82.dev19.dist-info}/RECORD +121 -80
  3. pipecat/adapters/base_llm_adapter.py +44 -6
  4. pipecat/adapters/services/anthropic_adapter.py +302 -2
  5. pipecat/adapters/services/aws_nova_sonic_adapter.py +40 -2
  6. pipecat/adapters/services/bedrock_adapter.py +40 -2
  7. pipecat/adapters/services/gemini_adapter.py +276 -6
  8. pipecat/adapters/services/open_ai_adapter.py +88 -7
  9. pipecat/adapters/services/open_ai_realtime_adapter.py +39 -1
  10. pipecat/audio/dtmf/__init__.py +0 -0
  11. pipecat/audio/dtmf/dtmf-0.wav +0 -0
  12. pipecat/audio/dtmf/dtmf-1.wav +0 -0
  13. pipecat/audio/dtmf/dtmf-2.wav +0 -0
  14. pipecat/audio/dtmf/dtmf-3.wav +0 -0
  15. pipecat/audio/dtmf/dtmf-4.wav +0 -0
  16. pipecat/audio/dtmf/dtmf-5.wav +0 -0
  17. pipecat/audio/dtmf/dtmf-6.wav +0 -0
  18. pipecat/audio/dtmf/dtmf-7.wav +0 -0
  19. pipecat/audio/dtmf/dtmf-8.wav +0 -0
  20. pipecat/audio/dtmf/dtmf-9.wav +0 -0
  21. pipecat/audio/dtmf/dtmf-pound.wav +0 -0
  22. pipecat/audio/dtmf/dtmf-star.wav +0 -0
  23. pipecat/audio/dtmf/types.py +47 -0
  24. pipecat/audio/dtmf/utils.py +70 -0
  25. pipecat/audio/filters/aic_filter.py +199 -0
  26. pipecat/audio/utils.py +9 -7
  27. pipecat/extensions/ivr/__init__.py +0 -0
  28. pipecat/extensions/ivr/ivr_navigator.py +452 -0
  29. pipecat/frames/frames.py +156 -43
  30. pipecat/pipeline/llm_switcher.py +76 -0
  31. pipecat/pipeline/parallel_pipeline.py +3 -3
  32. pipecat/pipeline/service_switcher.py +144 -0
  33. pipecat/pipeline/task.py +68 -28
  34. pipecat/pipeline/task_observer.py +10 -0
  35. pipecat/processors/aggregators/dtmf_aggregator.py +2 -2
  36. pipecat/processors/aggregators/llm_context.py +277 -0
  37. pipecat/processors/aggregators/llm_response.py +48 -15
  38. pipecat/processors/aggregators/llm_response_universal.py +840 -0
  39. pipecat/processors/aggregators/openai_llm_context.py +3 -3
  40. pipecat/processors/dtmf_aggregator.py +0 -2
  41. pipecat/processors/filters/stt_mute_filter.py +0 -2
  42. pipecat/processors/frame_processor.py +18 -11
  43. pipecat/processors/frameworks/rtvi.py +17 -10
  44. pipecat/processors/metrics/sentry.py +2 -0
  45. pipecat/runner/daily.py +137 -36
  46. pipecat/runner/run.py +1 -1
  47. pipecat/runner/utils.py +7 -7
  48. pipecat/serializers/asterisk.py +145 -0
  49. pipecat/serializers/exotel.py +1 -1
  50. pipecat/serializers/plivo.py +1 -1
  51. pipecat/serializers/telnyx.py +1 -1
  52. pipecat/serializers/twilio.py +1 -1
  53. pipecat/services/__init__.py +2 -2
  54. pipecat/services/anthropic/llm.py +113 -28
  55. pipecat/services/asyncai/tts.py +4 -0
  56. pipecat/services/aws/llm.py +82 -8
  57. pipecat/services/aws/tts.py +0 -10
  58. pipecat/services/aws_nova_sonic/aws.py +5 -0
  59. pipecat/services/azure/llm.py +77 -1
  60. pipecat/services/cartesia/tts.py +28 -16
  61. pipecat/services/cerebras/llm.py +15 -10
  62. pipecat/services/deepgram/stt.py +8 -0
  63. pipecat/services/deepseek/llm.py +13 -8
  64. pipecat/services/elevenlabs/__init__.py +2 -0
  65. pipecat/services/elevenlabs/stt.py +351 -0
  66. pipecat/services/fireworks/llm.py +13 -8
  67. pipecat/services/fish/tts.py +8 -6
  68. pipecat/services/gemini_multimodal_live/gemini.py +5 -0
  69. pipecat/services/gladia/config.py +7 -1
  70. pipecat/services/gladia/stt.py +23 -15
  71. pipecat/services/google/llm.py +159 -59
  72. pipecat/services/google/llm_openai.py +18 -3
  73. pipecat/services/grok/llm.py +2 -1
  74. pipecat/services/llm_service.py +38 -3
  75. pipecat/services/mem0/memory.py +2 -1
  76. pipecat/services/mistral/llm.py +5 -6
  77. pipecat/services/nim/llm.py +2 -1
  78. pipecat/services/openai/base_llm.py +88 -26
  79. pipecat/services/openai/image.py +6 -1
  80. pipecat/services/openai_realtime_beta/openai.py +5 -2
  81. pipecat/services/openpipe/llm.py +6 -8
  82. pipecat/services/perplexity/llm.py +13 -8
  83. pipecat/services/playht/tts.py +9 -6
  84. pipecat/services/rime/tts.py +1 -1
  85. pipecat/services/sambanova/llm.py +18 -13
  86. pipecat/services/sarvam/tts.py +415 -10
  87. pipecat/services/speechmatics/stt.py +4 -4
  88. pipecat/services/tavus/video.py +1 -1
  89. pipecat/services/tts_service.py +15 -5
  90. pipecat/services/vistaar/llm.py +2 -5
  91. pipecat/transports/base_input.py +32 -19
  92. pipecat/transports/base_output.py +39 -5
  93. pipecat/transports/daily/__init__.py +0 -0
  94. pipecat/transports/daily/transport.py +2371 -0
  95. pipecat/transports/daily/utils.py +410 -0
  96. pipecat/transports/livekit/__init__.py +0 -0
  97. pipecat/transports/livekit/transport.py +1042 -0
  98. pipecat/transports/network/fastapi_websocket.py +12 -546
  99. pipecat/transports/network/small_webrtc.py +12 -922
  100. pipecat/transports/network/webrtc_connection.py +9 -595
  101. pipecat/transports/network/websocket_client.py +12 -481
  102. pipecat/transports/network/websocket_server.py +12 -487
  103. pipecat/transports/services/daily.py +9 -2334
  104. pipecat/transports/services/helpers/daily_rest.py +12 -396
  105. pipecat/transports/services/livekit.py +12 -975
  106. pipecat/transports/services/tavus.py +12 -757
  107. pipecat/transports/smallwebrtc/__init__.py +0 -0
  108. pipecat/transports/smallwebrtc/connection.py +612 -0
  109. pipecat/transports/smallwebrtc/transport.py +936 -0
  110. pipecat/transports/tavus/__init__.py +0 -0
  111. pipecat/transports/tavus/transport.py +770 -0
  112. pipecat/transports/websocket/__init__.py +0 -0
  113. pipecat/transports/websocket/client.py +494 -0
  114. pipecat/transports/websocket/fastapi.py +559 -0
  115. pipecat/transports/websocket/server.py +500 -0
  116. pipecat/transports/whatsapp/__init__.py +0 -0
  117. pipecat/transports/whatsapp/api.py +345 -0
  118. pipecat/transports/whatsapp/client.py +364 -0
  119. {dv_pipecat_ai-0.0.75.dev883.dist-info → dv_pipecat_ai-0.0.82.dev19.dist-info}/WHEEL +0 -0
  120. {dv_pipecat_ai-0.0.75.dev883.dist-info → dv_pipecat_ai-0.0.82.dev19.dist-info}/licenses/LICENSE +0 -0
  121. {dv_pipecat_ai-0.0.75.dev883.dist-info → dv_pipecat_ai-0.0.82.dev19.dist-info}/top_level.txt +0 -0
pipecat/services/google/llm.py

@@ -16,19 +16,20 @@ import json
 import os
 import uuid
 from dataclasses import dataclass
-from typing import Any, Dict, List, Optional
+from typing import Any, AsyncIterator, Dict, List, Optional
 
 from loguru import logger
 from PIL import Image
 from pydantic import BaseModel, Field
 
-from pipecat.adapters.services.gemini_adapter import GeminiLLMAdapter
+from pipecat.adapters.services.gemini_adapter import GeminiLLMAdapter, GeminiLLMInvocationParams
 from pipecat.frames.frames import (
     AudioRawFrame,
     Frame,
     FunctionCallCancelFrame,
     FunctionCallInProgressFrame,
     FunctionCallResultFrame,
+    LLMContextFrame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
     LLMMessagesFrame,
@@ -38,6 +39,7 @@ from pipecat.frames.frames import (
     VisionImageRawFrame,
 )
 from pipecat.metrics.metrics import LLMTokenUsage
+from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response import (
     LLMAssistantAggregatorParams,
     LLMUserAggregatorParams,
@@ -67,6 +69,7 @@ try:
         FunctionCall,
         FunctionResponse,
         GenerateContentConfig,
+        GenerateContentResponse,
         HttpOptions,
         Part,
     )
@@ -289,11 +292,11 @@ class GoogleLLMContext(OpenAILLMContext):
         # Add the converted messages to our existing messages
         self._messages.extend(converted_messages)
 
-    def get_messages_for_logging(self):
+    def get_messages_for_logging(self) -> List[Dict[str, Any]]:
         """Get messages formatted for logging with sensitive data redacted.
 
         Returns:
-            List of message dictionaries with inline data redacted.
+            List of messages in a format ready for logging.
         """
         msgs = []
         for message in self.messages:
@@ -418,7 +421,14 @@ class GoogleLLMContext(OpenAILLMContext):
         role = message["role"]
         content = message.get("content", [])
         if role == "system":
-            self.system_message = content
+            # System instructions are returned as plain text
+            if isinstance(content, str):
+                self.system_message = content
+            elif isinstance(content, list):
+                # If content is a list, we assume it's a list of text parts, per the standard
+                self.system_message = " ".join(
+                    part["text"] for part in content if part.get("type") == "text"
+                )
             return None
         elif role == "assistant":
             role = "model"
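
For reference, the added branch accepts both the plain-string and the list-of-text-parts forms of a system message. A standalone sketch of the same normalization (the helper name is illustrative, not part of the package):

    # Standalone sketch (illustrative only) of the normalization added above.
    def normalize_system_content(content):
        """Return a plain string from either a string or a list of text parts."""
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            return " ".join(part["text"] for part in content if part.get("type") == "text")
        return None

    print(normalize_system_content("Be concise."))  # Be concise.
    print(normalize_system_content([{"type": "text", "text": "Be"}, {"type": "text", "text": "concise."}]))  # Be concise.
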
@@ -436,11 +446,20 @@ class GoogleLLMContext(OpenAILLMContext):
             )
         elif role == "tool":
             role = "model"
+            try:
+                response = json.loads(message["content"])
+                if isinstance(response, dict):
+                    response_dict = response
+                else:
+                    response_dict = {"value": response}
+            except Exception as e:
+                # Response might not be JSON-deserializable (e.g. plain text).
+                response_dict = {"value": message["content"]}
             parts.append(
                 Part(
                     function_response=FunctionResponse(
                         name="tool_call_result",  # seems to work to hard-code the same name every time
-                        response=json.loads(message["content"]),
+                        response=response_dict,
                     )
                 )
             )
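
The fallback above guarantees that FunctionResponse always receives a dict, even when a tool returns a bare JSON value or plain text. The same logic in isolation (the helper name is illustrative):

    import json

    def to_response_dict(content: str) -> dict:
        """Mirror of the fallback above: always hand FunctionResponse a dict."""
        try:
            response = json.loads(content)
            return response if isinstance(response, dict) else {"value": response}
        except Exception:
            return {"value": content}  # not JSON at all, e.g. plain text

    print(to_response_dict('{"status": "ok"}'))  # {'status': 'ok'}
    print(to_response_dict("42"))                # {'value': 42}
    print(to_response_dict("call failed"))       # {'value': 'call failed'}
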
@@ -636,9 +655,8 @@ class GoogleLLMService(LLMService):
     """Google AI (Gemini) LLM service implementation.
 
     This class implements inference with Google's AI models, translating internally
-    from OpenAILLMContext to the messages format expected by the Google AI model.
-    We use OpenAILLMContext as a lingua franca for all LLM services to enable
-    easy switching between different LLMs.
+    from an OpenAILLMContext or a universal LLMContext to the messages format
+    expected by the Google AI model.
     """
 
     # Overriding the default adapter to use the Gemini one.
@@ -715,6 +733,44 @@ class GoogleLLMService(LLMService):
     def _create_client(self, api_key: str, http_options: Optional[HttpOptions] = None):
         self._client = genai.Client(api_key=api_key, http_options=http_options)
 
+    async def run_inference(self, context: LLMContext | OpenAILLMContext) -> Optional[str]:
+        """Run a one-shot, out-of-band (i.e. out-of-pipeline) inference with the given LLM context.
+
+        Args:
+            context: The LLM context containing conversation history.
+
+        Returns:
+            The LLM's response as a string, or None if no response is generated.
+        """
+        messages = []
+        system = []
+        if isinstance(context, LLMContext):
+            adapter = self.get_llm_adapter()
+            params: GeminiLLMInvocationParams = adapter.get_llm_invocation_params(context)
+            messages = params["messages"]
+            system = params["system_instruction"]
+        else:
+            context = GoogleLLMContext.upgrade_to_google(context)
+            messages = context.messages
+            system = getattr(context, "system_message", None)
+
+        generation_config = GenerateContentConfig(system_instruction=system)
+
+        # Use the new google-genai client's async method
+        response = await self._client.aio.models.generate_content(
+            model=self._model_name,
+            contents=messages,
+            config=generation_config,
+        )
+
+        # Extract text from response
+        if response.candidates and response.candidates[0].content:
+            for part in response.candidates[0].content.parts:
+                if part.text:
+                    return part.text
+
+        return None
+
     def needs_mcp_alternate_schema(self) -> bool:
         """Check if this LLM service requires alternate MCP schema.
 
@@ -740,8 +796,87 @@
         except Exception as e:
             logger.exception(f"Failed to unset thinking budget: {e}")
 
+    async def _stream_content(
+        self, params_from_context: GeminiLLMInvocationParams
+    ) -> AsyncIterator[GenerateContentResponse]:
+        messages = params_from_context["messages"]
+        if (
+            params_from_context["system_instruction"]
+            and self._system_instruction != params_from_context["system_instruction"]
+        ):
+            logger.debug(f"System instruction changed: {params_from_context['system_instruction']}")
+            self._system_instruction = params_from_context["system_instruction"]
+
+        tools = []
+        if params_from_context["tools"]:
+            tools = params_from_context["tools"]
+        elif self._tools:
+            tools = self._tools
+        tool_config = None
+        if self._tool_config:
+            tool_config = self._tool_config
+
+        # Filter out None values and create GenerationContentConfig
+        generation_params = {
+            k: v
+            for k, v in {
+                "system_instruction": self._system_instruction,
+                "temperature": self._settings["temperature"],
+                "top_p": self._settings["top_p"],
+                "top_k": self._settings["top_k"],
+                "max_output_tokens": self._settings["max_tokens"],
+                "tools": tools,
+                "tool_config": tool_config,
+            }.items()
+            if v is not None
+        }
+
+        if self._settings["extra"]:
+            generation_params.update(self._settings["extra"])
+
+        # possibly modify generation_params (in place) to set thinking to off by default
+        self._maybe_unset_thinking_budget(generation_params)
+
+        generation_config = (
+            GenerateContentConfig(**generation_params) if generation_params else None
+        )
+
+        await self.start_ttfb_metrics()
+        return await self._client.aio.models.generate_content_stream(
+            model=self._model_name,
+            contents=messages,
+            config=generation_config,
+        )
+
+    async def _stream_content_specific_context(
+        self, context: OpenAILLMContext
+    ) -> AsyncIterator[GenerateContentResponse]:
+        logger.debug(
+            f"{self}: Generating chat from LLM-specific context [{context.system_message}] | {context.get_messages_for_logging()}"
+        )
+
+        params = GeminiLLMInvocationParams(
+            messages=context.messages,
+            system_instruction=context.system_message,
+            tools=context.tools,
+        )
+
+        return await self._stream_content(params)
+
+    async def _stream_content_universal_context(
+        self, context: LLMContext
+    ) -> AsyncIterator[GenerateContentResponse]:
+        adapter = self.get_llm_adapter()
+        params: GeminiLLMInvocationParams = adapter.get_llm_invocation_params(context)
+
+        logger.debug(
+            f"{self}: Generating chat from universal context [{params['system_instruction']}] | {adapter.get_messages_for_logging(context)}"
+        )
+
+        return await self._stream_content(params)
+
     @traced_llm
-    async def _process_context(self, context: OpenAILLMContext):
+    async def _process_context(self, context: OpenAILLMContext | LLMContext):
         await self.push_frame(LLMFullResponseStartFrame())
 
         prompt_tokens = 0
@@ -754,55 +889,11 @@
         search_result = ""
 
         try:
-            self.logger.debug(
-                # f"{self}: Generating chat [{self._system_instruction}] | [{context.get_messages_for_logging()}]"
-                f"{self}: Generating chat [{context.get_messages_for_logging()}]"
-            )
-
-            messages = context.messages
-            if context.system_message and self._system_instruction != context.system_message:
-                self.logger.debug(f"System instruction changed: {context.system_message}")
-                self._system_instruction = context.system_message
-
-            tools = []
-            if context.tools:
-                tools = context.tools
-            elif self._tools:
-                tools = self._tools
-            tool_config = None
-            if self._tool_config:
-                tool_config = self._tool_config
-
-            # Filter out None values and create GenerationContentConfig
-            generation_params = {
-                k: v
-                for k, v in {
-                    "system_instruction": self._system_instruction,
-                    "temperature": self._settings["temperature"],
-                    "top_p": self._settings["top_p"],
-                    "top_k": self._settings["top_k"],
-                    "max_output_tokens": self._settings["max_tokens"],
-                    "tools": tools,
-                    "tool_config": tool_config,
-                }.items()
-                if v is not None
-            }
-
-            if self._settings["extra"]:
-                generation_params.update(self._settings["extra"])
-
-            # possibly modify generation_params (in place) to set thinking to off by default
-            self._maybe_unset_thinking_budget(generation_params)
-
-            generation_config = (
-                GenerateContentConfig(**generation_params) if generation_params else None
-            )
-
-            await self.start_ttfb_metrics()
-            response = await self._client.aio.models.generate_content_stream(
-                model=self._model_name,
-                contents=messages,
-                config=generation_config,
+            # Generate content using either OpenAILLMContext or universal LLMContext
+            response = await (
+                self._stream_content_specific_context(context)
+                if isinstance(context, OpenAILLMContext)
+                else self._stream_content_universal_context(context)
             )
 
             function_calls = []
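
The helpers above all consume a GeminiLLMInvocationParams mapping whose "messages", "system_instruction", and "tools" keys are the ones read in _stream_content. A rough sketch of that shape, with placeholder values rather than real entries:

    # Placeholder values only; real "messages" entries are google-genai Content objects
    # built by the Gemini adapter, and "tools" holds tool declarations if any.
    params_from_context = {
        "messages": [...],
        "system_instruction": "You are a helpful assistant.",
        "tools": [],
    }
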
@@ -915,9 +1006,18 @@
 
         if isinstance(frame, OpenAILLMContextFrame):
             context = GoogleLLMContext.upgrade_to_google(frame.context)
+        elif isinstance(frame, LLMContextFrame):
+            # Handle universal (LLM-agnostic) LLM context frames
+            context = frame.context
         elif isinstance(frame, LLMMessagesFrame):
+            # NOTE: LLMMessagesFrame is deprecated, so we don't support the newer universal
+            # LLMContext with it
             context = GoogleLLMContext(frame.messages)
         elif isinstance(frame, VisionImageRawFrame):
+            # This is only useful in very simple pipelines because it creates
+            # a new context. Generally we want a context manager to catch
+            # UserImageRawFrames coming through the pipeline and add them
+            # to the context.
             context = GoogleLLMContext()
             context.add_image_frame_message(
                 format=frame.format, size=frame.size, image=frame.image, text=frame.text
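
For illustration, a hypothetical way to exercise the new universal-context branch above: build an LLMContext, wrap it in an LLMContextFrame, and queue it into a running pipeline. The LLMContext and LLMContextFrame constructor signatures here are assumptions, not taken from this diff:

    from pipecat.frames.frames import LLMContextFrame
    from pipecat.processors.aggregators.llm_context import LLMContext

    context = LLMContext(messages=[{"role": "user", "content": "Hello!"}])  # signature assumed
    frame = LLMContextFrame(context)  # signature assumed
    # e.g. await task.queue_frames([frame]) inside a running PipelineTask
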
pipecat/services/google/llm_openai.py

@@ -41,6 +41,10 @@ class GoogleLLMOpenAIBetaService(OpenAILLMService):
     Note: This service includes a workaround for a Google API bug where function
     call indices may be incorrectly set to None, resulting in empty function names.
 
+    .. deprecated:: 0.0.82
+        GoogleLLMOpenAIBetaService is deprecated and will be removed in a future version.
+        Use GoogleLLMService instead for better integration with Google's native API.
+
 
     Reference:
         https://ai.google.dev/gemini-api/docs/openai
@@ -61,6 +65,17 @@ class GoogleLLMOpenAIBetaService(OpenAILLMService):
             model: Google model name to use (e.g., "gemini-2.0-flash").
             **kwargs: Additional arguments passed to the parent OpenAILLMService.
         """
+        import warnings
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("always")
+            warnings.warn(
+                "GoogleLLMOpenAIBetaService is deprecated and will be removed in a future version. "
+                "Use GoogleLLMService instead for better integration with Google's native API.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+
         super().__init__(api_key=api_key, base_url=base_url, model=model, **kwargs)
 
     async def _process_context(self, context: OpenAILLMContext):
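
A minimal migration sketch suggested by the deprecation above (constructor arguments assumed; check the service's actual signature):

    # Before (deprecated):
    # llm = GoogleLLMOpenAIBetaService(api_key="...", model="gemini-2.0-flash")

    # After:
    from pipecat.services.google.llm import GoogleLLMService

    llm = GoogleLLMService(api_key="...", model="gemini-2.0-flash")  # arguments assumed
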
@@ -74,9 +89,9 @@ class GoogleLLMOpenAIBetaService(OpenAILLMService):
 
         await self.start_ttfb_metrics()
 
-        chunk_stream: AsyncStream[ChatCompletionChunk] = await self._stream_chat_completions(
-            context
-        )
+        chunk_stream: AsyncStream[
+            ChatCompletionChunk
+        ] = await self._stream_chat_completions_specific_context(context)
 
         async for chunk in chunk_stream:
             if chunk.usage:
pipecat/services/grok/llm.py

@@ -16,6 +16,7 @@ from dataclasses import dataclass
 from loguru import logger
 
 from pipecat.metrics.metrics import LLMTokenUsage
+from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response import (
     LLMAssistantAggregatorParams,
     LLMUserAggregatorParams,
@@ -107,7 +108,7 @@ class GrokLLMService(OpenAILLMService):
         logger.debug(f"Creating Grok client with api {base_url}")
         return super().create_client(api_key, base_url, **kwargs)
 
-    async def _process_context(self, context: OpenAILLMContext):
+    async def _process_context(self, context: OpenAILLMContext | LLMContext):
         """Process a context through the LLM and accumulate token usage metrics.
 
         This method overrides the parent class implementation to handle Grok's
pipecat/services/llm_service.py

@@ -36,10 +36,15 @@ from pipecat.frames.frames import (
     FunctionCallResultFrame,
     FunctionCallResultProperties,
     FunctionCallsStartedFrame,
+    LLMConfigureOutputFrame,
+    LLMFullResponseEndFrame,
+    LLMFullResponseStartFrame,
+    LLMTextFrame,
     StartFrame,
     StartInterruptionFrame,
     UserImageRequestFrame,
 )
+from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response import (
     LLMAssistantAggregatorParams,
     LLMUserAggregatorParams,
@@ -88,7 +93,7 @@ class FunctionCallParams:
     tool_call_id: str
     arguments: Mapping[str, Any]
     llm: "LLMService"
-    context: OpenAILLMContext
+    context: OpenAILLMContext | LLMContext
     result_callback: FunctionCallResultCallback
 
 
@@ -129,7 +134,7 @@ class FunctionCallRunnerItem:
     function_name: str
     tool_call_id: str
     arguments: Mapping[str, Any]
-    context: OpenAILLMContext
+    context: OpenAILLMContext | LLMContext
     run_llm: Optional[bool] = None
 
 
@@ -177,6 +182,7 @@ class LLMService(AIService):
         self._function_call_tasks: Dict[asyncio.Task, FunctionCallRunnerItem] = {}
         self._sequential_runner_task: Optional[asyncio.Task] = None
         self._tracing_enabled: bool = False
+        self._skip_tts: bool = False
 
         self._register_event_handler("on_function_calls_started")
         self._register_event_handler("on_completion_timeout")
@@ -189,6 +195,19 @@
         """
         return self._adapter
 
+    async def run_inference(self, context: LLMContext | OpenAILLMContext) -> Optional[str]:
+        """Run a one-shot, out-of-band (i.e. out-of-pipeline) inference with the given LLM context.
+
+        Must be implemented by subclasses.
+
+        Args:
+            context: The LLM context containing conversation history.
+
+        Returns:
+            The LLM's response as a string, or None if no response is generated.
+        """
+        raise NotImplementedError(f"run_inference() not supported by {self.__class__.__name__}")
+
     def create_context_aggregator(
         self,
         context: OpenAILLMContext,
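
A hypothetical out-of-band use of the new run_inference() API with a concrete subclass such as GoogleLLMService (constructor arguments and the LLMContext signature are assumptions, not taken from this diff):

    import asyncio

    from pipecat.processors.aggregators.llm_context import LLMContext
    from pipecat.services.google.llm import GoogleLLMService

    async def main():
        llm = GoogleLLMService(api_key="...", model="gemini-2.0-flash")  # arguments assumed
        context = LLMContext(messages=[{"role": "user", "content": "Say hello."}])  # signature assumed
        reply = await llm.run_inference(context)  # one-shot, outside the pipeline
        print(reply)

    asyncio.run(main())
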
@@ -252,6 +271,20 @@
 
         if isinstance(frame, StartInterruptionFrame):
             await self._handle_interruptions(frame)
+        elif isinstance(frame, LLMConfigureOutputFrame):
+            self._skip_tts = frame.skip_tts
+
+    async def push_frame(self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM):
+        """Pushes a frame.
+
+        Args:
+            frame: The frame to push.
+            direction: The direction of frame pushing.
+        """
+        if isinstance(frame, (LLMTextFrame, LLMFullResponseStartFrame, LLMFullResponseEndFrame)):
+            frame.skip_tts = self._skip_tts
+
+        await super().push_frame(frame, direction)
 
     async def _handle_interruptions(self, _: StartInterruptionFrame):
         # logger.info("In LLM Handling interruptions")
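
A hypothetical use of the new skip-TTS plumbing above: push an LLMConfigureOutputFrame so that subsequent LLM output frames are tagged skip_tts=True and a downstream TTS service can ignore them (the frame's constructor signature is assumed):

    from pipecat.frames.frames import LLMConfigureOutputFrame

    mute_frame = LLMConfigureOutputFrame(skip_tts=True)  # constructor signature assumed
    # e.g. await task.queue_frames([mute_frame]) upstream of the LLM service; the service
    # then sets skip_tts on the LLMTextFrame and LLMFullResponseStart/EndFrame it pushes.
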
@@ -434,7 +467,9 @@
         else:
             await self._sequential_runner_queue.put(runner_item)
 
-    async def _call_start_function(self, context: OpenAILLMContext, function_name: str):
+    async def _call_start_function(
+        self, context: OpenAILLMContext | LLMContext, function_name: str
+    ):
         if function_name in self._start_callbacks.keys():
             await self._start_callbacks[function_name](function_name, self, context)
         elif None in self._start_callbacks.keys():
pipecat/services/mem0/memory.py

@@ -120,6 +120,7 @@ class Mem0MemoryService(FrameProcessor):
         try:
             logger.debug(f"Storing {len(messages)} messages in Mem0")
             params = {
+                "async_mode": True,
                 "messages": messages,
                 "metadata": {"platform": "pipecat"},
                 "output_format": "v1.1",
@@ -163,7 +164,7 @@
             ("run_id", self.run_id),
         ]
         clauses = [{name: value} for name, value in id_pairs if value is not None]
-        filters = {"AND": clauses} if clauses else {}
+        filters = {"OR": clauses} if clauses else {}
         results = self.memory_client.search(
             query=query,
             filters=filters,
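
With the change above, Mem0 searches match any of the configured IDs instead of requiring all of them. What the comprehension produces for illustrative values:

    user_id, agent_id, run_id = "u-1", None, "r-9"
    id_pairs = [("user_id", user_id), ("agent_id", agent_id), ("run_id", run_id)]
    clauses = [{name: value} for name, value in id_pairs if value is not None]
    filters = {"OR": clauses} if clauses else {}
    print(filters)  # {'OR': [{'user_id': 'u-1'}, {'run_id': 'r-9'}]}
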
pipecat/services/mistral/llm.py

@@ -12,6 +12,7 @@ from loguru import logger
 from openai import AsyncStream
 from openai.types.chat import ChatCompletionChunk, ChatCompletionMessageParam
 
+from pipecat.adapters.services.open_ai_adapter import OpenAILLMInvocationParams
 from pipecat.frames.frames import FunctionCallFromLLM
 from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
 from pipecat.services.openai.llm import OpenAILLMService
@@ -148,9 +149,7 @@ class MistralLLMService(OpenAILLMService):
         if calls_to_execute:
             await super().run_function_calls(calls_to_execute)
 
-    def build_chat_completion_params(
-        self, context: OpenAILLMContext, messages: List[ChatCompletionMessageParam]
-    ) -> dict:
+    def build_chat_completion_params(self, params_from_context: OpenAILLMInvocationParams) -> dict:
         """Build parameters for Mistral chat completion request.
 
         Handles Mistral-specific requirements including:
@@ -159,14 +158,14 @@
         - Core completion settings
         """
         # Apply Mistral's assistant prefix requirement for API compatibility
-        fixed_messages = self._apply_mistral_assistant_prefix(messages)
+        fixed_messages = self._apply_mistral_assistant_prefix(params_from_context["messages"])
 
         params = {
            "model": self.model_name,
            "stream": True,
            "messages": fixed_messages,
-            "tools": context.tools,
-            "tool_choice": context.tool_choice,
+            "tools": params_from_context["tools"],
+            "tool_choice": params_from_context["tool_choice"],
            "frequency_penalty": self._settings["frequency_penalty"],
            "presence_penalty": self._settings["presence_penalty"],
            "temperature": self._settings["temperature"],
pipecat/services/nim/llm.py

@@ -11,6 +11,7 @@ Microservice) API while maintaining compatibility with the OpenAI-style interfac
 """
 
 from pipecat.metrics.metrics import LLMTokenUsage
+from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
 from pipecat.services.openai.llm import OpenAILLMService
 
@@ -47,7 +48,7 @@ class NimLLMService(OpenAILLMService):
         self._has_reported_prompt_tokens = False
         self._is_processing = False
 
-    async def _process_context(self, context: OpenAILLMContext):
+    async def _process_context(self, context: OpenAILLMContext | LLMContext):
         """Process a context through the LLM and accumulate token usage metrics.
 
         This method overrides the parent class implementation to handle NVIDIA's