rasa-pro 3.14.1__py3-none-any.whl → 3.15.0a1__py3-none-any.whl
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Potentially problematic release: this version of rasa-pro might be problematic.
- rasa/builder/config.py +4 -0
- rasa/builder/copilot/copilot.py +28 -9
- rasa/builder/copilot/models.py +171 -4
- rasa/builder/document_retrieval/inkeep_document_retrieval.py +2 -0
- rasa/builder/download.py +1 -1
- rasa/builder/service.py +101 -24
- rasa/builder/telemetry/__init__.py +0 -0
- rasa/builder/telemetry/copilot_langfuse_telemetry.py +384 -0
- rasa/builder/{copilot/telemetry.py → telemetry/copilot_segment_telemetry.py} +21 -3
- rasa/constants.py +1 -0
- rasa/core/policies/flows/flow_executor.py +20 -6
- rasa/core/run.py +15 -4
- rasa/e2e_test/e2e_config.py +4 -3
- rasa/engine/recipes/default_components.py +16 -6
- rasa/graph_components/validators/default_recipe_validator.py +10 -4
- rasa/nlu/classifiers/diet_classifier.py +2 -0
- rasa/shared/core/slots.py +55 -24
- rasa/shared/utils/common.py +9 -1
- rasa/utils/common.py +9 -0
- rasa/utils/endpoints.py +2 -0
- rasa/utils/installation_utils.py +111 -0
- rasa/utils/tensorflow/callback.py +2 -0
- rasa/utils/train_utils.py +2 -0
- rasa/version.py +1 -1
- {rasa_pro-3.14.1.dist-info → rasa_pro-3.15.0a1.dist-info}/METADATA +4 -2
- {rasa_pro-3.14.1.dist-info → rasa_pro-3.15.0a1.dist-info}/RECORD +29 -26
- {rasa_pro-3.14.1.dist-info → rasa_pro-3.15.0a1.dist-info}/NOTICE +0 -0
- {rasa_pro-3.14.1.dist-info → rasa_pro-3.15.0a1.dist-info}/WHEEL +0 -0
- {rasa_pro-3.14.1.dist-info → rasa_pro-3.15.0a1.dist-info}/entry_points.txt +0 -0
rasa/builder/config.py
CHANGED
```diff
@@ -13,6 +13,10 @@ OPENAI_VECTOR_STORE_ID = os.getenv(
 )
 OPENAI_MAX_VECTOR_RESULTS = int(os.getenv("OPENAI_MAX_VECTOR_RESULTS", "10"))
 OPENAI_TIMEOUT = int(os.getenv("OPENAI_TIMEOUT", "30"))
+# OpenAI Token Pricing Configuration (per 1,000 tokens)
+COPILOT_INPUT_TOKEN_PRICE = float(os.getenv("COPILOT_INPUT_TOKEN_PRICE", "0.002"))
+COPILOT_OUTPUT_TOKEN_PRICE = float(os.getenv("COPILOT_OUTPUT_TOKEN_PRICE", "0.0005"))
+COPILOT_CACHED_TOKEN_PRICE = float(os.getenv("COPILOT_CACHED_TOKEN_PRICE", "0.002"))

 # Server Configuration
 BUILDER_SERVER_HOST = os.getenv("SERVER_HOST", "0.0.0.0")
```
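The new pricing constants follow the same pattern as the existing OpenAI settings: an environment variable with a string default, converted at import time. A minimal sketch of how an operator could override them before starting Rasa (the override values here are purely illustrative, not recommended prices):

```python
import os

# Illustrative overrides; prices are interpreted as dollars per 1,000 tokens.
os.environ["COPILOT_INPUT_TOKEN_PRICE"] = "0.0025"
os.environ["COPILOT_OUTPUT_TOKEN_PRICE"] = "0.01"
os.environ["COPILOT_CACHED_TOKEN_PRICE"] = "0.00125"

# Same env-var-with-default pattern used in rasa/builder/config.py.
COPILOT_INPUT_TOKEN_PRICE = float(os.getenv("COPILOT_INPUT_TOKEN_PRICE", "0.002"))
COPILOT_OUTPUT_TOKEN_PRICE = float(os.getenv("COPILOT_OUTPUT_TOKEN_PRICE", "0.0005"))
COPILOT_CACHED_TOKEN_PRICE = float(os.getenv("COPILOT_CACHED_TOKEN_PRICE", "0.002"))

print(COPILOT_INPUT_TOKEN_PRICE)  # 0.0025 when the override above is set
```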
rasa/builder/copilot/copilot.py
CHANGED
```diff
@@ -42,6 +42,7 @@ from rasa.builder.exceptions import (
     DocumentRetrievalError,
 )
 from rasa.builder.shared.tracker_context import TrackerContext
+from rasa.builder.telemetry.copilot_langfuse_telemetry import CopilotLangfuseTelemetry
 from rasa.shared.constants import PACKAGE_NAME

 structlogger = structlog.get_logger()
@@ -72,7 +73,11 @@ class Copilot:
         )

         # The final stream chunk includes usage statistics.
-        self.usage_statistics = UsageStatistics(
+        self.usage_statistics = UsageStatistics(
+            input_token_price=config.COPILOT_INPUT_TOKEN_PRICE,
+            output_token_price=config.COPILOT_OUTPUT_TOKEN_PRICE,
+            cached_token_price=config.COPILOT_CACHED_TOKEN_PRICE,
+        )

     @asynccontextmanager
     async def _get_client(self) -> AsyncGenerator[openai.AsyncOpenAI, None]:
@@ -94,6 +99,16 @@ class Copilot:
                 error=str(exc),
             )

+    @property
+    def llm_config(self) -> Dict[str, Any]:
+        """The LLM config used to generate the response."""
+        return {
+            "model": config.OPENAI_MODEL,
+            "temperature": config.OPENAI_TEMPERATURE,
+            "stream": True,
+            "stream_options": {"include_usage": True},
+        }
+
     async def search_rasa_documentation(
         self,
         context: CopilotContext,
@@ -108,7 +123,9 @@ class Copilot:
         """
         try:
             query = self._create_documentation_search_query(context)
-
+            documents = await self._inkeep_document_retrieval.retrieve_documents(query)
+            # TODO: Log documentation retrieval to Langfuse
+            return documents
         except DocumentRetrievalError as e:
             structlogger.error(
                 "copilot.search_rasa_documentation.error",
@@ -145,11 +162,12 @@
             Exception: If an unexpected error occurs.
         """
         relevant_documents = await self.search_rasa_documentation(context)
-        messages = await self._build_messages(context, relevant_documents)
         tracker_event_attachments = self._extract_tracker_event_attachments(
             context.copilot_chat_history[-1]
         )
+        messages = await self._build_messages(context, relevant_documents)

+        # TODO: Delete this after Langfuse is implemented
         support_evidence = CopilotGenerationContext(
             relevant_documents=relevant_documents,
             system_message=messages[0],
@@ -163,6 +181,7 @@
             support_evidence,
         )

+    @CopilotLangfuseTelemetry.trace_copilot_streaming_generation
     async def _stream_response(
         self, messages: List[Dict[str, Any]]
     ) -> AsyncGenerator[str, None]:
@@ -172,13 +191,10 @@
         try:
             async with self._get_client() as client:
                 stream = await client.chat.completions.create(
-
-
-                    temperature=config.OPENAI_TEMPERATURE,
-                    stream=True,
-                    stream_options={"include_usage": True},
+                    messages=messages,
+                    **self.llm_config,
                 )
-                async for chunk in stream:
+                async for chunk in stream:  # type: ignore[attr-defined]
                     # The final chunk, which contains the usage statistics,
                     # arrives with an empty `choices` list.
                     if not chunk.choices:
@@ -189,6 +205,7 @@
                     delta = chunk.choices[0].delta
                     if delta and delta.content:
                         yield delta.content
+
         except openai.OpenAIError as e:
             structlogger.exception("copilot.stream_response.api_error", error=str(e))
             raise CopilotStreamError(
@@ -559,4 +576,6 @@
         """Extract the tracker event attachments from the message."""
         if not isinstance(message, UserChatMessage):
             return []
+        # TODO: Log tracker event attachments to Langfuse only in the case of the
+        # User chat message.
         return message.get_content_blocks_by_type(EventContent)
```
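The refactored `_stream_response` now pulls the shared OpenAI parameters from the new `llm_config` property and unpacks them into the completion call. A minimal sketch of that pattern outside of Rasa, assuming a plain `AsyncOpenAI` client and an illustrative model name (the real values come from `rasa.builder.config`):

```python
from typing import Any, Dict, List

import openai


class StreamingChat:
    """Illustrative only: shows the shared-kwargs pattern from the diff."""

    @property
    def llm_config(self) -> Dict[str, Any]:
        # Parameters shared by every completion call, mirroring Copilot.llm_config.
        return {
            "model": "gpt-4o-mini",  # assumption; the diff reads config.OPENAI_MODEL
            "temperature": 0.0,      # assumption; the diff reads config.OPENAI_TEMPERATURE
            "stream": True,
            "stream_options": {"include_usage": True},
        }

    async def stream_answer(self, messages: List[Dict[str, Any]]) -> None:
        client = openai.AsyncOpenAI()
        # Per-call arguments stay explicit; everything else comes from llm_config,
        # just like `messages=messages, **self.llm_config` in the diff.
        stream = await client.chat.completions.create(messages=messages, **self.llm_config)
        async for chunk in stream:
            # The final chunk carries usage statistics and has an empty choices list.
            if chunk.choices and chunk.choices[0].delta.content:
                print(chunk.choices[0].delta.content, end="")
```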
rasa/builder/copilot/models.py
CHANGED
```diff
@@ -3,6 +3,7 @@ from enum import Enum
 from typing import Any, Dict, List, Literal, Optional, Type, TypeVar, Union

 import structlog
+from openai.types.chat import ChatCompletion
 from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 from pydantic import (
     BaseModel,
@@ -612,16 +613,171 @@ class TrainingErrorLog(CopilotOutput):


 class UsageStatistics(BaseModel):
-
-
-
-
+    """Usage statistics for a copilot generation."""
+
+    # Token usage statistics
+    prompt_tokens: Optional[int] = Field(
+        default=None,
+        description=(
+            "Total number of prompt tokens used to generate completion. "
+            "Should include cached prompt tokens."
+        ),
+    )
+    completion_tokens: Optional[int] = Field(
+        default=None,
+        description="Number of generated tokens.",
+    )
+    total_tokens: Optional[int] = Field(
+        default=None,
+        description="Total number of tokens used (input + output).",
+    )
+    cached_prompt_tokens: Optional[int] = Field(
+        default=None,
+        description="Number of cached prompt tokens.",
+    )
+    model: Optional[str] = Field(
+        default=None,
+        description="The model used to generate the response.",
+    )
+
+    # Token prices
+    input_token_price: float = Field(
+        default=0.0,
+        description="Price per 1K input tokens in dollars.",
+    )
+    output_token_price: float = Field(
+        default=0.0,
+        description="Price per 1K output tokens in dollars.",
+    )
+    cached_token_price: float = Field(
+        default=0.0,
+        description="Price per 1K cached tokens in dollars.",
+    )
+
+    @property
+    def non_cached_prompt_tokens(self) -> Optional[int]:
+        """Get the non-cached prompt tokens."""
+        if self.cached_prompt_tokens is not None and self.prompt_tokens is not None:
+            return self.prompt_tokens - self.cached_prompt_tokens
+        return self.prompt_tokens
+
+    @property
+    def non_cached_cost(self) -> Optional[float]:
+        """Calculate the non-cached token cost based on configured pricing."""
+        if self.non_cached_prompt_tokens is None:
+            return None
+        if self.non_cached_prompt_tokens == 0:
+            return 0.0
+
+        return (self.non_cached_prompt_tokens / 1000.0) * self.input_token_price
+
+    @property
+    def cached_cost(self) -> Optional[float]:
+        """Calculate the cached token cost based on configured pricing."""
+        if self.cached_prompt_tokens is None:
+            return None
+        if self.cached_prompt_tokens == 0:
+            return 0.0
+
+        return (self.cached_prompt_tokens / 1000.0) * self.cached_token_price
+
+    @property
+    def input_cost(self) -> Optional[float]:
+        """Calculate the input token cost based on configured pricing.
+
+        The calculation takes into account the cached prompt tokens (if available) too.
+        """
+        # If both non-cached and cached costs are None, there's no input cost
+        if self.non_cached_cost is None and self.cached_cost is None:
+            return None
+
+        # If only non-cached cost is available, return it
+        if self.non_cached_cost is not None and self.cached_cost is None:
+            return self.non_cached_cost
+
+        # If only cached cost is available, return it
+        if self.non_cached_cost is None and self.cached_cost is not None:
+            return self.cached_cost
+
+        # If both are available, return the sum
+        return self.non_cached_cost + self.cached_cost  # type: ignore[operator]
+
+    @property
+    def output_cost(self) -> Optional[float]:
+        """Calculate the output token cost based on configured pricing."""
+        if self.completion_tokens is None:
+            return None
+        if self.completion_tokens == 0:
+            return 0.0
+
+        return (self.completion_tokens / 1000.0) * self.output_token_price
+
+    @property
+    def total_cost(self) -> Optional[float]:
+        """Calculate the total cost based on configured pricing.
+
+        Returns:
+            Total cost in dollars, or None if insufficient data.
+        """
+        if self.input_cost is None or self.output_cost is None:
+            return None
+
+        return self.input_cost + self.output_cost
+
+    def update_token_prices(
+        self,
+        input_token_price: float,
+        output_token_price: float,
+        cached_token_price: float,
+    ) -> None:
+        """Update token prices with provided values.
+
+        Args:
+            input_token_price: Price per 1K input tokens in dollars.
+            output_token_price: Price per 1K output tokens in dollars.
+            cached_token_price: Price per 1K cached tokens in dollars.
+        """
+        self.input_token_price = input_token_price
+        self.output_token_price = output_token_price
+        self.cached_token_price = cached_token_price
+
+    @classmethod
+    def from_chat_completion_response(
+        cls,
+        response: ChatCompletion,
+        input_token_price: float = 0.0,
+        output_token_price: float = 0.0,
+        cached_token_price: float = 0.0,
+    ) -> Optional["UsageStatistics"]:
+        """Create a UsageStatistics object from a ChatCompletionChunk."""
+        if not (usage := getattr(response, "usage", None)):
+            return None
+
+        usage_statistics = cls(
+            input_token_price=input_token_price,
+            output_token_price=output_token_price,
+            cached_token_price=cached_token_price,
+        )
+
+        usage_statistics.prompt_tokens = usage.prompt_tokens
+        usage_statistics.completion_tokens = usage.completion_tokens
+        usage_statistics.total_tokens = usage.total_tokens
+        usage_statistics.model = getattr(response, "model", None)
+
+        # Extract cached tokens if available
+        if hasattr(usage, "prompt_tokens_details") and usage.prompt_tokens_details:
+            usage_statistics.cached_prompt_tokens = getattr(
+                usage.prompt_tokens_details, "cached_tokens", None
+            )
+
+        return usage_statistics

     def reset(self) -> None:
         """Reset usage statistics to their default values."""
         self.prompt_tokens = None
         self.completion_tokens = None
         self.total_tokens = None
+        self.cached_prompt_tokens = None
         self.model = None

     def update_from_stream_chunk(self, chunk: ChatCompletionChunk) -> None:
@@ -630,14 +786,25 @@ class UsageStatistics(BaseModel):
         Args:
             chunk: The OpenAI stream chunk containing usage statistics.
         """
+        # Reset the usage statistics to their default values
+        self.reset()
+
+        # If the chunk has no usage statistics, return
         if not (usage := getattr(chunk, "usage", None)):
             return

+        # Update the usage statistics with the values from the chunk
         self.prompt_tokens = usage.prompt_tokens
         self.completion_tokens = usage.completion_tokens
         self.total_tokens = usage.total_tokens
         self.model = getattr(chunk, "model", None)

+        # Extract cached tokens if available
+        if hasattr(usage, "prompt_tokens_details") and usage.prompt_tokens_details:
+            self.cached_prompt_tokens = getattr(
+                usage.prompt_tokens_details, "cached_tokens", None
+            )
+

 class SigningContext(BaseModel):
     secret: Optional[str] = Field(None)
```
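All of the new cost properties use the same per-1,000-token formula, `cost = (tokens / 1000) * price_per_1K`, with cached prompt tokens priced separately from non-cached ones. A quick worked example of that arithmetic under assumed prices and token counts (illustrative numbers only):

```python
# Assumed prices in dollars per 1K tokens (illustrative).
input_token_price = 0.002
cached_token_price = 0.002
output_token_price = 0.0005

# Assumed usage reported by the final stream chunk.
prompt_tokens = 1_500          # includes the cached portion
cached_prompt_tokens = 500
completion_tokens = 200

non_cached_prompt_tokens = prompt_tokens - cached_prompt_tokens             # 1000

non_cached_cost = (non_cached_prompt_tokens / 1000.0) * input_token_price   # 0.002
cached_cost = (cached_prompt_tokens / 1000.0) * cached_token_price          # 0.001
output_cost = (completion_tokens / 1000.0) * output_token_price             # 0.0001

total_cost = non_cached_cost + cached_cost + output_cost                    # 0.0031
print(f"total cost: ${total_cost:.4f}")
```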
rasa/builder/document_retrieval/inkeep_document_retrieval.py
CHANGED
```diff
@@ -17,6 +17,7 @@ from rasa.builder.document_retrieval.constants import (
 )
 from rasa.builder.document_retrieval.models import Document
 from rasa.builder.exceptions import DocumentRetrievalError
+from rasa.builder.telemetry.copilot_langfuse_telemetry import CopilotLangfuseTelemetry
 from rasa.shared.utils.io import read_json_file

 structlogger = structlog.get_logger()
@@ -88,6 +89,7 @@ class InKeepDocumentRetrieval:
             )
             raise e

+    @CopilotLangfuseTelemetry.trace_document_retrieval_generation
     async def _call_inkeep_rag_api(
         self, query: str, temperature: float, timeout: float
     ) -> ChatCompletion:
```
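Both the streaming generation in `copilot.py` and the InKeep RAG call are now traced by applying `CopilotLangfuseTelemetry` decorators to the async methods, so tracing is added without touching the call sites. The actual decorators live in the new `rasa/builder/telemetry/copilot_langfuse_telemetry.py`; the sketch below only shows the general shape of such a decorator for async methods and is not Rasa's implementation:

```python
import functools
from typing import Any, Awaitable, Callable


class TracingTelemetry:
    """Illustrative stand-in for a telemetry helper that exposes decorators."""

    @staticmethod
    def trace_async_call(
        func: Callable[..., Awaitable[Any]],
    ) -> Callable[..., Awaitable[Any]]:
        @functools.wraps(func)
        async def wrapper(*args: Any, **kwargs: Any) -> Any:
            # A real implementation would open a span here and record the
            # inputs, outputs, and timing; this sketch only delegates.
            return await func(*args, **kwargs)

        return wrapper


class DocumentRetrieval:
    @TracingTelemetry.trace_async_call
    async def call_rag_api(self, query: str) -> list:
        return []  # placeholder result for the sketch
```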
rasa/builder/download.py
CHANGED
```diff
@@ -27,7 +27,7 @@ def _get_pyproject_toml_content(project_id: str) -> str:
         version = "0.1.0"
         description = "Add your description for your Rasa bot here"
         readme = "README.md"
-        dependencies = ["rasa-pro>=3.
+        dependencies = ["rasa-pro>=3.14"]
         requires-python = ">={sys.version_info.major}.{sys.version_info.minor}"
         """
     )
```
rasa/builder/service.py
CHANGED
```diff
@@ -5,6 +5,7 @@ import time
 from http import HTTPStatus
 from typing import Any, Optional

+import langfuse
 import structlog
 from sanic import Blueprint, HTTPResponse, response
 from sanic.request import Request
@@ -41,7 +42,6 @@ from rasa.builder.copilot.signing import (
     create_signature_envelope_for_text,
     verify_signature,
 )
-from rasa.builder.copilot.telemetry import CopilotTelemetry
 from rasa.builder.download import create_bot_project_archive
 from rasa.builder.guardrails.constants import (
     BLOCK_SCOPE_PROJECT,
@@ -65,6 +65,7 @@ from rasa.builder.models import (
     ApiErrorResponse,
     AssistantInfo,
     BotData,
+    BotFiles,
     JobCreateResponse,
     JobStatus,
     JobStatusEvent,
@@ -74,6 +75,8 @@ from rasa.builder.models import (
 )
 from rasa.builder.project_generator import ProjectGenerator
 from rasa.builder.shared.tracker_context import TrackerContext
+from rasa.builder.telemetry.copilot_langfuse_telemetry import CopilotLangfuseTelemetry
+from rasa.builder.telemetry.copilot_segment_telemetry import CopilotSegmentTelemetry
 from rasa.core.agent import Agent
 from rasa.core.channels.studio_chat import StudioChatInput
 from rasa.core.exceptions import AgentNotReady
@@ -1020,6 +1023,9 @@ async def download_bot_project(request: Request) -> HTTPResponse:
     schema=str,
 )
 @protected()
+# Disable automatic input/output capture for langfuse tracing
+# This allows manual control over what data is sent to langfuse
+@langfuse.observe(capture_input=False, capture_output=False)
 async def copilot(request: Request) -> None:
     """Handle copilot requests with streaming markdown responses."""
     sse = await request.respond(content_type="text/event-stream")
@@ -1046,9 +1052,12 @@ async def copilot(request: Request) -> None:
         )
         return

-    telemetry =
+    telemetry = CopilotSegmentTelemetry(
+        project_id=HELLO_RASA_PROJECT_ID, user_id=user_id
+    )
     structlogger.debug("builder.copilot.telemetry.request.init")

+    # TODO: This can be removed once Langfuse is completed.
     if req.last_message and req.last_message.role == ROLE_USER:
         structlogger.debug("builder.copilot.telemetry.request.user_turn")
         # Offload telemetry logging to a background task
@@ -1088,26 +1097,9 @@ async def copilot(request: Request) -> None:
         return

     # 4. Get the necessary context for the copilot
-
-
-
-    )
-    if (
-        tracker_context is not None
-        and llm_service.guardrails_policy_checker is not None
-    ):
-        tracker_context = await llm_service.guardrails_policy_checker.check_assistant_chat_for_policy_violations(  # noqa: E501
-            tracker_context=tracker_context,
-            hello_rasa_user_id=user_id,
-            hello_rasa_project_id=HELLO_RASA_PROJECT_ID,
-            lakera_project_id=LAKERA_ASSISTANT_HISTORY_GUARDRAIL_PROJECT_ID,
-        )
-
-    # Copilot doesn't need to know about the docs and any file that is not a core
-    # assistant file
-    relevant_assistant_files = project_generator.get_bot_files(
-        exclude_docs_directory=True,
-        allowed_file_extensions=["yaml", "yml", "py", "jinja", "jinja2"],
+    tracker_context = await get_tracker_context_for_copilot(request, req, user_id)
+    relevant_assistant_files = get_relevant_assistant_files_for_copilot(
+        project_generator,
     )
     context = CopilotContext(
         tracker_context=tracker_context,
@@ -1162,7 +1154,7 @@ async def copilot(request: Request) -> None:
     async for token in intercepted_stream:
         await sse.send(token.to_sse_event().format())

-    #
+    # 8a. Offload metabase telemetry logging to a background task
     request.app.add_task(
         asyncio.to_thread(
             telemetry.log_copilot_from_handler,
@@ -1177,9 +1169,27 @@ async def copilot(request: Request) -> None:
                 else None
             ),
             tracker_event_attachments=generation_context.tracker_event_attachments,
-
+            model=copilot_client.usage_statistics.model or "N/A",
+            prompt_tokens=copilot_client.usage_statistics.prompt_tokens or 0,
+            cached_prompt_tokens=(
+                copilot_client.usage_statistics.cached_prompt_tokens or 0
+            ),
+            completion_tokens=(
+                copilot_client.usage_statistics.completion_tokens or 0
+            ),
+            total_tokens=copilot_client.usage_statistics.total_tokens or 0,
         )
     )
+    # 8b. Setup output trace attributes for Langfuse
+    CopilotLangfuseTelemetry.setup_copilot_endpoint_call_trace_attributes(
+        hello_rasa_project_id=HELLO_RASA_PROJECT_ID or "N/A",
+        chat_id=req.session_id or "N/A",
+        user_id=user_id,
+        request=req,
+        handler=copilot_response_handler,
+        relevant_documents=generation_context.relevant_documents,
+        copilot_context=context,
+    )

     # 9. Once the stream is over, extract and send references
     # if any documents were used
@@ -1365,3 +1375,70 @@ async def _handle_guardrail_violation_and_maybe_block(

     await sse.send(message.to_sse_event().format())
     return message
+
+
+@langfuse.observe(capture_input=False, capture_output=False)
+async def get_tracker_context_for_copilot(
+    request: Request,
+    req: CopilotRequest,
+    user_id: str,
+) -> Optional[TrackerContext]:
+    """Check the assistant chat for guardrail policy violations.
+
+    Args:
+        request: The request object.
+        req: The CopilotRequest object.
+        user_id: The user ID.
+
+    Returns:
+        The tracker context if the tracker is available.
+    """
+    tracker = await current_tracker_from_input_channel(request.app, req.session_id)
+    tracker_context = TrackerContext.from_tracker(
+        tracker, max_turns=COPILOT_ASSISTANT_TRACKER_MAX_TURNS
+    )
+    if (
+        tracker_context is not None
+        and llm_service.guardrails_policy_checker is not None
+    ):
+        tracker_context = await llm_service.guardrails_policy_checker.check_assistant_chat_for_policy_violations(  # noqa: E501
+            tracker_context=tracker_context,
+            hello_rasa_user_id=user_id,
+            hello_rasa_project_id=HELLO_RASA_PROJECT_ID,
+            lakera_project_id=LAKERA_ASSISTANT_HISTORY_GUARDRAIL_PROJECT_ID,
+        )
+
+    # Track the retrieved tracker context
+    CopilotLangfuseTelemetry.trace_copilot_tracker_context(
+        tracker_context=tracker_context,
+        max_conversation_turns=COPILOT_ASSISTANT_TRACKER_MAX_TURNS,
+        session_id=req.session_id,
+    )
+
+    return tracker_context
+
+
+@langfuse.observe(capture_input=False, capture_output=False)
+def get_relevant_assistant_files_for_copilot(
+    project_generator: ProjectGenerator,
+) -> BotFiles:
+    """Get the relevant assistant files for the copilot.
+
+    Args:
+        project_generator: The project generator.
+
+    Returns:
+        The relevant assistant files.
+    """
+    # Copilot doesn't need to know about the docs and any file that is not a core
+    # assistant file
+    files = project_generator.get_bot_files(
+        exclude_docs_directory=True,
+        allowed_file_extensions=["yaml", "yml", "py", "jinja", "jinja2"],
+    )

+    # Track the retrieved assistant files
+    CopilotLangfuseTelemetry.trace_copilot_relevant_assistant_files(
+        relevant_assistant_files=files,
+    )
+    return files
```
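The copilot endpoint and the two new helpers are decorated with `langfuse.observe(capture_input=False, capture_output=False)`: each call still produces a Langfuse span, but nothing is captured automatically, and the trace attributes are attached explicitly through `CopilotLangfuseTelemetry`. A minimal sketch of that capture-nothing pattern, mirroring the decorator usage in the diff (the function and its return value are illustrative):

```python
import langfuse


# A span is created for every call, but the arguments and the return value
# are not sent to Langfuse automatically.
@langfuse.observe(capture_input=False, capture_output=False)
def load_project_files(project_id: str) -> list:
    # Anything that should appear in the trace has to be attached explicitly;
    # in the diff this is done through the CopilotLangfuseTelemetry helpers.
    return ["domain.yml", "config.yml"]  # placeholder data for the sketch
```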