rasa-pro 3.14.1__py3-none-any.whl → 3.15.0.dev20251027__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rasa/builder/config.py +4 -0
- rasa/builder/copilot/copilot.py +28 -9
- rasa/builder/copilot/models.py +251 -32
- rasa/builder/document_retrieval/inkeep_document_retrieval.py +2 -0
- rasa/builder/download.py +1 -1
- rasa/builder/evaluator/__init__.py +0 -0
- rasa/builder/evaluator/constants.py +15 -0
- rasa/builder/evaluator/copilot_executor.py +89 -0
- rasa/builder/evaluator/dataset/models.py +173 -0
- rasa/builder/evaluator/exceptions.py +4 -0
- rasa/builder/evaluator/response_classification/__init__.py +0 -0
- rasa/builder/evaluator/response_classification/constants.py +66 -0
- rasa/builder/evaluator/response_classification/evaluator.py +346 -0
- rasa/builder/evaluator/response_classification/langfuse_runner.py +463 -0
- rasa/builder/evaluator/response_classification/models.py +61 -0
- rasa/builder/evaluator/scripts/__init__.py +0 -0
- rasa/builder/evaluator/scripts/run_response_classification_evaluator.py +152 -0
- rasa/builder/service.py +101 -24
- rasa/builder/telemetry/__init__.py +0 -0
- rasa/builder/telemetry/copilot_langfuse_telemetry.py +384 -0
- rasa/builder/{copilot/telemetry.py → telemetry/copilot_segment_telemetry.py} +21 -3
- rasa/constants.py +1 -0
- rasa/core/policies/flows/flow_executor.py +20 -6
- rasa/core/run.py +15 -4
- rasa/dialogue_understanding/generator/single_step/compact_llm_command_generator.py +15 -7
- rasa/dialogue_understanding/generator/single_step/search_ready_llm_command_generator.py +15 -8
- rasa/e2e_test/e2e_config.py +4 -3
- rasa/engine/recipes/default_components.py +16 -6
- rasa/graph_components/validators/default_recipe_validator.py +10 -4
- rasa/nlu/classifiers/diet_classifier.py +2 -0
- rasa/shared/core/slots.py +55 -24
- rasa/shared/utils/common.py +9 -1
- rasa/utils/common.py +9 -0
- rasa/utils/endpoints.py +2 -0
- rasa/utils/installation_utils.py +111 -0
- rasa/utils/tensorflow/callback.py +2 -0
- rasa/utils/train_utils.py +2 -0
- rasa/version.py +1 -1
- {rasa_pro-3.14.1.dist-info → rasa_pro-3.15.0.dev20251027.dist-info}/METADATA +4 -2
- {rasa_pro-3.14.1.dist-info → rasa_pro-3.15.0.dev20251027.dist-info}/RECORD +43 -28
- {rasa_pro-3.14.1.dist-info → rasa_pro-3.15.0.dev20251027.dist-info}/NOTICE +0 -0
- {rasa_pro-3.14.1.dist-info → rasa_pro-3.15.0.dev20251027.dist-info}/WHEEL +0 -0
- {rasa_pro-3.14.1.dist-info → rasa_pro-3.15.0.dev20251027.dist-info}/entry_points.txt +0 -0
rasa/builder/config.py
CHANGED
@@ -13,6 +13,10 @@ OPENAI_VECTOR_STORE_ID = os.getenv(
 )
 OPENAI_MAX_VECTOR_RESULTS = int(os.getenv("OPENAI_MAX_VECTOR_RESULTS", "10"))
 OPENAI_TIMEOUT = int(os.getenv("OPENAI_TIMEOUT", "30"))
+# OpenAI Token Pricing Configuration (per 1,000 tokens)
+COPILOT_INPUT_TOKEN_PRICE = float(os.getenv("COPILOT_INPUT_TOKEN_PRICE", "0.002"))
+COPILOT_OUTPUT_TOKEN_PRICE = float(os.getenv("COPILOT_OUTPUT_TOKEN_PRICE", "0.0005"))
+COPILOT_CACHED_TOKEN_PRICE = float(os.getenv("COPILOT_CACHED_TOKEN_PRICE", "0.002"))
 
 # Server Configuration
 BUILDER_SERVER_HOST = os.getenv("SERVER_HOST", "0.0.0.0")
rasa/builder/copilot/copilot.py
CHANGED
@@ -42,6 +42,7 @@ from rasa.builder.exceptions import (
     DocumentRetrievalError,
 )
 from rasa.builder.shared.tracker_context import TrackerContext
+from rasa.builder.telemetry.copilot_langfuse_telemetry import CopilotLangfuseTelemetry
 from rasa.shared.constants import PACKAGE_NAME
 
 structlogger = structlog.get_logger()
@@ -72,7 +73,11 @@ class Copilot:
         )
 
         # The final stream chunk includes usage statistics.
-        self.usage_statistics = UsageStatistics(
+        self.usage_statistics = UsageStatistics(
+            input_token_price=config.COPILOT_INPUT_TOKEN_PRICE,
+            output_token_price=config.COPILOT_OUTPUT_TOKEN_PRICE,
+            cached_token_price=config.COPILOT_CACHED_TOKEN_PRICE,
+        )
 
     @asynccontextmanager
     async def _get_client(self) -> AsyncGenerator[openai.AsyncOpenAI, None]:
@@ -94,6 +99,16 @@ class Copilot:
                 error=str(exc),
             )
 
+    @property
+    def llm_config(self) -> Dict[str, Any]:
+        """The LLM config used to generate the response."""
+        return {
+            "model": config.OPENAI_MODEL,
+            "temperature": config.OPENAI_TEMPERATURE,
+            "stream": True,
+            "stream_options": {"include_usage": True},
+        }
+
     async def search_rasa_documentation(
         self,
         context: CopilotContext,
@@ -108,7 +123,9 @@ class Copilot:
         """
         try:
             query = self._create_documentation_search_query(context)
-
+            documents = await self._inkeep_document_retrieval.retrieve_documents(query)
+            # TODO: Log documentation retrieval to Langfuse
+            return documents
         except DocumentRetrievalError as e:
             structlogger.error(
                 "copilot.search_rasa_documentation.error",
@@ -145,11 +162,12 @@ class Copilot:
             Exception: If an unexpected error occurs.
         """
         relevant_documents = await self.search_rasa_documentation(context)
-        messages = await self._build_messages(context, relevant_documents)
         tracker_event_attachments = self._extract_tracker_event_attachments(
             context.copilot_chat_history[-1]
        )
+        messages = await self._build_messages(context, relevant_documents)
 
+        # TODO: Delete this after Langfuse is implemented
         support_evidence = CopilotGenerationContext(
             relevant_documents=relevant_documents,
             system_message=messages[0],
@@ -163,6 +181,7 @@ class Copilot:
             support_evidence,
         )
 
+    @CopilotLangfuseTelemetry.trace_copilot_streaming_generation
     async def _stream_response(
         self, messages: List[Dict[str, Any]]
     ) -> AsyncGenerator[str, None]:
@@ -172,13 +191,10 @@ class Copilot:
         try:
             async with self._get_client() as client:
                 stream = await client.chat.completions.create(
-
-
-                    temperature=config.OPENAI_TEMPERATURE,
-                    stream=True,
-                    stream_options={"include_usage": True},
+                    messages=messages,
+                    **self.llm_config,
                 )
-                async for chunk in stream:
+                async for chunk in stream:  # type: ignore[attr-defined]
                     # The final chunk, which contains the usage statistics,
                     # arrives with an empty `choices` list.
                     if not chunk.choices:
@@ -189,6 +205,7 @@ class Copilot:
                     delta = chunk.choices[0].delta
                     if delta and delta.content:
                         yield delta.content
+
         except openai.OpenAIError as e:
             structlogger.exception("copilot.stream_response.api_error", error=str(e))
             raise CopilotStreamError(
@@ -559,4 +576,6 @@ class Copilot:
         """Extract the tracker event attachments from the message."""
         if not isinstance(message, UserChatMessage):
             return []
+        # TODO: Log tracker event attachments to Langfuse only in the case of the
+        # User chat message.
         return message.get_content_blocks_by_type(EventContent)
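The `messages=messages, **self.llm_config` call above is plain dict unpacking over the new `llm_config` property. A minimal standalone sketch of the equivalent streaming call and chunk handling; it assumes `OPENAI_API_KEY` is set and `rasa.builder.config` is importable, and `stream_completion` is an illustrative helper, not part of the package:

from typing import Any, Dict, List

import openai

from rasa.builder import config


async def stream_completion(messages: List[Dict[str, Any]]) -> str:
    """Sketch of the call Copilot._stream_response now builds via **self.llm_config."""
    client = openai.AsyncOpenAI()
    stream = await client.chat.completions.create(
        messages=messages,
        model=config.OPENAI_MODEL,
        temperature=config.OPENAI_TEMPERATURE,
        stream=True,
        stream_options={"include_usage": True},
    )
    chunks: List[str] = []
    async for chunk in stream:
        # The final chunk carries only usage statistics and arrives with an
        # empty `choices` list, so skip it here.
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta
        if delta and delta.content:
            chunks.append(delta.content)
    return "".join(chunks)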
rasa/builder/copilot/models.py
CHANGED
@@ -3,6 +3,7 @@ from enum import Enum
 from typing import Any, Dict, List, Literal, Optional, Type, TypeVar, Union
 
 import structlog
+from openai.types.chat import ChatCompletion
 from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 from pydantic import (
     BaseModel,
@@ -343,6 +344,55 @@ ChatMessage = Union[
 ]
 
 
+def create_chat_message_from_dict(message_data: Dict[str, Any]) -> ChatMessage:
+    """Parse a single chat message dictionary into a ChatMessage object.
+
+    This utility function manually parses a chat message dictionary into the
+    appropriate ChatMessage type based on its role field.
+
+    Args:
+        message_data: Dictionary containing chat message data
+
+    Returns:
+        Parsed ChatMessage object
+
+    Raises:
+        ValueError: If an unknown role is encountered
+
+    Example:
+        >>> message_data = {
+        ...     "role": "user",
+        ...     "content": [{"type": "text", "text": "Hello"}]
+        ... }
+        >>> message = parse_chat_message_from_dict(message_data)
+        >>> isinstance(message, UserChatMessage)
+        True
+        >>> message.role
+        'user'
+    """
+    available_roles = [ROLE_USER, ROLE_COPILOT, ROLE_COPILOT_INTERNAL]
+    role = message_data.get("role")
+
+    if role == ROLE_USER:
+        return UserChatMessage(**message_data)
+    elif role == ROLE_COPILOT:
+        return CopilotChatMessage(**message_data)
+    elif role == ROLE_COPILOT_INTERNAL:
+        return InternalCopilotRequestChatMessage(**message_data)
+    else:
+        message = (
+            f"Unknown role '{role}' in chat message. "
+            f"Available roles are: {', '.join(available_roles)}."
+        )
+        structlogger.error(
+            "models.create_chat_message_from_dict.unknown_role",
+            event_info=message,
+            role=role,
+            available_roles=available_roles,
+        )
+        raise ValueError(message)
+
+
 class CopilotContext(BaseModel):
     """Model containing the context used by the copilot to generate a response."""
 
@@ -390,37 +440,40 @@ class CopilotRequest(BaseModel):
 
     @field_validator("copilot_chat_history", mode="before")
     @classmethod
-    def parse_chat_history(
+    def parse_chat_history(
+        cls, v: Union[List[Dict[str, Any]], List[ChatMessage]]
+    ) -> List[ChatMessage]:
         """Manually parse chat history messages based on role field."""
+        # If already parsed ChatMessage objects, return them as-is
+        if (
+            v
+            and isinstance(v, list)
+            and all(isinstance(item, ChatMessage) for item in v)
+        ):
+            return v  # type: ignore[return-value]
+
+        # Check for mixed types (some ChatMessage, some not)
+        if (
+            v
+            and isinstance(v, list)
+            and any(isinstance(item, ChatMessage) for item in v)
+        ):
+            message = (
+                "Mixed types in copilot_chat_history: cannot mix ChatMessage objects"
+                "with other types."
+            )
+            structlog.get_logger().error(
+                "copilot_request.parse_chat_history.mixed_types",
+                event_info=message,
+                chat_history_types=[type(item) for item in v],
+            )
+            raise ValueError(message)
+
+        # Otherwise, parse from dictionaries
         parsed_messages: List[ChatMessage] = []
-        available_roles = [ROLE_USER, ROLE_COPILOT, ROLE_COPILOT_INTERNAL]
         for message_data in v:
-
-
-            if role == ROLE_USER:
-                parsed_messages.append(UserChatMessage(**message_data))
-
-            elif role == ROLE_COPILOT:
-                parsed_messages.append(CopilotChatMessage(**message_data))
-
-            elif role == ROLE_COPILOT_INTERNAL:
-                parsed_messages.append(
-                    InternalCopilotRequestChatMessage(**message_data)
-                )
-
-            else:
-                message = (
-                    f"Unknown role '{role}' in chat message. "
-                    f"Available roles are: {', '.join(available_roles)}."
-                )
-                structlogger.error(
-                    "copilot_request.parse_chat_history.unknown_role",
-                    event_info=message,
-                    role=role,
-                    available_roles=available_roles,
-                )
-                raise ValueError(message)
-
+            chat_message = create_chat_message_from_dict(message_data)
+            parsed_messages.append(chat_message)
         return parsed_messages
 
     @property
@@ -612,16 +665,171 @@ class TrainingErrorLog(CopilotOutput):
 
 
 class UsageStatistics(BaseModel):
-
-
-
-
+    """Usage statistics for a copilot generation."""
+
+    # Token usage statistics
+    prompt_tokens: Optional[int] = Field(
+        default=None,
+        description=(
+            "Total number of prompt tokens used to generate completion. "
+            "Should include cached prompt tokens."
+        ),
+    )
+    completion_tokens: Optional[int] = Field(
+        default=None,
+        description="Number of generated tokens.",
+    )
+    total_tokens: Optional[int] = Field(
+        default=None,
+        description="Total number of tokens used (input + output).",
+    )
+    cached_prompt_tokens: Optional[int] = Field(
+        default=None,
+        description="Number of cached prompt tokens.",
+    )
+    model: Optional[str] = Field(
+        default=None,
+        description="The model used to generate the response.",
+    )
+
+    # Token prices
+    input_token_price: float = Field(
+        default=0.0,
+        description="Price per 1K input tokens in dollars.",
+    )
+    output_token_price: float = Field(
+        default=0.0,
+        description="Price per 1K output tokens in dollars.",
+    )
+    cached_token_price: float = Field(
+        default=0.0,
+        description="Price per 1K cached tokens in dollars.",
+    )
+
+    @property
+    def non_cached_prompt_tokens(self) -> Optional[int]:
+        """Get the non-cached prompt tokens."""
+        if self.cached_prompt_tokens is not None and self.prompt_tokens is not None:
+            return self.prompt_tokens - self.cached_prompt_tokens
+        return self.prompt_tokens
+
+    @property
+    def non_cached_cost(self) -> Optional[float]:
+        """Calculate the non-cached token cost based on configured pricing."""
+        if self.non_cached_prompt_tokens is None:
+            return None
+        if self.non_cached_prompt_tokens == 0:
+            return 0.0
+
+        return (self.non_cached_prompt_tokens / 1000.0) * self.input_token_price
+
+    @property
+    def cached_cost(self) -> Optional[float]:
+        """Calculate the cached token cost based on configured pricing."""
+        if self.cached_prompt_tokens is None:
+            return None
+        if self.cached_prompt_tokens == 0:
+            return 0.0
+
+        return (self.cached_prompt_tokens / 1000.0) * self.cached_token_price
+
+    @property
+    def input_cost(self) -> Optional[float]:
+        """Calculate the input token cost based on configured pricing.
+
+        The calculation takes into account the cached prompt tokens (if available) too.
+        """
+        # If both non-cached and cached costs are None, there's no input cost
+        if self.non_cached_cost is None and self.cached_cost is None:
+            return None
+
+        # If only non-cached cost is available, return it
+        if self.non_cached_cost is not None and self.cached_cost is None:
+            return self.non_cached_cost
+
+        # If only cached cost is available, return it
+        if self.non_cached_cost is None and self.cached_cost is not None:
+            return self.cached_cost
+
+        # If both are available, return the sum
+        return self.non_cached_cost + self.cached_cost  # type: ignore[operator]
+
+    @property
+    def output_cost(self) -> Optional[float]:
+        """Calculate the output token cost based on configured pricing."""
+        if self.completion_tokens is None:
+            return None
+        if self.completion_tokens == 0:
+            return 0.0
+
+        return (self.completion_tokens / 1000.0) * self.output_token_price
+
+    @property
+    def total_cost(self) -> Optional[float]:
+        """Calculate the total cost based on configured pricing.
+
+        Returns:
+            Total cost in dollars, or None if insufficient data.
+        """
+        if self.input_cost is None or self.output_cost is None:
+            return None
+
+        return self.input_cost + self.output_cost
+
+    def update_token_prices(
+        self,
+        input_token_price: float,
+        output_token_price: float,
+        cached_token_price: float,
+    ) -> None:
+        """Update token prices with provided values.
+
+        Args:
+            input_token_price: Price per 1K input tokens in dollars.
+            output_token_price: Price per 1K output tokens in dollars.
+            cached_token_price: Price per 1K cached tokens in dollars.
+        """
+        self.input_token_price = input_token_price
+        self.output_token_price = output_token_price
+        self.cached_token_price = cached_token_price
+
+    @classmethod
+    def from_chat_completion_response(
+        cls,
+        response: ChatCompletion,
+        input_token_price: float = 0.0,
+        output_token_price: float = 0.0,
+        cached_token_price: float = 0.0,
+    ) -> Optional["UsageStatistics"]:
+        """Create a UsageStatistics object from a ChatCompletionChunk."""
+        if not (usage := getattr(response, "usage", None)):
+            return None
+
+        usage_statistics = cls(
+            input_token_price=input_token_price,
+            output_token_price=output_token_price,
+            cached_token_price=cached_token_price,
+        )
+
+        usage_statistics.prompt_tokens = usage.prompt_tokens
+        usage_statistics.completion_tokens = usage.completion_tokens
+        usage_statistics.total_tokens = usage.total_tokens
+        usage_statistics.model = getattr(response, "model", None)
+
+        # Extract cached tokens if available
+        if hasattr(usage, "prompt_tokens_details") and usage.prompt_tokens_details:
+            usage_statistics.cached_prompt_tokens = getattr(
+                usage.prompt_tokens_details, "cached_tokens", None
+            )
+
+        return usage_statistics
 
     def reset(self) -> None:
         """Reset usage statistics to their default values."""
         self.prompt_tokens = None
         self.completion_tokens = None
         self.total_tokens = None
+        self.cached_prompt_tokens = None
         self.model = None
 
     def update_from_stream_chunk(self, chunk: ChatCompletionChunk) -> None:
@@ -630,14 +838,25 @@ class UsageStatistics(BaseModel):
         Args:
             chunk: The OpenAI stream chunk containing usage statistics.
         """
+        # Reset the usage statistics to their default values
+        self.reset()
+
+        # If the chunk has no usage statistics, return
         if not (usage := getattr(chunk, "usage", None)):
             return
 
+        # Update the usage statistics with the values from the chunk
         self.prompt_tokens = usage.prompt_tokens
         self.completion_tokens = usage.completion_tokens
         self.total_tokens = usage.total_tokens
         self.model = getattr(chunk, "model", None)
 
+        # Extract cached tokens if available
+        if hasattr(usage, "prompt_tokens_details") and usage.prompt_tokens_details:
+            self.cached_prompt_tokens = getattr(
+                usage.prompt_tokens_details, "cached_tokens", None
+            )
+
 
 class SigningContext(BaseModel):
     secret: Optional[str] = Field(None)
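Put together, the new cost properties are straight per-1K-token arithmetic. A small worked sketch using the default prices from rasa/builder/config.py; the token counts are made up for illustration:

from rasa.builder.copilot.models import UsageStatistics

stats = UsageStatistics(
    prompt_tokens=1500,          # includes the cached prompt tokens
    cached_prompt_tokens=500,
    completion_tokens=200,
    input_token_price=0.002,     # per 1K non-cached input tokens
    output_token_price=0.0005,   # per 1K output tokens
    cached_token_price=0.002,    # per 1K cached input tokens
)

assert stats.non_cached_prompt_tokens == 1000          # 1500 - 500
assert stats.input_cost == 0.002 * 1.0 + 0.002 * 0.5   # non-cached + cached = 0.003
assert stats.output_cost == 0.0005 * 0.2               # 0.0001
assert abs(stats.total_cost - 0.0031) < 1e-9           # input + output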
rasa/builder/document_retrieval/inkeep_document_retrieval.py
CHANGED
@@ -17,6 +17,7 @@ from rasa.builder.document_retrieval.constants import (
 )
 from rasa.builder.document_retrieval.models import Document
 from rasa.builder.exceptions import DocumentRetrievalError
+from rasa.builder.telemetry.copilot_langfuse_telemetry import CopilotLangfuseTelemetry
 from rasa.shared.utils.io import read_json_file
 
 structlogger = structlog.get_logger()
@@ -88,6 +89,7 @@ class InKeepDocumentRetrieval:
            )
            raise e
 
+    @CopilotLangfuseTelemetry.trace_document_retrieval_generation
     async def _call_inkeep_rag_api(
         self, query: str, temperature: float, timeout: float
     ) -> ChatCompletion:
rasa/builder/download.py
CHANGED
@@ -27,7 +27,7 @@ def _get_pyproject_toml_content(project_id: str) -> str:
     version = "0.1.0"
     description = "Add your description for your Rasa bot here"
     readme = "README.md"
-    dependencies = ["rasa-pro>=3.
+    dependencies = ["rasa-pro>=3.14"]
     requires-python = ">={sys.version_info.major}.{sys.version_info.minor}"
     """
     )
rasa/builder/evaluator/__init__.py
ADDED
File without changes
rasa/builder/evaluator/constants.py
ADDED
@@ -0,0 +1,15 @@
+"""Constants for the evaluator module."""
+
+from pathlib import Path
+
+# Base directory for the rasa package
+BASE_DIR = Path(__file__).parent.parent.parent
+
+# Response classification evaluation results directory
+RESPONSE_CLASSIFICATION_EVALUATION_RESULTS_DIR = (
+    BASE_DIR / "builder" / "evaluator" / "results"
+)
+# Default output filename
+DEFAULT_RESPONSE_CLASSIFICATION_EVALUATION_TEXT_OUTPUT_FILENAME = "run_results.txt"
+# Default YAML output filename
+RESPONSE_CLASSIFICATION_EVALUATION_YAML_OUTPUT_FILENAME = "run_results.yaml"
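Since this module lives at rasa/builder/evaluator/constants.py, the three .parent hops in BASE_DIR resolve to the installed rasa/ package root, and the results directory points back into rasa/builder/evaluator/results. A quick sanity-check sketch:

from rasa.builder.evaluator.constants import (
    BASE_DIR,
    RESPONSE_CLASSIFICATION_EVALUATION_RESULTS_DIR,
)

# constants.py -> evaluator/ -> builder/ -> rasa/
print(BASE_DIR.name)                                   # "rasa"
print(RESPONSE_CLASSIFICATION_EVALUATION_RESULTS_DIR)  # .../rasa/builder/evaluator/results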
rasa/builder/evaluator/copilot_executor.py
ADDED
@@ -0,0 +1,89 @@
+"""Copilot execution utilities for evaluators.
+
+This module provides utilities for running copilot operations in evaluation contexts,
+independent of specific evaluation frameworks like Langfuse.
+"""
+
+from typing import List, Optional
+
+import structlog
+from pydantic import BaseModel
+
+from rasa.builder.config import COPILOT_HANDLER_ROLLING_BUFFER_SIZE
+from rasa.builder.copilot.models import (
+    CopilotContext,
+    CopilotGenerationContext,
+    GeneratedContent,
+    ReferenceSection,
+    ResponseCategory,
+)
+from rasa.builder.llm_service import llm_service
+
+structlogger = structlog.get_logger()
+
+
+class CopilotRunResult(BaseModel):
+    """Result from running the copilot with response handler."""
+
+    complete_response: Optional[str]
+    response_category: Optional[ResponseCategory]
+    reference_section: Optional[ReferenceSection]
+    generation_context: CopilotGenerationContext
+
+
+async def run_copilot_with_response_handler(
+    context: CopilotContext,
+) -> Optional[CopilotRunResult]:
+    """Run the copilot with response handler on the given context.
+
+    This function encapsulates the core copilot execution logic. It handles:
+    - Instantiating the copilot and response handler
+    - Generating a response and extracting the reference section from the given context
+    - Returning structured results
+
+    Args:
+        context: The copilot context to process.
+
+    Returns:
+        CopilotRunResult containing the complete response, category, and generation
+        context, or None if execution fails.
+
+    Raises:
+        Any exceptions from the copilot or response handler execution.
+    """
+    # Instantiate the copilot and response handler
+    copilot = llm_service.instantiate_copilot()
+    copilot_response_handler = llm_service.instantiate_handler(
+        COPILOT_HANDLER_ROLLING_BUFFER_SIZE
+    )
+
+    # Call the copilot to generate a response and handle it with the response
+    # handler
+    (original_stream, generation_context) = await copilot.generate_response(context)
+    intercepted_stream = copilot_response_handler.handle_response(original_stream)
+
+    # Exhaust the stream to get the complete response for evaluation
+    response_chunks: List[str] = []
+    response_category = None
+    async for chunk in intercepted_stream:
+        if not isinstance(chunk, GeneratedContent):
+            continue
+        response_chunks.append(chunk.content)
+        response_category = chunk.response_category
+
+    complete_response = "".join(response_chunks) if response_chunks else None
+
+    # Extract the reference section from the response handler
+    if generation_context.relevant_documents:
+        reference_section = copilot_response_handler.extract_references(
+            generation_context.relevant_documents
+        )
+    else:
+        reference_section = None
+
+    return CopilotRunResult(
+        complete_response=complete_response,
+        response_category=response_category,
+        reference_section=reference_section,
+        generation_context=generation_context,
+    )
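A rough sketch of how an evaluation script might drive run_copilot_with_response_handler. The CopilotContext construction below (only copilot_chat_history set) and the content-block shape are assumptions based on the docstring example in models.py, not something this diff guarantees:

import asyncio

from rasa.builder.copilot.models import CopilotContext, create_chat_message_from_dict
from rasa.builder.evaluator.copilot_executor import run_copilot_with_response_handler


async def main() -> None:
    history = [
        create_chat_message_from_dict(
            {"role": "user", "content": [{"type": "text", "text": "How do I add a flow?"}]}
        )
    ]
    # Assumption: the remaining CopilotContext fields have defaults; adjust as needed.
    context = CopilotContext(copilot_chat_history=history)

    result = await run_copilot_with_response_handler(context)
    if result is not None:
        print(result.response_category)
        print(result.complete_response)


asyncio.run(main())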