rasa-pro 3.15.0a1__py3-none-any.whl → 3.15.0.dev20251027__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rasa-pro might be problematic. Click here for more details.

@@ -344,6 +344,55 @@ ChatMessage = Union[
344
344
  ]
345
345
 
346
346
 
347
def create_chat_message_from_dict(message_data: Dict[str, Any]) -> ChatMessage:
    """Parse a single chat message dictionary into a ChatMessage object.

    This utility function manually parses a chat message dictionary into the
    appropriate ChatMessage type based on its ``role`` field. The whole
    dictionary is forwarded to the selected message class as keyword arguments.

    Args:
        message_data: Dictionary containing chat message data.

    Returns:
        Parsed ChatMessage object.

    Raises:
        ValueError: If an unknown role is encountered.

    Example:
        >>> message_data = {
        ...     "role": "user",
        ...     "content": [{"type": "text", "text": "Hello"}]
        ... }
        >>> message = create_chat_message_from_dict(message_data)
        >>> isinstance(message, UserChatMessage)
        True
        >>> message.role
        'user'
    """
    role = message_data.get("role")

    # Dispatch on the role; each branch hands the untouched dictionary to the
    # matching message class.
    if role == ROLE_USER:
        return UserChatMessage(**message_data)
    if role == ROLE_COPILOT:
        return CopilotChatMessage(**message_data)
    if role == ROLE_COPILOT_INTERNAL:
        return InternalCopilotRequestChatMessage(**message_data)

    # No known role matched: log the problem and reject the message.
    available_roles = [ROLE_USER, ROLE_COPILOT, ROLE_COPILOT_INTERNAL]
    message = (
        f"Unknown role '{role}' in chat message. "
        f"Available roles are: {', '.join(available_roles)}."
    )
    structlogger.error(
        "models.create_chat_message_from_dict.unknown_role",
        event_info=message,
        role=role,
        available_roles=available_roles,
    )
    raise ValueError(message)
394
+
395
+
347
396
  class CopilotContext(BaseModel):
348
397
  """Model containing the context used by the copilot to generate a response."""
349
398
 
@@ -391,37 +440,40 @@ class CopilotRequest(BaseModel):
391
440
 
392
441
  @field_validator("copilot_chat_history", mode="before")
393
442
  @classmethod
394
- def parse_chat_history(cls, v: List[Dict[str, Any]]) -> List[ChatMessage]:
443
+ def parse_chat_history(
444
+ cls, v: Union[List[Dict[str, Any]], List[ChatMessage]]
445
+ ) -> List[ChatMessage]:
395
446
  """Manually parse chat history messages based on role field."""
447
+ # If already parsed ChatMessage objects, return them as-is
448
+ if (
449
+ v
450
+ and isinstance(v, list)
451
+ and all(isinstance(item, ChatMessage) for item in v)
452
+ ):
453
+ return v # type: ignore[return-value]
454
+
455
+ # Check for mixed types (some ChatMessage, some not)
456
+ if (
457
+ v
458
+ and isinstance(v, list)
459
+ and any(isinstance(item, ChatMessage) for item in v)
460
+ ):
461
+ message = (
462
+ "Mixed types in copilot_chat_history: cannot mix ChatMessage objects"
463
+ "with other types."
464
+ )
465
+ structlog.get_logger().error(
466
+ "copilot_request.parse_chat_history.mixed_types",
467
+ event_info=message,
468
+ chat_history_types=[type(item) for item in v],
469
+ )
470
+ raise ValueError(message)
471
+
472
+ # Otherwise, parse from dictionaries
396
473
  parsed_messages: List[ChatMessage] = []
397
- available_roles = [ROLE_USER, ROLE_COPILOT, ROLE_COPILOT_INTERNAL]
398
474
  for message_data in v:
399
- role = message_data.get("role")
400
-
401
- if role == ROLE_USER:
402
- parsed_messages.append(UserChatMessage(**message_data))
403
-
404
- elif role == ROLE_COPILOT:
405
- parsed_messages.append(CopilotChatMessage(**message_data))
406
-
407
- elif role == ROLE_COPILOT_INTERNAL:
408
- parsed_messages.append(
409
- InternalCopilotRequestChatMessage(**message_data)
410
- )
411
-
412
- else:
413
- message = (
414
- f"Unknown role '{role}' in chat message. "
415
- f"Available roles are: {', '.join(available_roles)}."
416
- )
417
- structlogger.error(
418
- "copilot_request.parse_chat_history.unknown_role",
419
- event_info=message,
420
- role=role,
421
- available_roles=available_roles,
422
- )
423
- raise ValueError(message)
424
-
475
+ chat_message = create_chat_message_from_dict(message_data)
476
+ parsed_messages.append(chat_message)
425
477
  return parsed_messages
426
478
 
427
479
  @property
File without changes
@@ -0,0 +1,15 @@
1
+ """Constants for the evaluator module."""
2
+
3
+ from pathlib import Path
4
+
5
# Root of the rasa package: three directory levels above this file
BASE_DIR = Path(__file__).parent.parent.parent

# Directory where response classification evaluation results are stored
RESPONSE_CLASSIFICATION_EVALUATION_RESULTS_DIR = BASE_DIR.joinpath(
    "builder", "evaluator", "results"
)

# Default filename of the plain-text results output
DEFAULT_RESPONSE_CLASSIFICATION_EVALUATION_TEXT_OUTPUT_FILENAME = "run_results.txt"

# Filename of the YAML results output
RESPONSE_CLASSIFICATION_EVALUATION_YAML_OUTPUT_FILENAME = "run_results.yaml"
@@ -0,0 +1,89 @@
1
+ """Copilot execution utilities for evaluators.
2
+
3
+ This module provides utilities for running copilot operations in evaluation contexts,
4
+ independent of specific evaluation frameworks like Langfuse.
5
+ """
6
+
7
+ from typing import List, Optional
8
+
9
+ import structlog
10
+ from pydantic import BaseModel
11
+
12
+ from rasa.builder.config import COPILOT_HANDLER_ROLLING_BUFFER_SIZE
13
+ from rasa.builder.copilot.models import (
14
+ CopilotContext,
15
+ CopilotGenerationContext,
16
+ GeneratedContent,
17
+ ReferenceSection,
18
+ ResponseCategory,
19
+ )
20
+ from rasa.builder.llm_service import llm_service
21
+
22
+ structlogger = structlog.get_logger()
23
+
24
+
25
class CopilotRunResult(BaseModel):
    """Result from running the copilot with response handler.

    Instances are built by ``run_copilot_with_response_handler`` once the
    intercepted response stream has been fully consumed.
    """

    # Concatenation of all generated content chunks; None when the stream
    # produced no generated content at all.
    complete_response: Optional[str]
    # Response category taken from the last generated content chunk; None when
    # there was no such chunk.
    response_category: Optional[ResponseCategory]
    # References extracted from the relevant documents; None when the
    # generation context has no relevant documents.
    reference_section: Optional[ReferenceSection]
    # Generation context returned by the copilot alongside the response stream.
    generation_context: CopilotGenerationContext
32
+
33
+
34
async def run_copilot_with_response_handler(
    context: CopilotContext,
) -> Optional[CopilotRunResult]:
    """Run the copilot with response handler on the given context.

    This function encapsulates the core copilot execution logic. It handles:
    - Instantiating the copilot and response handler
    - Generating a response and extracting the reference section from the given
      context
    - Returning structured results

    Args:
        context: The copilot context to process.

    Returns:
        CopilotRunResult containing the complete response, the response
        category, the reference section and the generation context. The
        ``Optional`` annotation is kept for backward compatibility only: this
        function never returns None itself, failures surface as exceptions.

    Raises:
        Any exception raised by the copilot or the response handler is
        propagated to the caller unchanged.
    """
    # Instantiate the copilot and response handler
    copilot = llm_service.instantiate_copilot()
    copilot_response_handler = llm_service.instantiate_handler(
        COPILOT_HANDLER_ROLLING_BUFFER_SIZE
    )

    # Call the copilot to generate a response and route the stream through the
    # response handler
    original_stream, generation_context = await copilot.generate_response(context)
    intercepted_stream = copilot_response_handler.handle_response(original_stream)

    # Exhaust the stream to get the complete response for evaluation. Only
    # generated content is collected; the category of the last such chunk wins.
    response_chunks: List[str] = []
    response_category = None
    async for chunk in intercepted_stream:
        if isinstance(chunk, GeneratedContent):
            response_chunks.append(chunk.content)
            response_category = chunk.response_category

    complete_response = "".join(response_chunks) if response_chunks else None

    # Extract the reference section only when there are documents to cite
    relevant_documents = generation_context.relevant_documents
    reference_section = (
        copilot_response_handler.extract_references(relevant_documents)
        if relevant_documents
        else None
    )

    return CopilotRunResult(
        complete_response=complete_response,
        response_category=response_category,
        reference_section=reference_section,
        generation_context=generation_context,
    )
@@ -0,0 +1,173 @@
1
+ from typing import Any, Dict, List, Optional, Union
2
+
3
+ import structlog
4
+ from pydantic import BaseModel, Field, field_validator
5
+
6
+ from rasa.builder.copilot.models import (
7
+ ChatMessage,
8
+ CopilotContext,
9
+ EventContent,
10
+ ReferenceEntry,
11
+ ResponseCategory,
12
+ create_chat_message_from_dict,
13
+ )
14
+ from rasa.builder.document_retrieval.models import Document
15
+ from rasa.builder.shared.tracker_context import TrackerContext
16
+
17
+ structlogger = structlog.get_logger()
18
+
19
+
20
class DatasetInput(BaseModel):
    """Model for the input field of a dataset entry."""

    # Optional message text; defaults to None when the entry has none.
    message: Optional[str] = None
    # Event contents attached to the entry; defaults to a fresh empty list.
    tracker_event_attachments: List[EventContent] = Field(default_factory=list)
25
+
26
+
27
class DatasetExpectedOutput(BaseModel):
    """Model for the expected_output field of a dataset entry.

    All three fields are required; none of them declares a default.
    """

    # Expected answer text.
    answer: str
    # Expected response category.
    response_category: ResponseCategory
    # Expected reference entries. Annotated with ``typing.List`` to match the
    # other annotations in this module.
    references: List[ReferenceEntry]
33
+
34
+
35
class DatasetMetadataCopilotAdditionalContext(BaseModel):
    """Additional copilot context stored under ``copilot_additional_context``.

    Every field has a default, so the model can be built without arguments.
    """

    relevant_documents: List[Document] = Field(default_factory=list)
    relevant_assistant_files: Dict[str, str] = Field(default_factory=dict)
    assistant_tracker_context: Optional[Dict[str, Any]] = None
    assistant_logs: str = Field(default="")
    copilot_chat_history: List[ChatMessage] = Field(default_factory=list)

    @field_validator("copilot_chat_history", mode="before")
    @classmethod
    def parse_chat_history(
        cls, v: Union[List[Dict[str, Any]], List[ChatMessage]]
    ) -> List[ChatMessage]:
        """Turn the raw chat history into a list of ChatMessage objects.

        A non-empty list made up solely of ChatMessage objects is returned
        unchanged, a list mixing ChatMessage objects with anything else raises
        ValueError, and every other input is parsed item by item with
        ``create_chat_message_from_dict``.
        """
        if v and isinstance(v, list):
            parsed_flags = [isinstance(item, ChatMessage) for item in v]

            # Nothing to do when every item is already a ChatMessage
            if all(parsed_flags):
                return v  # type: ignore[return-value]

            # Some, but not all, items are ChatMessage objects: reject
            if any(parsed_flags):
                message = (
                    "Mixed types in copilot_chat_history: cannot mix ChatMessage objects "
                    "with other types."
                )
                structlogger.error(
                    "dataset_entry.parse_chat_history.mixed_types",
                    event_info=message,
                    chat_history_types=[type(item) for item in v],
                )
                raise ValueError(message)

        # Plain dictionaries (or an empty history): parse each entry by role
        return [create_chat_message_from_dict(entry) for entry in v]
81
+
82
+
83
class DatasetMetadata(BaseModel):
    """Model for the metadata field of a dataset entry."""

    # String-to-string mapping of identifiers; defaults to an empty dict.
    # NOTE(review): which identifiers are stored here is not visible in this
    # module - confirm against the dataset producer.
    ids: Dict[str, str] = Field(default_factory=dict)
    # Additional copilot context; when omitted, a default-constructed
    # DatasetMetadataCopilotAdditionalContext is used.
    copilot_additional_context: DatasetMetadataCopilotAdditionalContext = Field(
        default_factory=DatasetMetadataCopilotAdditionalContext
    )
90
+
91
+
92
class DatasetEntry(BaseModel):
    """Dataset entry built from a Langfuse ExperimentItem."""

    # Basic fields from ExperimentItem
    id: str
    input: DatasetInput
    expected_output: DatasetExpectedOutput
    metadata: DatasetMetadata

    def to_copilot_context(self) -> CopilotContext:
        """Build the CopilotContext described by this entry's metadata.

        Returns:
            CopilotContext carrying the tracker context (when one is stored),
            the assistant logs, the assistant files and the copilot chat
            history.

        Raises:
            ValueError: If the metadata is None, as it is required for creating
                a valid CopilotContext.
        """
        if self.metadata is None:
            message = (
                f"Cannot create CopilotContext from dataset item with id: {self.id}. "
                "Metadata is required but was None."
            )
            structlogger.error(
                "dataset_entry.to_copilot_context.metadata_is_none",
                event_info=message,
                item_id=self.id,
                item_metadata=self.metadata,
            )
            raise ValueError(message)

        additional_context = self.metadata.copilot_additional_context

        # The tracker context is optional; only build it when data is present
        raw_tracker_context = additional_context.assistant_tracker_context
        if raw_tracker_context is None:
            tracker_context = None
        else:
            tracker_context = TrackerContext(**raw_tracker_context)

        return CopilotContext(
            tracker_context=tracker_context,
            assistant_logs=additional_context.assistant_logs,
            assistant_files=additional_context.relevant_assistant_files,
            copilot_chat_history=additional_context.copilot_chat_history,
        )

    @classmethod
    def from_raw_data(
        cls,
        id: str,
        input_data: Dict[str, Any],
        expected_output_data: Dict[str, Any],
        metadata_data: Dict[str, Any],
    ) -> "DatasetEntry":
        """Create a DatasetEntry from raw dictionary data.

        Args:
            id: The dataset entry ID.
            input_data: Raw input dictionary.
            expected_output_data: Raw expected output dictionary.
            metadata_data: Raw metadata dictionary with all the additional
                context used to generate the Copilot response.

        Returns:
            DatasetEntry with parsed data.
        """
        # Each nested structure is validated by its own Pydantic model, in the
        # order input, expected output, metadata.
        return cls(
            id=id,
            input=DatasetInput.model_validate(input_data),
            expected_output=DatasetExpectedOutput.model_validate(
                expected_output_data
            ),
            metadata=DatasetMetadata.model_validate(metadata_data),
        )
@@ -0,0 +1,4 @@
1
class EvaluationError(Exception):
    """Common base class for errors raised by the evaluation code."""
@@ -0,0 +1,66 @@
1
+ """Constants for the response classification evaluator."""
2
+
3
+ from typing import List, Literal
4
+
5
# Options for averaging methods for Response Classification Evaluator
MICRO_AVERAGING_METHOD: Literal["micro"] = "micro"
MACRO_AVERAGING_METHOD: Literal["macro"] = "macro"
WEIGHTED_AVERAGING_METHOD: Literal["weighted"] = "weighted"

AVERAGING_METHODS: List[Literal["micro", "macro", "weighted"]] = [
    MICRO_AVERAGING_METHOD,
    MACRO_AVERAGING_METHOD,
    WEIGHTED_AVERAGING_METHOD,
]

# Overall evaluation metric names
MICRO_PRECISION_METRIC = "micro_precision"
MACRO_PRECISION_METRIC = "macro_precision"
WEIGHTED_PRECISION_METRIC = "weighted_precision"

MICRO_RECALL_METRIC = "micro_recall"
MACRO_RECALL_METRIC = "macro_recall"
WEIGHTED_RECALL_METRIC = "weighted_recall"

MICRO_F1_METRIC = "micro_f1"
MACRO_F1_METRIC = "macro_f1"
WEIGHTED_F1_METRIC = "weighted_f1"

# Name of the metric counting items skipped due to invalid data
SKIP_COUNT_METRIC = "skipped_items"

# Per-class evaluation metric name templates; fill in with
# ``.format(category=...)``
PER_CLASS_PRECISION_METRIC_TEMPLATE = "{category}_precision"
PER_CLASS_RECALL_METRIC_TEMPLATE = "{category}_recall"
PER_CLASS_F1_METRIC_TEMPLATE = "{category}_f1"
PER_CLASS_SUPPORT_METRIC_TEMPLATE = "{category}_support"

# Description templates for evaluation metrics; fill in with
# ``.format(value=...)`` (value is rendered with three decimals)
MICRO_PRECISION_DESCRIPTION = "Micro Precision: {value:.3f}"
MACRO_PRECISION_DESCRIPTION = "Macro Precision: {value:.3f}"
WEIGHTED_PRECISION_DESCRIPTION = "Weighted Precision: {value:.3f}"

MICRO_RECALL_DESCRIPTION = "Micro Recall: {value:.3f}"
MACRO_RECALL_DESCRIPTION = "Macro Recall: {value:.3f}"
WEIGHTED_RECALL_DESCRIPTION = "Weighted Recall: {value:.3f}"

MICRO_F1_DESCRIPTION = "Micro F1: {value:.3f}"
MACRO_F1_DESCRIPTION = "Macro F1: {value:.3f}"
WEIGHTED_F1_DESCRIPTION = "Weighted F1: {value:.3f}"

# Skip count metric description
SKIP_COUNT_DESCRIPTION = "Skipped {value} items due to invalid data"

# Per-class description templates; fill in with
# ``.format(category=..., value=...)``
PER_CLASS_PRECISION_DESCRIPTION = "[{category}] Precision: {value:.3f}"
PER_CLASS_RECALL_DESCRIPTION = "[{category}] Recall: {value:.3f}"
PER_CLASS_F1_DESCRIPTION = "[{category}] F1: {value:.3f}"
PER_CLASS_SUPPORT_DESCRIPTION = "[{category}] Support: {value}"

# Experiment configuration
EXPERIMENT_NAME = "Copilot Response Classification Evaluation"
EXPERIMENT_DESCRIPTION = (
    "Evaluating Copilot response classification performance with per-class "
    "metrics and overall averages (micro, macro, weighted). The metrics that "
    "are reported are: precision, recall, F1, support."
)