rasa-pro 3.14.1__py3-none-any.whl → 3.15.0a1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registry.
Potentially problematic release: this version of rasa-pro has been flagged as potentially problematic.
- rasa/builder/config.py +4 -0
- rasa/builder/copilot/copilot.py +28 -9
- rasa/builder/copilot/models.py +171 -4
- rasa/builder/document_retrieval/inkeep_document_retrieval.py +2 -0
- rasa/builder/download.py +1 -1
- rasa/builder/service.py +101 -24
- rasa/builder/telemetry/__init__.py +0 -0
- rasa/builder/telemetry/copilot_langfuse_telemetry.py +384 -0
- rasa/builder/{copilot/telemetry.py → telemetry/copilot_segment_telemetry.py} +21 -3
- rasa/constants.py +1 -0
- rasa/core/policies/flows/flow_executor.py +20 -6
- rasa/core/run.py +15 -4
- rasa/e2e_test/e2e_config.py +4 -3
- rasa/engine/recipes/default_components.py +16 -6
- rasa/graph_components/validators/default_recipe_validator.py +10 -4
- rasa/nlu/classifiers/diet_classifier.py +2 -0
- rasa/shared/core/slots.py +55 -24
- rasa/shared/utils/common.py +9 -1
- rasa/utils/common.py +9 -0
- rasa/utils/endpoints.py +2 -0
- rasa/utils/installation_utils.py +111 -0
- rasa/utils/tensorflow/callback.py +2 -0
- rasa/utils/train_utils.py +2 -0
- rasa/version.py +1 -1
- {rasa_pro-3.14.1.dist-info → rasa_pro-3.15.0a1.dist-info}/METADATA +4 -2
- {rasa_pro-3.14.1.dist-info → rasa_pro-3.15.0a1.dist-info}/RECORD +29 -26
- {rasa_pro-3.14.1.dist-info → rasa_pro-3.15.0a1.dist-info}/NOTICE +0 -0
- {rasa_pro-3.14.1.dist-info → rasa_pro-3.15.0a1.dist-info}/WHEEL +0 -0
- {rasa_pro-3.14.1.dist-info → rasa_pro-3.15.0a1.dist-info}/entry_points.txt +0 -0
rasa/builder/telemetry/copilot_langfuse_telemetry.py ADDED
@@ -0,0 +1,384 @@
+from functools import wraps
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    AsyncGenerator,
+    Callable,
+    Dict,
+    List,
+    Optional,
+)
+
+if TYPE_CHECKING:
+    from rasa.builder.copilot.copilot import Copilot
+    from rasa.builder.copilot.models import CopilotContext
+    from rasa.builder.document_retrieval.inkeep_document_retrieval import (
+        InKeepDocumentRetrieval,
+    )
+
+import langfuse
+import structlog
+
+from rasa.builder.copilot.copilot_response_handler import CopilotResponseHandler
+from rasa.builder.copilot.models import (
+    CopilotRequest,
+    EventContent,
+    UsageStatistics,
+    UserChatMessage,
+)
+from rasa.builder.document_retrieval.models import Document
+from rasa.builder.models import BotFiles
+from rasa.builder.shared.tracker_context import TrackerContext
+
+structlogger = structlog.get_logger()
+
+
+class CopilotLangfuseTelemetry:
+    @staticmethod
+    def trace_copilot_tracker_context(
+        tracker_context: Optional[TrackerContext],
+        max_conversation_turns: int,
+        session_id: str,
+    ) -> None:
+        """Trace the copilot tracker context.
+
+        Args:
+            tracker_context: The tracker context.
+            max_conversation_turns: The maximum number of conversation turns to be
+                fetched from the tracker.
+            session_id: The session ID used to fetch the right tracker.
+        """
+        langfuse_client = langfuse.get_client()
+        # Use `update_current_span` to update the current span of the trace.
+        langfuse_client.update_current_span(
+            output={
+                "tracker_context": (
+                    tracker_context.model_dump() if tracker_context else None
+                ),
+            },
+            metadata={
+                "max_conversation_turns": max_conversation_turns,
+                "session_id": session_id,
+            },
+        )
+
+    @staticmethod
+    def trace_copilot_relevant_assistant_files(
+        relevant_assistant_files: BotFiles,
+    ) -> None:
+        """Trace the copilot relevant assistant files.
+
+        Args:
+            relevant_assistant_files: The relevant assistant files.
+        """
+        langfuse_client = langfuse.get_client()
+        # Use `update_current_span` to update the current span of the trace.
+        langfuse_client.update_current_span(
+            output={
+                "relevant_assistant_files": relevant_assistant_files,
+            },
+        )
+
+    @staticmethod
+    def setup_copilot_endpoint_call_trace_attributes(
+        hello_rasa_project_id: str,
+        chat_id: str,
+        user_id: str,
+        request: CopilotRequest,
+        handler: CopilotResponseHandler,
+        relevant_documents: list[Document],
+        copilot_context: "CopilotContext",
+    ) -> None:
+        """Set up the current langfuse trace with project and user context.
+
+        Args:
+            hello_rasa_project_id: The Hello Rasa project ID.
+            chat_id: The chat/conversation ID.
+            user_id: The user ID.
+            request: The parsed CopilotRequest object.
+            handler: The response handler containing generated responses.
+            relevant_documents: The relevant documents used to generate the response.
+        """
+        langfuse_client = langfuse.get_client()
+        user_message = CopilotLangfuseTelemetry._extract_last_user_message_content(
+            request
+        )
+        tracker_event_attachments = (
+            CopilotLangfuseTelemetry._extract_tracker_event_attachments(request)
+        )
+        response_category = CopilotLangfuseTelemetry._extract_response_category(handler)
+        reference_section_entries = CopilotLangfuseTelemetry._extract_references(
+            handler, relevant_documents
+        )
+
+        # Create a session ID as a composite ID from project id, user id and chat id
+        session_id = CopilotLangfuseTelemetry._create_session_id(
+            hello_rasa_project_id, user_id, chat_id
+        )
+        # Use `update_current_trace` to update the top level trace.
+        langfuse_client.update_current_trace(
+            user_id=user_id,
+            session_id=session_id,
+            input={
+                "message": user_message,
+                "tracker_event_attachments": tracker_event_attachments,
+            },
+            output={
+                "answer": CopilotLangfuseTelemetry._full_text(handler),
+                "response_category": response_category,
+                "references": reference_section_entries,
+            },
+            metadata={
+                "ids": {
+                    "user_id": user_id,
+                    "project_id": hello_rasa_project_id,
+                    "chat_history_id": chat_id,
+                },
+                "copilot_additional_context": {
+                    "relevant_documents": [
+                        doc.model_dump() for doc in relevant_documents
+                    ],
+                    "relevant_assistant_files": copilot_context.assistant_files,
+                    "assistant_tracker_context": (
+                        copilot_context.tracker_context.model_dump()
+                        if copilot_context.tracker_context
+                        else None
+                    ),
+                    "assistant_logs": copilot_context.assistant_logs,
+                    "copilot_chat_history": [
+                        message.model_dump()
+                        for message in copilot_context.copilot_chat_history
+                    ],
+                },
+            },
+            tags=[response_category] if response_category else [],
+        )
+
+    @staticmethod
+    def trace_copilot_streaming_generation(
+        func: Callable[..., AsyncGenerator[str, None]],
+    ) -> Callable[..., AsyncGenerator[str, None]]:
+        """Custom decorator for tracing async streaming of the Copilot's LLM generation.
+
+        This decorator handles Langfuse tracing for async streaming of the Copilot's LLM
+        generation by manually managing the generation span and updating it with usage
+        statistics after the stream completes.
+        """
+
+        @wraps(func)
+        async def wrapper(
+            self: "Copilot", messages: List[Dict[str, Any]]
+        ) -> AsyncGenerator[str, None]:
+            langfuse_client = langfuse.get_client()
+
+            with langfuse_client.start_as_current_generation(
+                name=f"{self.__class__.__name__}.{func.__name__}",
+                input={"messages": messages},
+            ) as generation:
+                output = []
+                # Call the original streaming function and start capturing the output
+                async for chunk in func(self, messages):
+                    output.append(chunk)
+                    yield chunk
+
+                # Update the span's model parameters and output after streaming is
+                # complete
+                generation.update(
+                    model_parameters=self.llm_config, output="".join(output)
+                )
+
+                # Update the span's usage statistics after streaming is complete
+                if self.usage_statistics:
+                    CopilotLangfuseTelemetry._update_generation_span_with_usage_statistics(
+                        generation, self.usage_statistics
+                    )
+
+        return wrapper
+
+    @staticmethod
+    def trace_document_retrieval_generation(
+        func: Callable[..., Any],
+    ) -> Callable[..., Any]:
+        """Custom decorator for tracing document retrieval generation with Langfuse.
+
+        This decorator handles Langfuse tracing for document retrieval API calls
+        by manually managing the generation span and updating it with usage statistics.
+        """
+
+        @wraps(func)
+        async def wrapper(
+            self: "InKeepDocumentRetrieval",
+            query: str,
+            temperature: float,
+            timeout: float,
+        ) -> Any:
+            langfuse_client = langfuse.get_client()
+
+            with langfuse_client.start_as_current_generation(
+                name=f"{self.__class__.__name__}.{func.__name__}",
+                input={
+                    "query": query,
+                    "temperature": temperature,
+                    "timeout": timeout,
+                },
+            ) as generation:
+                # Call the original function
+                response = await func(self, query, temperature, timeout)
+
+                # Update the span with response content
+                generation.update(
+                    output=response,
+                    model_parameters={
+                        "temperature": str(temperature),
+                        "timeout": str(timeout),
+                    },
+                )
+
+                # Update usage statistics if available
+                usage_statistics = UsageStatistics.from_chat_completion_response(
+                    response
+                )
+                if usage_statistics:
+                    CopilotLangfuseTelemetry._update_generation_span_with_usage_statistics(
+                        generation, usage_statistics
+                    )
+
+                return response
+
+        return wrapper
+
+    @staticmethod
+    def _extract_last_user_message_content(request: CopilotRequest) -> Optional[str]:
+        """Extract the last user message from the CopilotRequest object.
+
+        Args:
+            request: The CopilotRequest object.
+        """
+        if not isinstance(request.last_message, UserChatMessage):
+            return None
+        return request.last_message.get_flattened_text_content()
+
+    @staticmethod
+    def _extract_tracker_event_attachments(
+        request: CopilotRequest,
+    ) -> list[Dict[str, Any]]:
+        """Extract tracker event attachments from the last user message.
+
+        Args:
+            request: The CopilotRequest object.
+
+        Returns:
+            The event content block sent with the last user message in the
+            dictionary format.
+        """
+        last_message = request.last_message
+        if not isinstance(last_message, UserChatMessage):
+            return []
+        return [
+            attachment.model_dump()
+            for attachment in last_message.get_content_blocks_by_type(EventContent)
+        ]
+
+    @staticmethod
+    def _extract_response_category(handler: CopilotResponseHandler) -> Optional[str]:
+        """Extract the response category from the response handler.
+
+        Args:
+            handler: The response handler containing generated response.
+
+        Returns:
+            The response category of the first generated response, or None if no
+            responses.
+        """
+        if not handler.generated_responses:
+            return None
+        # The handler contains multiple chunks of one response. We use the first chunk's
+        # response category.
+        return handler.generated_responses[0].response_category.value
+
+    @staticmethod
+    def _full_text(handler: CopilotResponseHandler) -> str:
+        """Extract full text from the response handler.
+
+        Args:
+            handler: The response handler containing generated responses.
+
+        Returns:
+            The concatenated content of all generated responses.
+        """
+        return "".join(
+            response.content
+            for response in handler.generated_responses
+            if getattr(response, "content", None)
+        )
+
+    @staticmethod
+    def _extract_references(
+        handler: CopilotResponseHandler,
+        relevant_documents: list[Document],
+    ) -> List[Dict[str, Any]]:
+        """Extract reference entries from the response handler.
+
+        Args:
+            handler: The response handler containing generated responses.
+            relevant_documents: The relevant documents used to generate the response.
+
+        Returns:
+            A list of reference entries in dictionary format.
+        """
+        if not relevant_documents:
+            return []
+
+        reference_entries: list[Dict[str, Any]] = []
+        reference_section = handler.extract_references(relevant_documents)
+        for reference_entry in reference_section.references:
+            reference_entries.append(
+                reference_entry.model_dump(
+                    exclude={"response_category", "response_completeness"}
+                )
+            )
+
+        return reference_entries
+
+    @staticmethod
+    def _update_generation_span_with_usage_statistics(
+        generation_span: langfuse.LangfuseGeneration,
+        usage_statistics: UsageStatistics,
+    ) -> None:
+        """Update the generation span with the usage statistics.
+
+        Args:
+            generation_span: The generation span.
+            usage_statistics: The usage statistics of the generation.
+        """
+        generation_span.update(
+            usage_details={
+                "input_non_cached_usage": (
+                    usage_statistics.non_cached_prompt_tokens or 0
+                ),
+                "input_cached_usage": usage_statistics.cached_prompt_tokens or 0,
+                "output_usage": usage_statistics.completion_tokens or 0,
+                "total": usage_statistics.total_tokens or 0,
+            },
+            cost_details={
+                "input_non_cached_cost": usage_statistics.non_cached_cost or 0,
+                "input_cached_cost": usage_statistics.cached_cost or 0,
+                "output_cost": usage_statistics.output_cost or 0,
+                "total": usage_statistics.total_cost or 0,
+            },
+            model=usage_statistics.model,
+        )
+
+    @staticmethod
+    def _create_session_id(
+        hello_rasa_project_id: str,
+        user_id: str,
+        chat_id: str,
+    ) -> str:
+        """Create a session ID as a composite from project id, user id and chat id."""
+        pattern = "PID-{project_id}-UID-{user_id}-CID-{chat_id}"
+        return pattern.format(
+            project_id=hello_rasa_project_id,
+            user_id=user_id,
+            chat_id=chat_id,
+        )
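To show how the streaming decorator above is meant to be used, here is a minimal sketch. The `ToyCopilot` class, its `llm_config` and `usage_statistics` attributes, and the canned chunks are hypothetical stand-ins for the real `Copilot`, and a configured Langfuse client is assumed to be available in the environment.

```python
from typing import Any, AsyncGenerator, Dict, List

from rasa.builder.telemetry.copilot_langfuse_telemetry import CopilotLangfuseTelemetry


class ToyCopilot:
    """Hypothetical stand-in for the real Copilot, used only for illustration."""

    def __init__(self) -> None:
        # The decorator reads these attributes after the stream completes.
        self.llm_config: Dict[str, Any] = {"model": "example-model"}  # assumed shape
        self.usage_statistics = None  # the real Copilot populates this while streaming

    @CopilotLangfuseTelemetry.trace_copilot_streaming_generation
    async def generate(
        self, messages: List[Dict[str, Any]]
    ) -> AsyncGenerator[str, None]:
        # Stand-in for the actual LLM streaming call; every chunk is captured by
        # the decorator and re-yielded to the caller unchanged.
        for chunk in ("Hello", ", ", "world"):
            yield chunk
```

Consuming the stream with `async for chunk in ToyCopilot().generate(messages)` leaves the caller's view unchanged; the decorator records the joined chunks as the generation output and, when `usage_statistics` is set, the token and cost details as well.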
rasa/builder/{copilot/telemetry.py → telemetry/copilot_segment_telemetry.py} RENAMED
@@ -1,14 +1,25 @@
 import datetime as dt
 import os
 import uuid
-from typing import
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Iterable,
+    Optional,
+    Sequence,
+)
+
+if TYPE_CHECKING:
+    pass
 
 import structlog
 
 from rasa import telemetry
 from rasa.builder.copilot.constants import COPILOT_SEGMENT_WRITE_KEY_ENV_VAR
 from rasa.builder.copilot.copilot_response_handler import CopilotResponseHandler
-from rasa.builder.copilot.models import
+from rasa.builder.copilot.models import (
+    EventContent,
+)
 from rasa.builder.document_retrieval.models import Document
 from rasa.telemetry import (
     SEGMENT_TRACK_ENDPOINT,
@@ -56,7 +67,7 @@ def _track(event: str, user_id: str, properties: dict) -> None:
         structlogger.warning("builder.telemetry.track_failed", error=str(e))
 
 
-class
+class CopilotSegmentTelemetry:
     def __init__(
         self,
         *,
@@ -96,6 +107,7 @@ class CopilotTelemetry:
         latency_ms: int,
         model: str,
         input_tokens: Optional[int] = None,
+        cached_prompt_tokens: Optional[int] = None,
         output_tokens: Optional[int] = None,
         total_tokens: Optional[int] = None,
         system_message: Optional[dict[str, Any]] = None,
@@ -112,6 +124,7 @@ class CopilotTelemetry:
             latency_ms: End-to-end Copilot latency to produce this response.
             model: The model used to generate the response.
             input_tokens: Number of input tokens used (optional).
+            cached_prompt_tokens: Number of cached prompt tokens.
             output_tokens: Number of output tokens generated (optional).
             total_tokens: Total number of tokens used (input + output) (optional).
             system_message: The system message used (optional).
@@ -135,6 +148,7 @@ class CopilotTelemetry:
             "latency_ms": latency_ms,
             "model": model,
             "input_tokens": input_tokens,
+            "cached_prompt_tokens": cached_prompt_tokens,
             "output_tokens": output_tokens,
             "total_tokens": total_tokens,
             "chat_history": chat_history,
@@ -193,6 +207,7 @@ class CopilotTelemetry:
         latency_ms: int,
         model: str,
         prompt_tokens: int,
+        cached_prompt_tokens: int,
        completion_tokens: int,
         total_tokens: int,
         system_message: dict[str, Any],
@@ -208,11 +223,13 @@ class CopilotTelemetry:
             latency_ms: End-to-end Copilot latency to produce this response.
             model: The model used to generate the response.
             prompt_tokens: Number of input tokens used.
+            cached_prompt_tokens: Number of cached prompt tokens.
             completion_tokens: Number of output tokens generated.
             total_tokens: Total number of tokens used (input + output).
             system_message: The system message used.
             chat_history: The chat history messages used.
             last_user_message: The last user message used.
+            tracker_event_attachments: List of tracker event attachments.
         """
         structlogger.debug("builder.telemetry.log_copilot_from_handler")
         text = self._full_text(handler)
@@ -223,6 +240,7 @@ class CopilotTelemetry:
             latency_ms=latency_ms,
             model=model,
             input_tokens=prompt_tokens,
+            cached_prompt_tokens=cached_prompt_tokens,
             output_tokens=completion_tokens,
             total_tokens=total_tokens,
             system_message=system_message,
rasa/constants.py CHANGED
@@ -33,6 +33,7 @@ ENV_MCP_LOGGING_ENABLED = "MCP_LOGGING_ENABLED"
 ENV_LOG_LEVEL_MATPLOTLIB = "LOG_LEVEL_MATPLOTLIB"
 ENV_LOG_LEVEL_RABBITMQ = "LOG_LEVEL_RABBITMQ"
 ENV_LOG_LEVEL_KAFKA = "LOG_LEVEL_KAFKA"
+ENV_LOG_LEVEL_PYMONGO = "LOG_LEVEL_PYMONGO"
 
 DEFAULT_SANIC_WORKERS = 1
 ENV_SANIC_WORKERS = "SANIC_WORKERS"
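The new `ENV_LOG_LEVEL_PYMONGO` constant follows the pattern of the neighbouring `LOG_LEVEL_*` variables. As a hedged illustration of how such a variable is typically consumed (the default level and logger wiring below are assumptions, not Rasa's actual code):

```python
import logging
import os

ENV_LOG_LEVEL_PYMONGO = "LOG_LEVEL_PYMONGO"

# Hypothetical consumer: keep pymongo quiet unless the env var raises verbosity.
pymongo_level = os.environ.get(ENV_LOG_LEVEL_PYMONGO, "ERROR")  # assumed default
logging.getLogger("pymongo").setLevel(pymongo_level)
```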
rasa/core/policies/flows/flow_executor.py CHANGED
@@ -357,6 +357,10 @@ def reset_scoped_slots(
     flow_persistable_slots = current_flow.persisted_slots
 
     for step in current_flow.steps_with_calls_resolved:
+        # take persisted slots from called flows into consideration
+        # before resetting slots
+        if isinstance(step, CallFlowStep) and step.called_flow_reference:
+            flow_persistable_slots.extend(step.called_flow_reference.persisted_slots)
         if isinstance(step, CollectInformationFlowStep):
             # reset all slots scoped to the flow
             slot_name = step.collect
@@ -368,7 +372,22 @@ def reset_scoped_slots(
     # slots set by the set slots step should be reset after the flow ends
     # unless they are also used in a collect step where `reset_after_flow_ends`
     # is set to `False` or set in the `persisted_slots` list.
-    resettable_set_slots =
+    resettable_set_slots = _get_resettable_set_slots(
+        current_flow, not_resettable_slot_names, flow_persistable_slots
+    )
+    for name in resettable_set_slots:
+        _reset_slot(name, tracker)
+
+    return events
+
+
+def _get_resettable_set_slots(
+    current_flow: Flow,
+    not_resettable_slot_names: set[Text],
+    flow_persistable_slots: List[Text],
+) -> List[Text]:
+    """Get list of slot names from SetSlotsFlowStep that should be reset."""
+    return [
         slot["key"]
         for step in current_flow.steps_with_calls_resolved
         if isinstance(step, SetSlotsFlowStep)
@@ -377,11 +396,6 @@ def reset_scoped_slots(
         and slot["key"] not in flow_persistable_slots
     ]
 
-    for name in resettable_set_slots:
-        _reset_slot(name, tracker)
-
-    return events
-
 
 async def advance_flows(
     tracker: DialogueStateTracker,
rasa/core/run.py CHANGED
@@ -328,10 +328,21 @@
 
     logger.info(f"Starting Rasa server on {protocol}://{interface}:{port}")
 
-    app
-
-
-
+    async def load_agent_and_check_failure(app: Sanic, loop: AbstractEventLoop) -> None:
+        """Load agent and exit if it fails in non-debug mode."""
+        try:
+            await load_agent_on_start(
+                model_path, endpoints, remote_storage, sub_agents, app, loop
+            )
+        except Exception as e:
+            is_debug = logger.isEnabledFor(logging.DEBUG)
+            if is_debug:
+                raise e  # show traceback in debug
+            # non-debug: log and exit without starting server
+            logger.error(f"Failed to load agent: {e}")
+            os._exit(1)  # Any other exit method would show a traceback.
+
+    app.register_listener(load_agent_and_check_failure, "before_server_start")
 
     app.register_listener(
         licensing.validate_limited_server_license, "after_server_start"
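The change relies on Sanic's listener mechanism: a coroutine registered for `before_server_start` runs before the server accepts connections, so a failure there can stop startup entirely. A generic sketch of that pattern (the `prepare` coroutine and the readiness flag are hypothetical):

```python
from asyncio import AbstractEventLoop

from sanic import Sanic

app = Sanic("example")


async def prepare(app: Sanic, loop: AbstractEventLoop) -> None:
    # Runs before Sanic starts accepting connections; raising here (or exiting
    # the process, as the Rasa change does outside debug mode) aborts startup.
    app.ctx.ready = True


app.register_listener(prepare, "before_server_start")
```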
rasa/e2e_test/e2e_config.py CHANGED
@@ -72,9 +72,10 @@ class LLMJudgeConfig(BaseModel):
 
         llm_config = resolve_model_client_config(llm_config)
         llm_config, llm_extra_parameters = cls.extract_attributes(llm_config)
-
-        llm_config
-
+        if not llm_config:
+            llm_config = combine_custom_and_default_config(
+                llm_config, cls.get_default_llm_config()
+            )
         embeddings_config = resolve_model_client_config(embeddings)
         embeddings_config, embeddings_extra_parameters = cls.extract_attributes(
             embeddings_config
rasa/engine/recipes/default_components.py CHANGED
@@ -27,22 +27,32 @@ from rasa.shared.utils.common import conditional_import
 
 # components dependent on tensorflow
 TEDPolicy, TED_POLICY_AVAILABLE = conditional_import(
-    "rasa.core.policies.ted_policy", "TEDPolicy"
+    "rasa.core.policies.ted_policy", "TEDPolicy", check_installation_setup=True
 )
 UnexpecTEDIntentPolicy, UNEXPECTED_INTENT_POLICY_AVAILABLE = conditional_import(
-    "rasa.core.policies.unexpected_intent_policy",
+    "rasa.core.policies.unexpected_intent_policy",
+    "UnexpecTEDIntentPolicy",
+    check_installation_setup=True,
 )
 DIETClassifier, DIET_CLASSIFIER_AVAILABLE = conditional_import(
-    "rasa.nlu.classifiers.diet_classifier",
+    "rasa.nlu.classifiers.diet_classifier",
+    "DIETClassifier",
+    check_installation_setup=True,
 )
 ConveRTFeaturizer, CONVERT_FEATURIZER_AVAILABLE = conditional_import(
-    "rasa.nlu.featurizers.dense_featurizer.convert_featurizer",
+    "rasa.nlu.featurizers.dense_featurizer.convert_featurizer",
+    "ConveRTFeaturizer",
+    check_installation_setup=True,
 )
 LanguageModelFeaturizer, LANGUAGE_MODEL_FEATURIZER_AVAILABLE = conditional_import(
-    "rasa.nlu.featurizers.dense_featurizer.lm_featurizer",
+    "rasa.nlu.featurizers.dense_featurizer.lm_featurizer",
+    "LanguageModelFeaturizer",
+    check_installation_setup=True,
 )
 ResponseSelector, RESPONSE_SELECTOR_AVAILABLE = conditional_import(
-    "rasa.nlu.selectors.response_selector",
+    "rasa.nlu.selectors.response_selector",
+    "ResponseSelector",
+    check_installation_setup=True,
 )
 
 # components dependent on skops
rasa/graph_components/validators/default_recipe_validator.py CHANGED
@@ -40,16 +40,22 @@ from rasa.shared.utils.common import conditional_import
 
 # Conditional imports for TensorFlow-dependent components
 TEDPolicy, TED_POLICY_AVAILABLE = conditional_import(
-    "rasa.core.policies.ted_policy", "TEDPolicy"
+    "rasa.core.policies.ted_policy", "TEDPolicy", check_installation_setup=True
 )
 UnexpecTEDIntentPolicy, UNEXPECTED_INTENT_POLICY_AVAILABLE = conditional_import(
-    "rasa.core.policies.unexpected_intent_policy",
+    "rasa.core.policies.unexpected_intent_policy",
+    "UnexpecTEDIntentPolicy",
+    check_installation_setup=True,
 )
 DIETClassifier, DIET_CLASSIFIER_AVAILABLE = conditional_import(
-    "rasa.nlu.classifiers.diet_classifier",
+    "rasa.nlu.classifiers.diet_classifier",
+    "DIETClassifier",
+    check_installation_setup=True,
 )
 ResponseSelector, RESPONSE_SELECTOR_AVAILABLE = conditional_import(
-    "rasa.nlu.selectors.response_selector",
+    "rasa.nlu.selectors.response_selector",
+    "ResponseSelector",
+    check_installation_setup=True,
 )
 
 # Conditional imports for nlu components requiring other dependencies than tensorflow
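Both of the preceding files now pass `check_installation_setup=True` into `conditional_import`. The real helper lives in `rasa.shared.utils.common` and presumably runs the new installation check from `rasa.utils.installation_utils` before importing; the sketch below only illustrates the try-import-and-report idea and is not the actual implementation:

```python
import importlib
from typing import Any, Optional, Tuple


def conditional_import_sketch(
    module_name: str, attribute: str, check_installation_setup: bool = False
) -> Tuple[Optional[Any], bool]:
    """Illustrative re-creation: import a component if possible and report
    availability instead of raising at import time."""
    try:
        if check_installation_setup:
            # Assumption: the real helper would run an installation sanity check
            # here, e.g. check_for_installation_issues(), before importing.
            pass
        module = importlib.import_module(module_name)
        return getattr(module, attribute), True
    except ImportError:
        return None, False


# Mirrors how the diff calls the real helper:
# TEDPolicy, TED_POLICY_AVAILABLE = conditional_import(
#     "rasa.core.policies.ted_policy", "TEDPolicy", check_installation_setup=True
# )
```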
rasa/nlu/classifiers/diet_classifier.py CHANGED
@@ -9,9 +9,11 @@ from typing import Any, Dict, List, Optional, Text, Tuple, Type, TypeVar, Union
 import numpy as np
 import scipy.sparse
 
+from rasa.utils.installation_utils import check_for_installation_issues
 from rasa.utils.tensorflow import TENSORFLOW_AVAILABLE
 
 if TENSORFLOW_AVAILABLE:
+    check_for_installation_issues()
     import tensorflow as tf
 else:
     tf = None
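Downstream code can reuse the same guard rather than importing TensorFlow unconditionally: branch on `TENSORFLOW_AVAILABLE` and provide a fallback. A minimal sketch (the fallback behaviour is illustrative, not part of the release):

```python
from rasa.utils.tensorflow import TENSORFLOW_AVAILABLE

if TENSORFLOW_AVAILABLE:
    import tensorflow as tf

    def count_devices() -> int:
        # Only reachable when TensorFlow imports cleanly; in the release, the
        # installation check runs before this point inside diet_classifier.py.
        return len(tf.config.list_physical_devices())
else:

    def count_devices() -> int:
        # Illustrative fallback when TensorFlow is not installed.
        return 0
```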