rasa-pro 3.14.0rc4__py3-none-any.whl → 3.15.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rasa-pro might be problematic. Click here for more details.
- rasa/agents/agent_manager.py +7 -5
- rasa/agents/protocol/a2a/a2a_agent.py +13 -11
- rasa/agents/protocol/mcp/mcp_base_agent.py +49 -11
- rasa/agents/validation.py +4 -2
- rasa/builder/config.py +4 -0
- rasa/builder/copilot/copilot.py +28 -9
- rasa/builder/copilot/copilot_templated_message_provider.py +1 -1
- rasa/builder/copilot/models.py +171 -4
- rasa/builder/document_retrieval/inkeep_document_retrieval.py +2 -0
- rasa/builder/download.py +1 -1
- rasa/builder/service.py +101 -24
- rasa/builder/telemetry/__init__.py +0 -0
- rasa/builder/telemetry/copilot_langfuse_telemetry.py +384 -0
- rasa/builder/{copilot/telemetry.py → telemetry/copilot_segment_telemetry.py} +21 -3
- rasa/builder/validation_service.py +4 -0
- rasa/cli/arguments/data.py +9 -0
- rasa/cli/data.py +72 -6
- rasa/cli/interactive.py +3 -0
- rasa/cli/llm_fine_tuning.py +1 -0
- rasa/cli/project_templates/defaults.py +1 -0
- rasa/cli/validation/bot_config.py +2 -0
- rasa/constants.py +2 -1
- rasa/core/actions/action_exceptions.py +1 -1
- rasa/core/agent.py +4 -1
- rasa/core/available_agents.py +1 -1
- rasa/core/exceptions.py +1 -1
- rasa/core/featurizers/tracker_featurizers.py +3 -2
- rasa/core/persistor.py +7 -7
- rasa/core/policies/flows/agent_executor.py +84 -4
- rasa/core/policies/flows/flow_exceptions.py +5 -2
- rasa/core/policies/flows/flow_executor.py +23 -8
- rasa/core/policies/flows/mcp_tool_executor.py +7 -1
- rasa/core/policies/rule_policy.py +1 -1
- rasa/core/run.py +15 -4
- rasa/dialogue_understanding/commands/cancel_flow_command.py +1 -1
- rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +1 -1
- rasa/e2e_test/e2e_config.py +4 -3
- rasa/engine/recipes/default_components.py +16 -6
- rasa/graph_components/validators/default_recipe_validator.py +10 -4
- rasa/model_manager/runner_service.py +1 -1
- rasa/nlu/classifiers/diet_classifier.py +2 -0
- rasa/privacy/privacy_config.py +1 -1
- rasa/shared/agents/auth/auth_strategy/oauth2_auth_strategy.py +4 -7
- rasa/shared/core/slots.py +55 -24
- rasa/shared/core/training_data/story_reader/story_reader.py +1 -1
- rasa/shared/exceptions.py +23 -2
- rasa/shared/providers/llm/litellm_router_llm_client.py +2 -2
- rasa/shared/utils/common.py +9 -1
- rasa/shared/utils/llm.py +21 -4
- rasa/shared/utils/mcp/server_connection.py +7 -4
- rasa/studio/download.py +3 -0
- rasa/studio/prompts.py +1 -0
- rasa/studio/upload.py +4 -0
- rasa/utils/common.py +9 -0
- rasa/utils/endpoints.py +2 -0
- rasa/utils/installation_utils.py +111 -0
- rasa/utils/log_utils.py +20 -1
- rasa/utils/tensorflow/callback.py +2 -0
- rasa/utils/train_utils.py +2 -0
- rasa/version.py +1 -1
- {rasa_pro-3.14.0rc4.dist-info → rasa_pro-3.15.0a1.dist-info}/METADATA +4 -2
- {rasa_pro-3.14.0rc4.dist-info → rasa_pro-3.15.0a1.dist-info}/RECORD +65 -62
- {rasa_pro-3.14.0rc4.dist-info → rasa_pro-3.15.0a1.dist-info}/NOTICE +0 -0
- {rasa_pro-3.14.0rc4.dist-info → rasa_pro-3.15.0a1.dist-info}/WHEEL +0 -0
- {rasa_pro-3.14.0rc4.dist-info → rasa_pro-3.15.0a1.dist-info}/entry_points.txt +0 -0
rasa/builder/service.py
CHANGED
|
@@ -5,6 +5,7 @@ import time
|
|
|
5
5
|
from http import HTTPStatus
|
|
6
6
|
from typing import Any, Optional
|
|
7
7
|
|
|
8
|
+
import langfuse
|
|
8
9
|
import structlog
|
|
9
10
|
from sanic import Blueprint, HTTPResponse, response
|
|
10
11
|
from sanic.request import Request
|
|
@@ -41,7 +42,6 @@ from rasa.builder.copilot.signing import (
|
|
|
41
42
|
create_signature_envelope_for_text,
|
|
42
43
|
verify_signature,
|
|
43
44
|
)
|
|
44
|
-
from rasa.builder.copilot.telemetry import CopilotTelemetry
|
|
45
45
|
from rasa.builder.download import create_bot_project_archive
|
|
46
46
|
from rasa.builder.guardrails.constants import (
|
|
47
47
|
BLOCK_SCOPE_PROJECT,
|
|
@@ -65,6 +65,7 @@ from rasa.builder.models import (
|
|
|
65
65
|
ApiErrorResponse,
|
|
66
66
|
AssistantInfo,
|
|
67
67
|
BotData,
|
|
68
|
+
BotFiles,
|
|
68
69
|
JobCreateResponse,
|
|
69
70
|
JobStatus,
|
|
70
71
|
JobStatusEvent,
|
|
@@ -74,6 +75,8 @@ from rasa.builder.models import (
|
|
|
74
75
|
)
|
|
75
76
|
from rasa.builder.project_generator import ProjectGenerator
|
|
76
77
|
from rasa.builder.shared.tracker_context import TrackerContext
|
|
78
|
+
from rasa.builder.telemetry.copilot_langfuse_telemetry import CopilotLangfuseTelemetry
|
|
79
|
+
from rasa.builder.telemetry.copilot_segment_telemetry import CopilotSegmentTelemetry
|
|
77
80
|
from rasa.core.agent import Agent
|
|
78
81
|
from rasa.core.channels.studio_chat import StudioChatInput
|
|
79
82
|
from rasa.core.exceptions import AgentNotReady
|
|
@@ -1020,6 +1023,9 @@ async def download_bot_project(request: Request) -> HTTPResponse:
|
|
|
1020
1023
|
schema=str,
|
|
1021
1024
|
)
|
|
1022
1025
|
@protected()
|
|
1026
|
+
# Disable automatic input/output capture for langfuse tracing
|
|
1027
|
+
# This allows manual control over what data is sent to langfuse
|
|
1028
|
+
@langfuse.observe(capture_input=False, capture_output=False)
|
|
1023
1029
|
async def copilot(request: Request) -> None:
|
|
1024
1030
|
"""Handle copilot requests with streaming markdown responses."""
|
|
1025
1031
|
sse = await request.respond(content_type="text/event-stream")
|
|
@@ -1046,9 +1052,12 @@ async def copilot(request: Request) -> None:
|
|
|
1046
1052
|
)
|
|
1047
1053
|
return
|
|
1048
1054
|
|
|
1049
|
-
telemetry =
|
|
1055
|
+
telemetry = CopilotSegmentTelemetry(
|
|
1056
|
+
project_id=HELLO_RASA_PROJECT_ID, user_id=user_id
|
|
1057
|
+
)
|
|
1050
1058
|
structlogger.debug("builder.copilot.telemetry.request.init")
|
|
1051
1059
|
|
|
1060
|
+
# TODO: This can be removed once Langfuse is completed.
|
|
1052
1061
|
if req.last_message and req.last_message.role == ROLE_USER:
|
|
1053
1062
|
structlogger.debug("builder.copilot.telemetry.request.user_turn")
|
|
1054
1063
|
# Offload telemetry logging to a background task
|
|
@@ -1088,26 +1097,9 @@ async def copilot(request: Request) -> None:
|
|
|
1088
1097
|
return
|
|
1089
1098
|
|
|
1090
1099
|
# 4. Get the necessary context for the copilot
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
)
|
|
1095
|
-
if (
|
|
1096
|
-
tracker_context is not None
|
|
1097
|
-
and llm_service.guardrails_policy_checker is not None
|
|
1098
|
-
):
|
|
1099
|
-
tracker_context = await llm_service.guardrails_policy_checker.check_assistant_chat_for_policy_violations( # noqa: E501
|
|
1100
|
-
tracker_context=tracker_context,
|
|
1101
|
-
hello_rasa_user_id=user_id,
|
|
1102
|
-
hello_rasa_project_id=HELLO_RASA_PROJECT_ID,
|
|
1103
|
-
lakera_project_id=LAKERA_ASSISTANT_HISTORY_GUARDRAIL_PROJECT_ID,
|
|
1104
|
-
)
|
|
1105
|
-
|
|
1106
|
-
# Copilot doesn't need to know about the docs and any file that is not a core
|
|
1107
|
-
# assistant file
|
|
1108
|
-
relevant_assistant_files = project_generator.get_bot_files(
|
|
1109
|
-
exclude_docs_directory=True,
|
|
1110
|
-
allowed_file_extensions=["yaml", "yml", "py", "jinja", "jinja2"],
|
|
1100
|
+
tracker_context = await get_tracker_context_for_copilot(request, req, user_id)
|
|
1101
|
+
relevant_assistant_files = get_relevant_assistant_files_for_copilot(
|
|
1102
|
+
project_generator,
|
|
1111
1103
|
)
|
|
1112
1104
|
context = CopilotContext(
|
|
1113
1105
|
tracker_context=tracker_context,
|
|
@@ -1162,7 +1154,7 @@ async def copilot(request: Request) -> None:
|
|
|
1162
1154
|
async for token in intercepted_stream:
|
|
1163
1155
|
await sse.send(token.to_sse_event().format())
|
|
1164
1156
|
|
|
1165
|
-
#
|
|
1157
|
+
# 8a. Offload metabase telemetry logging to a background task
|
|
1166
1158
|
request.app.add_task(
|
|
1167
1159
|
asyncio.to_thread(
|
|
1168
1160
|
telemetry.log_copilot_from_handler,
|
|
@@ -1177,9 +1169,27 @@ async def copilot(request: Request) -> None:
|
|
|
1177
1169
|
else None
|
|
1178
1170
|
),
|
|
1179
1171
|
tracker_event_attachments=generation_context.tracker_event_attachments,
|
|
1180
|
-
|
|
1172
|
+
model=copilot_client.usage_statistics.model or "N/A",
|
|
1173
|
+
prompt_tokens=copilot_client.usage_statistics.prompt_tokens or 0,
|
|
1174
|
+
cached_prompt_tokens=(
|
|
1175
|
+
copilot_client.usage_statistics.cached_prompt_tokens or 0
|
|
1176
|
+
),
|
|
1177
|
+
completion_tokens=(
|
|
1178
|
+
copilot_client.usage_statistics.completion_tokens or 0
|
|
1179
|
+
),
|
|
1180
|
+
total_tokens=copilot_client.usage_statistics.total_tokens or 0,
|
|
1181
1181
|
)
|
|
1182
1182
|
)
|
|
1183
|
+
# 8b. Setup output trace attributes for Langfuse
|
|
1184
|
+
CopilotLangfuseTelemetry.setup_copilot_endpoint_call_trace_attributes(
|
|
1185
|
+
hello_rasa_project_id=HELLO_RASA_PROJECT_ID or "N/A",
|
|
1186
|
+
chat_id=req.session_id or "N/A",
|
|
1187
|
+
user_id=user_id,
|
|
1188
|
+
request=req,
|
|
1189
|
+
handler=copilot_response_handler,
|
|
1190
|
+
relevant_documents=generation_context.relevant_documents,
|
|
1191
|
+
copilot_context=context,
|
|
1192
|
+
)
|
|
1183
1193
|
|
|
1184
1194
|
# 9. Once the stream is over, extract and send references
|
|
1185
1195
|
# if any documents were used
|
|
@@ -1365,3 +1375,70 @@ async def _handle_guardrail_violation_and_maybe_block(
|
|
|
1365
1375
|
|
|
1366
1376
|
await sse.send(message.to_sse_event().format())
|
|
1367
1377
|
return message
|
|
1378
|
+
|
|
1379
|
+
|
|
1380
|
+
@langfuse.observe(capture_input=False, capture_output=False)
|
|
1381
|
+
async def get_tracker_context_for_copilot(
|
|
1382
|
+
request: Request,
|
|
1383
|
+
req: CopilotRequest,
|
|
1384
|
+
user_id: str,
|
|
1385
|
+
) -> Optional[TrackerContext]:
|
|
1386
|
+
"""Check the assistant chat for guardrail policy violations.
|
|
1387
|
+
|
|
1388
|
+
Args:
|
|
1389
|
+
request: The request object.
|
|
1390
|
+
req: The CopilotRequest object.
|
|
1391
|
+
user_id: The user ID.
|
|
1392
|
+
|
|
1393
|
+
Returns:
|
|
1394
|
+
The tracker context if the tracker is available.
|
|
1395
|
+
"""
|
|
1396
|
+
tracker = await current_tracker_from_input_channel(request.app, req.session_id)
|
|
1397
|
+
tracker_context = TrackerContext.from_tracker(
|
|
1398
|
+
tracker, max_turns=COPILOT_ASSISTANT_TRACKER_MAX_TURNS
|
|
1399
|
+
)
|
|
1400
|
+
if (
|
|
1401
|
+
tracker_context is not None
|
|
1402
|
+
and llm_service.guardrails_policy_checker is not None
|
|
1403
|
+
):
|
|
1404
|
+
tracker_context = await llm_service.guardrails_policy_checker.check_assistant_chat_for_policy_violations( # noqa: E501
|
|
1405
|
+
tracker_context=tracker_context,
|
|
1406
|
+
hello_rasa_user_id=user_id,
|
|
1407
|
+
hello_rasa_project_id=HELLO_RASA_PROJECT_ID,
|
|
1408
|
+
lakera_project_id=LAKERA_ASSISTANT_HISTORY_GUARDRAIL_PROJECT_ID,
|
|
1409
|
+
)
|
|
1410
|
+
|
|
1411
|
+
# Track the retrieved tracker context
|
|
1412
|
+
CopilotLangfuseTelemetry.trace_copilot_tracker_context(
|
|
1413
|
+
tracker_context=tracker_context,
|
|
1414
|
+
max_conversation_turns=COPILOT_ASSISTANT_TRACKER_MAX_TURNS,
|
|
1415
|
+
session_id=req.session_id,
|
|
1416
|
+
)
|
|
1417
|
+
|
|
1418
|
+
return tracker_context
|
|
1419
|
+
|
|
1420
|
+
|
|
1421
|
+
@langfuse.observe(capture_input=False, capture_output=False)
|
|
1422
|
+
def get_relevant_assistant_files_for_copilot(
|
|
1423
|
+
project_generator: ProjectGenerator,
|
|
1424
|
+
) -> BotFiles:
|
|
1425
|
+
"""Get the relevant assistant files for the copilot.
|
|
1426
|
+
|
|
1427
|
+
Args:
|
|
1428
|
+
project_generator: The project generator.
|
|
1429
|
+
|
|
1430
|
+
Returns:
|
|
1431
|
+
The relevant assistant files.
|
|
1432
|
+
"""
|
|
1433
|
+
# Copilot doesn't need to know about the docs and any file that is not a core
|
|
1434
|
+
# assistant file
|
|
1435
|
+
files = project_generator.get_bot_files(
|
|
1436
|
+
exclude_docs_directory=True,
|
|
1437
|
+
allowed_file_extensions=["yaml", "yml", "py", "jinja", "jinja2"],
|
|
1438
|
+
)
|
|
1439
|
+
|
|
1440
|
+
# Track the retrieved assistant files
|
|
1441
|
+
CopilotLangfuseTelemetry.trace_copilot_relevant_assistant_files(
|
|
1442
|
+
relevant_assistant_files=files,
|
|
1443
|
+
)
|
|
1444
|
+
return files
|
|
File without changes
|
|
@@ -0,0 +1,384 @@
|
|
|
1
|
+
from functools import wraps
|
|
2
|
+
from typing import (
|
|
3
|
+
TYPE_CHECKING,
|
|
4
|
+
Any,
|
|
5
|
+
AsyncGenerator,
|
|
6
|
+
Callable,
|
|
7
|
+
Dict,
|
|
8
|
+
List,
|
|
9
|
+
Optional,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from rasa.builder.copilot.copilot import Copilot
|
|
14
|
+
from rasa.builder.copilot.models import CopilotContext
|
|
15
|
+
from rasa.builder.document_retrieval.inkeep_document_retrieval import (
|
|
16
|
+
InKeepDocumentRetrieval,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
import langfuse
|
|
20
|
+
import structlog
|
|
21
|
+
|
|
22
|
+
from rasa.builder.copilot.copilot_response_handler import CopilotResponseHandler
|
|
23
|
+
from rasa.builder.copilot.models import (
|
|
24
|
+
CopilotRequest,
|
|
25
|
+
EventContent,
|
|
26
|
+
UsageStatistics,
|
|
27
|
+
UserChatMessage,
|
|
28
|
+
)
|
|
29
|
+
from rasa.builder.document_retrieval.models import Document
|
|
30
|
+
from rasa.builder.models import BotFiles
|
|
31
|
+
from rasa.builder.shared.tracker_context import TrackerContext
|
|
32
|
+
|
|
33
|
+
structlogger = structlog.get_logger()
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class CopilotLangfuseTelemetry:
|
|
37
|
+
@staticmethod
|
|
38
|
+
def trace_copilot_tracker_context(
|
|
39
|
+
tracker_context: Optional[TrackerContext],
|
|
40
|
+
max_conversation_turns: int,
|
|
41
|
+
session_id: str,
|
|
42
|
+
) -> None:
|
|
43
|
+
"""Trace the copilot tracker context.
|
|
44
|
+
|
|
45
|
+
Args:
|
|
46
|
+
tracker_context: The tracker context.
|
|
47
|
+
max_conversation_turns: The maximum number of conversation turns to be
|
|
48
|
+
fetched from the tracker.
|
|
49
|
+
session_id: The session ID used to fetch the right tracker.
|
|
50
|
+
"""
|
|
51
|
+
langfuse_client = langfuse.get_client()
|
|
52
|
+
# Use `update_current_span` to update the current span of the trace.
|
|
53
|
+
langfuse_client.update_current_span(
|
|
54
|
+
output={
|
|
55
|
+
"tracker_context": (
|
|
56
|
+
tracker_context.model_dump() if tracker_context else None
|
|
57
|
+
),
|
|
58
|
+
},
|
|
59
|
+
metadata={
|
|
60
|
+
"max_conversation_turns": max_conversation_turns,
|
|
61
|
+
"session_id": session_id,
|
|
62
|
+
},
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
@staticmethod
|
|
66
|
+
def trace_copilot_relevant_assistant_files(
|
|
67
|
+
relevant_assistant_files: BotFiles,
|
|
68
|
+
) -> None:
|
|
69
|
+
"""Trace the copilot relevant assistant files.
|
|
70
|
+
|
|
71
|
+
Args:
|
|
72
|
+
relevant_assistant_files: The relevant assistant files.
|
|
73
|
+
"""
|
|
74
|
+
langfuse_client = langfuse.get_client()
|
|
75
|
+
# Use `update_current_span` to update the current span of the trace.
|
|
76
|
+
langfuse_client.update_current_span(
|
|
77
|
+
output={
|
|
78
|
+
"relevant_assistant_files": relevant_assistant_files,
|
|
79
|
+
},
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
@staticmethod
|
|
83
|
+
def setup_copilot_endpoint_call_trace_attributes(
|
|
84
|
+
hello_rasa_project_id: str,
|
|
85
|
+
chat_id: str,
|
|
86
|
+
user_id: str,
|
|
87
|
+
request: CopilotRequest,
|
|
88
|
+
handler: CopilotResponseHandler,
|
|
89
|
+
relevant_documents: list[Document],
|
|
90
|
+
copilot_context: "CopilotContext",
|
|
91
|
+
) -> None:
|
|
92
|
+
"""Set up the current langfuse trace with project and user context.
|
|
93
|
+
|
|
94
|
+
Args:
|
|
95
|
+
hello_rasa_project_id: The Hello Rasa project ID.
|
|
96
|
+
chat_id: The chat/conversation ID.
|
|
97
|
+
user_id: The user ID.
|
|
98
|
+
request: The parsed CopilotRequest object.
|
|
99
|
+
handler: The response handler containing generated responses.
|
|
100
|
+
relevant_documents: The relevant documents used to generate the response.
|
|
101
|
+
"""
|
|
102
|
+
langfuse_client = langfuse.get_client()
|
|
103
|
+
user_message = CopilotLangfuseTelemetry._extract_last_user_message_content(
|
|
104
|
+
request
|
|
105
|
+
)
|
|
106
|
+
tracker_event_attachments = (
|
|
107
|
+
CopilotLangfuseTelemetry._extract_tracker_event_attachments(request)
|
|
108
|
+
)
|
|
109
|
+
response_category = CopilotLangfuseTelemetry._extract_response_category(handler)
|
|
110
|
+
reference_section_entries = CopilotLangfuseTelemetry._extract_references(
|
|
111
|
+
handler, relevant_documents
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
# Create a session ID as a composite ID from project id, user id and chat id
|
|
115
|
+
session_id = CopilotLangfuseTelemetry._create_session_id(
|
|
116
|
+
hello_rasa_project_id, user_id, chat_id
|
|
117
|
+
)
|
|
118
|
+
# Use `update_current_trace` to update the top level trace.
|
|
119
|
+
langfuse_client.update_current_trace(
|
|
120
|
+
user_id=user_id,
|
|
121
|
+
session_id=session_id,
|
|
122
|
+
input={
|
|
123
|
+
"message": user_message,
|
|
124
|
+
"tracker_event_attachments": tracker_event_attachments,
|
|
125
|
+
},
|
|
126
|
+
output={
|
|
127
|
+
"answer": CopilotLangfuseTelemetry._full_text(handler),
|
|
128
|
+
"response_category": response_category,
|
|
129
|
+
"references": reference_section_entries,
|
|
130
|
+
},
|
|
131
|
+
metadata={
|
|
132
|
+
"ids": {
|
|
133
|
+
"user_id": user_id,
|
|
134
|
+
"project_id": hello_rasa_project_id,
|
|
135
|
+
"chat_history_id": chat_id,
|
|
136
|
+
},
|
|
137
|
+
"copilot_additional_context": {
|
|
138
|
+
"relevant_documents": [
|
|
139
|
+
doc.model_dump() for doc in relevant_documents
|
|
140
|
+
],
|
|
141
|
+
"relevant_assistant_files": copilot_context.assistant_files,
|
|
142
|
+
"assistant_tracker_context": (
|
|
143
|
+
copilot_context.tracker_context.model_dump()
|
|
144
|
+
if copilot_context.tracker_context
|
|
145
|
+
else None
|
|
146
|
+
),
|
|
147
|
+
"assistant_logs": copilot_context.assistant_logs,
|
|
148
|
+
"copilot_chat_history": [
|
|
149
|
+
message.model_dump()
|
|
150
|
+
for message in copilot_context.copilot_chat_history
|
|
151
|
+
],
|
|
152
|
+
},
|
|
153
|
+
},
|
|
154
|
+
tags=[response_category] if response_category else [],
|
|
155
|
+
)
|
|
156
|
+
|
|
157
|
+
@staticmethod
|
|
158
|
+
def trace_copilot_streaming_generation(
|
|
159
|
+
func: Callable[..., AsyncGenerator[str, None]],
|
|
160
|
+
) -> Callable[..., AsyncGenerator[str, None]]:
|
|
161
|
+
"""Custom decorator for tracing async streaming of the Copilot's LLM generation.
|
|
162
|
+
|
|
163
|
+
This decorator handles Langfuse tracing for async streaming of the Copilot's LLM
|
|
164
|
+
generation by manually managing the generation span and updating it with usage
|
|
165
|
+
statistics after the stream completes.
|
|
166
|
+
"""
|
|
167
|
+
|
|
168
|
+
@wraps(func)
|
|
169
|
+
async def wrapper(
|
|
170
|
+
self: "Copilot", messages: List[Dict[str, Any]]
|
|
171
|
+
) -> AsyncGenerator[str, None]:
|
|
172
|
+
langfuse_client = langfuse.get_client()
|
|
173
|
+
|
|
174
|
+
with langfuse_client.start_as_current_generation(
|
|
175
|
+
name=f"{self.__class__.__name__}.{func.__name__}",
|
|
176
|
+
input={"messages": messages},
|
|
177
|
+
) as generation:
|
|
178
|
+
output = []
|
|
179
|
+
# Call the original streaming function and start capturing the output
|
|
180
|
+
async for chunk in func(self, messages):
|
|
181
|
+
output.append(chunk)
|
|
182
|
+
yield chunk
|
|
183
|
+
|
|
184
|
+
# Update the span's model parameters and output after streaming is
|
|
185
|
+
# complete
|
|
186
|
+
generation.update(
|
|
187
|
+
model_parameters=self.llm_config, output="".join(output)
|
|
188
|
+
)
|
|
189
|
+
|
|
190
|
+
# Update the span's usage statistics after streaming is complete
|
|
191
|
+
if self.usage_statistics:
|
|
192
|
+
CopilotLangfuseTelemetry._update_generation_span_with_usage_statistics(
|
|
193
|
+
generation, self.usage_statistics
|
|
194
|
+
)
|
|
195
|
+
|
|
196
|
+
return wrapper
|
|
197
|
+
|
|
198
|
+
@staticmethod
|
|
199
|
+
def trace_document_retrieval_generation(
|
|
200
|
+
func: Callable[..., Any],
|
|
201
|
+
) -> Callable[..., Any]:
|
|
202
|
+
"""Custom decorator for tracing document retrieval generation with Langfuse.
|
|
203
|
+
|
|
204
|
+
This decorator handles Langfuse tracing for document retrieval API calls
|
|
205
|
+
by manually managing the generation span and updating it with usage statistics.
|
|
206
|
+
"""
|
|
207
|
+
|
|
208
|
+
@wraps(func)
|
|
209
|
+
async def wrapper(
|
|
210
|
+
self: "InKeepDocumentRetrieval",
|
|
211
|
+
query: str,
|
|
212
|
+
temperature: float,
|
|
213
|
+
timeout: float,
|
|
214
|
+
) -> Any:
|
|
215
|
+
langfuse_client = langfuse.get_client()
|
|
216
|
+
|
|
217
|
+
with langfuse_client.start_as_current_generation(
|
|
218
|
+
name=f"{self.__class__.__name__}.{func.__name__}",
|
|
219
|
+
input={
|
|
220
|
+
"query": query,
|
|
221
|
+
"temperature": temperature,
|
|
222
|
+
"timeout": timeout,
|
|
223
|
+
},
|
|
224
|
+
) as generation:
|
|
225
|
+
# Call the original function
|
|
226
|
+
response = await func(self, query, temperature, timeout)
|
|
227
|
+
|
|
228
|
+
# Update the span with response content
|
|
229
|
+
generation.update(
|
|
230
|
+
output=response,
|
|
231
|
+
model_parameters={
|
|
232
|
+
"temperature": str(temperature),
|
|
233
|
+
"timeout": str(timeout),
|
|
234
|
+
},
|
|
235
|
+
)
|
|
236
|
+
|
|
237
|
+
# Update usage statistics if available
|
|
238
|
+
usage_statistics = UsageStatistics.from_chat_completion_response(
|
|
239
|
+
response
|
|
240
|
+
)
|
|
241
|
+
if usage_statistics:
|
|
242
|
+
CopilotLangfuseTelemetry._update_generation_span_with_usage_statistics(
|
|
243
|
+
generation, usage_statistics
|
|
244
|
+
)
|
|
245
|
+
|
|
246
|
+
return response
|
|
247
|
+
|
|
248
|
+
return wrapper
|
|
249
|
+
|
|
250
|
+
@staticmethod
|
|
251
|
+
def _extract_last_user_message_content(request: CopilotRequest) -> Optional[str]:
|
|
252
|
+
"""Extract the last user message from the CopilotRequest object.
|
|
253
|
+
|
|
254
|
+
Args:
|
|
255
|
+
request: The CopilotRequest object.
|
|
256
|
+
"""
|
|
257
|
+
if not isinstance(request.last_message, UserChatMessage):
|
|
258
|
+
return None
|
|
259
|
+
return request.last_message.get_flattened_text_content()
|
|
260
|
+
|
|
261
|
+
@staticmethod
|
|
262
|
+
def _extract_tracker_event_attachments(
|
|
263
|
+
request: CopilotRequest,
|
|
264
|
+
) -> list[Dict[str, Any]]:
|
|
265
|
+
"""Extract tracker event attachments from the last user message.
|
|
266
|
+
|
|
267
|
+
Args:
|
|
268
|
+
request: The CopilotRequest object.
|
|
269
|
+
|
|
270
|
+
Returns:
|
|
271
|
+
The event content block sent with the last user message in the
|
|
272
|
+
dictionary format.
|
|
273
|
+
"""
|
|
274
|
+
last_message = request.last_message
|
|
275
|
+
if not isinstance(last_message, UserChatMessage):
|
|
276
|
+
return []
|
|
277
|
+
return [
|
|
278
|
+
attachment.model_dump()
|
|
279
|
+
for attachment in last_message.get_content_blocks_by_type(EventContent)
|
|
280
|
+
]
|
|
281
|
+
|
|
282
|
+
@staticmethod
|
|
283
|
+
def _extract_response_category(handler: CopilotResponseHandler) -> Optional[str]:
|
|
284
|
+
"""Extract the response category from the response handler.
|
|
285
|
+
|
|
286
|
+
Args:
|
|
287
|
+
handler: The response handler containing generated response.
|
|
288
|
+
|
|
289
|
+
Returns:
|
|
290
|
+
The response category of the first generated response, or None if no
|
|
291
|
+
responses.
|
|
292
|
+
"""
|
|
293
|
+
if not handler.generated_responses:
|
|
294
|
+
return None
|
|
295
|
+
# The handler contains multiple chunks of one response. We use the first chunk's
|
|
296
|
+
# response category.
|
|
297
|
+
return handler.generated_responses[0].response_category.value
|
|
298
|
+
|
|
299
|
+
@staticmethod
|
|
300
|
+
def _full_text(handler: CopilotResponseHandler) -> str:
|
|
301
|
+
"""Extract full text from the response handler.
|
|
302
|
+
|
|
303
|
+
Args:
|
|
304
|
+
handler: The response handler containing generated responses.
|
|
305
|
+
|
|
306
|
+
Returns:
|
|
307
|
+
The concatenated content of all generated responses.
|
|
308
|
+
"""
|
|
309
|
+
return "".join(
|
|
310
|
+
response.content
|
|
311
|
+
for response in handler.generated_responses
|
|
312
|
+
if getattr(response, "content", None)
|
|
313
|
+
)
|
|
314
|
+
|
|
315
|
+
@staticmethod
|
|
316
|
+
def _extract_references(
|
|
317
|
+
handler: CopilotResponseHandler,
|
|
318
|
+
relevant_documents: list[Document],
|
|
319
|
+
) -> List[Dict[str, Any]]:
|
|
320
|
+
"""Extract reference entries from the response handler.
|
|
321
|
+
|
|
322
|
+
Args:
|
|
323
|
+
handler: The response handler containing generated responses.
|
|
324
|
+
relevant_documents: The relevant documents used to generate the response.
|
|
325
|
+
|
|
326
|
+
Returns:
|
|
327
|
+
A list of reference entries in dictionary format.
|
|
328
|
+
"""
|
|
329
|
+
if not relevant_documents:
|
|
330
|
+
return []
|
|
331
|
+
|
|
332
|
+
reference_entries: list[Dict[str, Any]] = []
|
|
333
|
+
reference_section = handler.extract_references(relevant_documents)
|
|
334
|
+
for reference_entry in reference_section.references:
|
|
335
|
+
reference_entries.append(
|
|
336
|
+
reference_entry.model_dump(
|
|
337
|
+
exclude={"response_category", "response_completeness"}
|
|
338
|
+
)
|
|
339
|
+
)
|
|
340
|
+
|
|
341
|
+
return reference_entries
|
|
342
|
+
|
|
343
|
+
@staticmethod
|
|
344
|
+
def _update_generation_span_with_usage_statistics(
|
|
345
|
+
generation_span: langfuse.LangfuseGeneration,
|
|
346
|
+
usage_statistics: UsageStatistics,
|
|
347
|
+
) -> None:
|
|
348
|
+
"""Update the generation span with the usage statistics.
|
|
349
|
+
|
|
350
|
+
Args:
|
|
351
|
+
generation_span: The generation span.
|
|
352
|
+
usage_statistics: The usage statistics of the generation.
|
|
353
|
+
"""
|
|
354
|
+
generation_span.update(
|
|
355
|
+
usage_details={
|
|
356
|
+
"input_non_cached_usage": (
|
|
357
|
+
usage_statistics.non_cached_prompt_tokens or 0
|
|
358
|
+
),
|
|
359
|
+
"input_cached_usage": usage_statistics.cached_prompt_tokens or 0,
|
|
360
|
+
"output_usage": usage_statistics.completion_tokens or 0,
|
|
361
|
+
"total": usage_statistics.total_tokens or 0,
|
|
362
|
+
},
|
|
363
|
+
cost_details={
|
|
364
|
+
"input_non_cached_cost": usage_statistics.non_cached_cost or 0,
|
|
365
|
+
"input_cached_cost": usage_statistics.cached_cost or 0,
|
|
366
|
+
"output_cost": usage_statistics.output_cost or 0,
|
|
367
|
+
"total": usage_statistics.total_cost or 0,
|
|
368
|
+
},
|
|
369
|
+
model=usage_statistics.model,
|
|
370
|
+
)
|
|
371
|
+
|
|
372
|
+
@staticmethod
|
|
373
|
+
def _create_session_id(
|
|
374
|
+
hello_rasa_project_id: str,
|
|
375
|
+
user_id: str,
|
|
376
|
+
chat_id: str,
|
|
377
|
+
) -> str:
|
|
378
|
+
"""Create a session ID as a composite from project id, user id and chat id."""
|
|
379
|
+
pattern = "PID-{project_id}-UID-{user_id}-CID-{chat_id}"
|
|
380
|
+
return pattern.format(
|
|
381
|
+
project_id=hello_rasa_project_id,
|
|
382
|
+
user_id=user_id,
|
|
383
|
+
chat_id=chat_id,
|
|
384
|
+
)
|
|
@@ -1,14 +1,25 @@
|
|
|
1
1
|
import datetime as dt
|
|
2
2
|
import os
|
|
3
3
|
import uuid
|
|
4
|
-
from typing import
|
|
4
|
+
from typing import (
|
|
5
|
+
TYPE_CHECKING,
|
|
6
|
+
Any,
|
|
7
|
+
Iterable,
|
|
8
|
+
Optional,
|
|
9
|
+
Sequence,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
pass
|
|
5
14
|
|
|
6
15
|
import structlog
|
|
7
16
|
|
|
8
17
|
from rasa import telemetry
|
|
9
18
|
from rasa.builder.copilot.constants import COPILOT_SEGMENT_WRITE_KEY_ENV_VAR
|
|
10
19
|
from rasa.builder.copilot.copilot_response_handler import CopilotResponseHandler
|
|
11
|
-
from rasa.builder.copilot.models import
|
|
20
|
+
from rasa.builder.copilot.models import (
|
|
21
|
+
EventContent,
|
|
22
|
+
)
|
|
12
23
|
from rasa.builder.document_retrieval.models import Document
|
|
13
24
|
from rasa.telemetry import (
|
|
14
25
|
SEGMENT_TRACK_ENDPOINT,
|
|
@@ -56,7 +67,7 @@ def _track(event: str, user_id: str, properties: dict) -> None:
|
|
|
56
67
|
structlogger.warning("builder.telemetry.track_failed", error=str(e))
|
|
57
68
|
|
|
58
69
|
|
|
59
|
-
class
|
|
70
|
+
class CopilotSegmentTelemetry:
|
|
60
71
|
def __init__(
|
|
61
72
|
self,
|
|
62
73
|
*,
|
|
@@ -96,6 +107,7 @@ class CopilotTelemetry:
|
|
|
96
107
|
latency_ms: int,
|
|
97
108
|
model: str,
|
|
98
109
|
input_tokens: Optional[int] = None,
|
|
110
|
+
cached_prompt_tokens: Optional[int] = None,
|
|
99
111
|
output_tokens: Optional[int] = None,
|
|
100
112
|
total_tokens: Optional[int] = None,
|
|
101
113
|
system_message: Optional[dict[str, Any]] = None,
|
|
@@ -112,6 +124,7 @@ class CopilotTelemetry:
|
|
|
112
124
|
latency_ms: End-to-end Copilot latency to produce this response.
|
|
113
125
|
model: The model used to generate the response.
|
|
114
126
|
input_tokens: Number of input tokens used (optional).
|
|
127
|
+
cached_prompt_tokens: Number of cached prompt tokens.
|
|
115
128
|
output_tokens: Number of output tokens generated (optional).
|
|
116
129
|
total_tokens: Total number of tokens used (input + output) (optional).
|
|
117
130
|
system_message: The system message used (optional).
|
|
@@ -135,6 +148,7 @@ class CopilotTelemetry:
|
|
|
135
148
|
"latency_ms": latency_ms,
|
|
136
149
|
"model": model,
|
|
137
150
|
"input_tokens": input_tokens,
|
|
151
|
+
"cached_prompt_tokens": cached_prompt_tokens,
|
|
138
152
|
"output_tokens": output_tokens,
|
|
139
153
|
"total_tokens": total_tokens,
|
|
140
154
|
"chat_history": chat_history,
|
|
@@ -193,6 +207,7 @@ class CopilotTelemetry:
|
|
|
193
207
|
latency_ms: int,
|
|
194
208
|
model: str,
|
|
195
209
|
prompt_tokens: int,
|
|
210
|
+
cached_prompt_tokens: int,
|
|
196
211
|
completion_tokens: int,
|
|
197
212
|
total_tokens: int,
|
|
198
213
|
system_message: dict[str, Any],
|
|
@@ -208,11 +223,13 @@ class CopilotTelemetry:
|
|
|
208
223
|
latency_ms: End-to-end Copilot latency to produce this response.
|
|
209
224
|
model: The model used to generate the response.
|
|
210
225
|
prompt_tokens: Number of input tokens used.
|
|
226
|
+
cached_prompt_tokens: Number of cached prompt tokens.
|
|
211
227
|
completion_tokens: Number of output tokens generated.
|
|
212
228
|
total_tokens: Total number of tokens used (input + output).
|
|
213
229
|
system_message: The system message used.
|
|
214
230
|
chat_history: The chat history messages used.
|
|
215
231
|
last_user_message: The last user message used.
|
|
232
|
+
tracker_event_attachments: List of tracker event attachments.
|
|
216
233
|
"""
|
|
217
234
|
structlogger.debug("builder.telemetry.log_copilot_from_handler")
|
|
218
235
|
text = self._full_text(handler)
|
|
@@ -223,6 +240,7 @@ class CopilotTelemetry:
|
|
|
223
240
|
latency_ms=latency_ms,
|
|
224
241
|
model=model,
|
|
225
242
|
input_tokens=prompt_tokens,
|
|
243
|
+
cached_prompt_tokens=cached_prompt_tokens,
|
|
226
244
|
output_tokens=completion_tokens,
|
|
227
245
|
total_tokens=total_tokens,
|
|
228
246
|
system_message=system_message,
|
|
@@ -47,6 +47,10 @@ async def validate_project(importer: TrainingDataImporter) -> Optional[str]:
|
|
|
47
47
|
with capture_validation_logs() as captured_logs:
|
|
48
48
|
try:
|
|
49
49
|
with _mock_sys_exit() as exit_tracker:
|
|
50
|
+
from rasa.core.config.configuration import Configuration
|
|
51
|
+
|
|
52
|
+
Configuration.initialise_empty()
|
|
53
|
+
|
|
50
54
|
validate_files(
|
|
51
55
|
fail_on_warnings=config.VALIDATION_FAIL_ON_WARNINGS,
|
|
52
56
|
max_history=config.VALIDATION_MAX_HISTORY,
|