rasa-pro 3.14.1__py3-none-any.whl → 3.15.0a1__py3-none-any.whl
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Potentially problematic release: this version of rasa-pro might be problematic.
- rasa/builder/config.py +4 -0
- rasa/builder/copilot/copilot.py +28 -9
- rasa/builder/copilot/models.py +171 -4
- rasa/builder/document_retrieval/inkeep_document_retrieval.py +2 -0
- rasa/builder/download.py +1 -1
- rasa/builder/service.py +101 -24
- rasa/builder/telemetry/__init__.py +0 -0
- rasa/builder/telemetry/copilot_langfuse_telemetry.py +384 -0
- rasa/builder/{copilot/telemetry.py → telemetry/copilot_segment_telemetry.py} +21 -3
- rasa/constants.py +1 -0
- rasa/core/policies/flows/flow_executor.py +20 -6
- rasa/core/run.py +15 -4
- rasa/e2e_test/e2e_config.py +4 -3
- rasa/engine/recipes/default_components.py +16 -6
- rasa/graph_components/validators/default_recipe_validator.py +10 -4
- rasa/nlu/classifiers/diet_classifier.py +2 -0
- rasa/shared/core/slots.py +55 -24
- rasa/shared/utils/common.py +9 -1
- rasa/utils/common.py +9 -0
- rasa/utils/endpoints.py +2 -0
- rasa/utils/installation_utils.py +111 -0
- rasa/utils/tensorflow/callback.py +2 -0
- rasa/utils/train_utils.py +2 -0
- rasa/version.py +1 -1
- {rasa_pro-3.14.1.dist-info → rasa_pro-3.15.0a1.dist-info}/METADATA +4 -2
- {rasa_pro-3.14.1.dist-info → rasa_pro-3.15.0a1.dist-info}/RECORD +29 -26
- {rasa_pro-3.14.1.dist-info → rasa_pro-3.15.0a1.dist-info}/NOTICE +0 -0
- {rasa_pro-3.14.1.dist-info → rasa_pro-3.15.0a1.dist-info}/WHEEL +0 -0
- {rasa_pro-3.14.1.dist-info → rasa_pro-3.15.0a1.dist-info}/entry_points.txt +0 -0
rasa/builder/config.py
CHANGED
```diff
@@ -13,6 +13,10 @@ OPENAI_VECTOR_STORE_ID = os.getenv(
 )
 OPENAI_MAX_VECTOR_RESULTS = int(os.getenv("OPENAI_MAX_VECTOR_RESULTS", "10"))
 OPENAI_TIMEOUT = int(os.getenv("OPENAI_TIMEOUT", "30"))
+# OpenAI Token Pricing Configuration (per 1,000 tokens)
+COPILOT_INPUT_TOKEN_PRICE = float(os.getenv("COPILOT_INPUT_TOKEN_PRICE", "0.002"))
+COPILOT_OUTPUT_TOKEN_PRICE = float(os.getenv("COPILOT_OUTPUT_TOKEN_PRICE", "0.0005"))
+COPILOT_CACHED_TOKEN_PRICE = float(os.getenv("COPILOT_CACHED_TOKEN_PRICE", "0.002"))

 # Server Configuration
 BUILDER_SERVER_HOST = os.getenv("SERVER_HOST", "0.0.0.0")
```
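The new pricing constants follow the same pattern as the existing OpenAI settings: an environment variable with a string default, converted at import time. A minimal sketch of how an operator could override them before starting Rasa (the override values here are purely illustrative, not recommended prices):

```python
import os

# Illustrative overrides; prices are interpreted as dollars per 1,000 tokens.
os.environ["COPILOT_INPUT_TOKEN_PRICE"] = "0.0025"
os.environ["COPILOT_OUTPUT_TOKEN_PRICE"] = "0.01"
os.environ["COPILOT_CACHED_TOKEN_PRICE"] = "0.00125"

# Same env-var-with-default pattern used in rasa/builder/config.py.
COPILOT_INPUT_TOKEN_PRICE = float(os.getenv("COPILOT_INPUT_TOKEN_PRICE", "0.002"))
COPILOT_OUTPUT_TOKEN_PRICE = float(os.getenv("COPILOT_OUTPUT_TOKEN_PRICE", "0.0005"))
COPILOT_CACHED_TOKEN_PRICE = float(os.getenv("COPILOT_CACHED_TOKEN_PRICE", "0.002"))

print(COPILOT_INPUT_TOKEN_PRICE)  # 0.0025 when the override above is set
```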
rasa/builder/copilot/copilot.py
CHANGED
```diff
@@ -42,6 +42,7 @@ from rasa.builder.exceptions import (
     DocumentRetrievalError,
 )
 from rasa.builder.shared.tracker_context import TrackerContext
+from rasa.builder.telemetry.copilot_langfuse_telemetry import CopilotLangfuseTelemetry
 from rasa.shared.constants import PACKAGE_NAME

 structlogger = structlog.get_logger()
@@ -72,7 +73,11 @@ class Copilot:
         )

         # The final stream chunk includes usage statistics.
-        self.usage_statistics = UsageStatistics(
+        self.usage_statistics = UsageStatistics(
+            input_token_price=config.COPILOT_INPUT_TOKEN_PRICE,
+            output_token_price=config.COPILOT_OUTPUT_TOKEN_PRICE,
+            cached_token_price=config.COPILOT_CACHED_TOKEN_PRICE,
+        )

     @asynccontextmanager
     async def _get_client(self) -> AsyncGenerator[openai.AsyncOpenAI, None]:
@@ -94,6 +99,16 @@ class Copilot:
                 error=str(exc),
             )

+    @property
+    def llm_config(self) -> Dict[str, Any]:
+        """The LLM config used to generate the response."""
+        return {
+            "model": config.OPENAI_MODEL,
+            "temperature": config.OPENAI_TEMPERATURE,
+            "stream": True,
+            "stream_options": {"include_usage": True},
+        }
+
     async def search_rasa_documentation(
         self,
         context: CopilotContext,
@@ -108,7 +123,9 @@ class Copilot:
         """
         try:
             query = self._create_documentation_search_query(context)
-
+            documents = await self._inkeep_document_retrieval.retrieve_documents(query)
+            # TODO: Log documentation retrieval to Langfuse
+            return documents
         except DocumentRetrievalError as e:
             structlogger.error(
                 "copilot.search_rasa_documentation.error",
@@ -145,11 +162,12 @@
             Exception: If an unexpected error occurs.
         """
         relevant_documents = await self.search_rasa_documentation(context)
-        messages = await self._build_messages(context, relevant_documents)
         tracker_event_attachments = self._extract_tracker_event_attachments(
             context.copilot_chat_history[-1]
         )
+        messages = await self._build_messages(context, relevant_documents)

+        # TODO: Delete this after Langfuse is implemented
         support_evidence = CopilotGenerationContext(
             relevant_documents=relevant_documents,
             system_message=messages[0],
@@ -163,6 +181,7 @@
             support_evidence,
         )

+    @CopilotLangfuseTelemetry.trace_copilot_streaming_generation
     async def _stream_response(
         self, messages: List[Dict[str, Any]]
     ) -> AsyncGenerator[str, None]:
@@ -172,13 +191,10 @@
         try:
             async with self._get_client() as client:
                 stream = await client.chat.completions.create(
-
-
-                    temperature=config.OPENAI_TEMPERATURE,
-                    stream=True,
-                    stream_options={"include_usage": True},
+                    messages=messages,
+                    **self.llm_config,
                 )
-                async for chunk in stream:
+                async for chunk in stream:  # type: ignore[attr-defined]
                     # The final chunk, which contains the usage statistics,
                     # arrives with an empty `choices` list.
                     if not chunk.choices:
@@ -189,6 +205,7 @@
                     delta = chunk.choices[0].delta
                     if delta and delta.content:
                         yield delta.content
+
         except openai.OpenAIError as e:
             structlogger.exception("copilot.stream_response.api_error", error=str(e))
             raise CopilotStreamError(
@@ -559,4 +576,6 @@
         """Extract the tracker event attachments from the message."""
         if not isinstance(message, UserChatMessage):
             return []
+        # TODO: Log tracker event attachments to Langfuse only in the case of the
+        # User chat message.
         return message.get_content_blocks_by_type(EventContent)
```
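The refactored `_stream_response` now pulls the shared OpenAI parameters from the new `llm_config` property and unpacks them into the completion call. A minimal sketch of that pattern outside of Rasa, assuming a plain `AsyncOpenAI` client and an illustrative model name (the real values come from `rasa.builder.config`):

```python
from typing import Any, Dict, List

import openai


class StreamingChat:
    """Illustrative only: shows the shared-kwargs pattern from the diff."""

    @property
    def llm_config(self) -> Dict[str, Any]:
        # Parameters shared by every completion call, mirroring Copilot.llm_config.
        return {
            "model": "gpt-4o-mini",  # assumption; the diff reads config.OPENAI_MODEL
            "temperature": 0.0,      # assumption; the diff reads config.OPENAI_TEMPERATURE
            "stream": True,
            "stream_options": {"include_usage": True},
        }

    async def stream_answer(self, messages: List[Dict[str, Any]]) -> None:
        client = openai.AsyncOpenAI()
        # Per-call arguments stay explicit; everything else comes from llm_config,
        # just like `messages=messages, **self.llm_config` in the diff.
        stream = await client.chat.completions.create(messages=messages, **self.llm_config)
        async for chunk in stream:
            # The final chunk carries usage statistics and has an empty choices list.
            if chunk.choices and chunk.choices[0].delta.content:
                print(chunk.choices[0].delta.content, end="")
```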
rasa/builder/copilot/models.py
CHANGED
```diff
@@ -3,6 +3,7 @@ from enum import Enum
 from typing import Any, Dict, List, Literal, Optional, Type, TypeVar, Union

 import structlog
+from openai.types.chat import ChatCompletion
 from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 from pydantic import (
     BaseModel,
@@ -612,16 +613,171 @@ class TrainingErrorLog(CopilotOutput):


 class UsageStatistics(BaseModel):
-
-
-
-
+    """Usage statistics for a copilot generation."""
+
+    # Token usage statistics
+    prompt_tokens: Optional[int] = Field(
+        default=None,
+        description=(
+            "Total number of prompt tokens used to generate completion. "
+            "Should include cached prompt tokens."
+        ),
+    )
+    completion_tokens: Optional[int] = Field(
+        default=None,
+        description="Number of generated tokens.",
+    )
+    total_tokens: Optional[int] = Field(
+        default=None,
+        description="Total number of tokens used (input + output).",
+    )
+    cached_prompt_tokens: Optional[int] = Field(
+        default=None,
+        description="Number of cached prompt tokens.",
+    )
+    model: Optional[str] = Field(
+        default=None,
+        description="The model used to generate the response.",
+    )
+
+    # Token prices
+    input_token_price: float = Field(
+        default=0.0,
+        description="Price per 1K input tokens in dollars.",
+    )
+    output_token_price: float = Field(
+        default=0.0,
+        description="Price per 1K output tokens in dollars.",
+    )
+    cached_token_price: float = Field(
+        default=0.0,
+        description="Price per 1K cached tokens in dollars.",
+    )
+
+    @property
+    def non_cached_prompt_tokens(self) -> Optional[int]:
+        """Get the non-cached prompt tokens."""
+        if self.cached_prompt_tokens is not None and self.prompt_tokens is not None:
+            return self.prompt_tokens - self.cached_prompt_tokens
+        return self.prompt_tokens
+
+    @property
+    def non_cached_cost(self) -> Optional[float]:
+        """Calculate the non-cached token cost based on configured pricing."""
+        if self.non_cached_prompt_tokens is None:
+            return None
+        if self.non_cached_prompt_tokens == 0:
+            return 0.0
+
+        return (self.non_cached_prompt_tokens / 1000.0) * self.input_token_price
+
+    @property
+    def cached_cost(self) -> Optional[float]:
+        """Calculate the cached token cost based on configured pricing."""
+        if self.cached_prompt_tokens is None:
+            return None
+        if self.cached_prompt_tokens == 0:
+            return 0.0
+
+        return (self.cached_prompt_tokens / 1000.0) * self.cached_token_price
+
+    @property
+    def input_cost(self) -> Optional[float]:
+        """Calculate the input token cost based on configured pricing.
+
+        The calculation takes into account the cached prompt tokens (if available) too.
+        """
+        # If both non-cached and cached costs are None, there's no input cost
+        if self.non_cached_cost is None and self.cached_cost is None:
+            return None
+
+        # If only non-cached cost is available, return it
+        if self.non_cached_cost is not None and self.cached_cost is None:
+            return self.non_cached_cost
+
+        # If only cached cost is available, return it
+        if self.non_cached_cost is None and self.cached_cost is not None:
+            return self.cached_cost
+
+        # If both are available, return the sum
+        return self.non_cached_cost + self.cached_cost  # type: ignore[operator]
+
+    @property
+    def output_cost(self) -> Optional[float]:
+        """Calculate the output token cost based on configured pricing."""
+        if self.completion_tokens is None:
+            return None
+        if self.completion_tokens == 0:
+            return 0.0
+
+        return (self.completion_tokens / 1000.0) * self.output_token_price
+
+    @property
+    def total_cost(self) -> Optional[float]:
+        """Calculate the total cost based on configured pricing.
+
+        Returns:
+            Total cost in dollars, or None if insufficient data.
+        """
+        if self.input_cost is None or self.output_cost is None:
+            return None
+
+        return self.input_cost + self.output_cost
+
+    def update_token_prices(
+        self,
+        input_token_price: float,
+        output_token_price: float,
+        cached_token_price: float,
+    ) -> None:
+        """Update token prices with provided values.
+
+        Args:
+            input_token_price: Price per 1K input tokens in dollars.
+            output_token_price: Price per 1K output tokens in dollars.
+            cached_token_price: Price per 1K cached tokens in dollars.
+        """
+        self.input_token_price = input_token_price
+        self.output_token_price = output_token_price
+        self.cached_token_price = cached_token_price
+
+    @classmethod
+    def from_chat_completion_response(
+        cls,
+        response: ChatCompletion,
+        input_token_price: float = 0.0,
+        output_token_price: float = 0.0,
+        cached_token_price: float = 0.0,
+    ) -> Optional["UsageStatistics"]:
+        """Create a UsageStatistics object from a ChatCompletionChunk."""
+        if not (usage := getattr(response, "usage", None)):
+            return None
+
+        usage_statistics = cls(
+            input_token_price=input_token_price,
+            output_token_price=output_token_price,
+            cached_token_price=cached_token_price,
+        )
+
+        usage_statistics.prompt_tokens = usage.prompt_tokens
+        usage_statistics.completion_tokens = usage.completion_tokens
+        usage_statistics.total_tokens = usage.total_tokens
+        usage_statistics.model = getattr(response, "model", None)
+
+        # Extract cached tokens if available
+        if hasattr(usage, "prompt_tokens_details") and usage.prompt_tokens_details:
+            usage_statistics.cached_prompt_tokens = getattr(
+                usage.prompt_tokens_details, "cached_tokens", None
+            )
+
+        return usage_statistics

     def reset(self) -> None:
         """Reset usage statistics to their default values."""
         self.prompt_tokens = None
         self.completion_tokens = None
         self.total_tokens = None
+        self.cached_prompt_tokens = None
         self.model = None

     def update_from_stream_chunk(self, chunk: ChatCompletionChunk) -> None:
@@ -630,14 +786,25 @@ class UsageStatistics(BaseModel):
         Args:
             chunk: The OpenAI stream chunk containing usage statistics.
         """
+        # Reset the usage statistics to their default values
+        self.reset()
+
+        # If the chunk has no usage statistics, return
         if not (usage := getattr(chunk, "usage", None)):
             return

+        # Update the usage statistics with the values from the chunk
         self.prompt_tokens = usage.prompt_tokens
         self.completion_tokens = usage.completion_tokens
         self.total_tokens = usage.total_tokens
         self.model = getattr(chunk, "model", None)

+        # Extract cached tokens if available
+        if hasattr(usage, "prompt_tokens_details") and usage.prompt_tokens_details:
+            self.cached_prompt_tokens = getattr(
+                usage.prompt_tokens_details, "cached_tokens", None
+            )
+

 class SigningContext(BaseModel):
     secret: Optional[str] = Field(None)
```
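All of the new cost properties use the same per-1,000-token formula, `cost = (tokens / 1000) * price_per_1K`, with cached prompt tokens priced separately from non-cached ones. A quick worked example of that arithmetic under assumed prices and token counts (illustrative numbers only):

```python
# Assumed prices in dollars per 1K tokens (illustrative).
input_token_price = 0.002
cached_token_price = 0.002
output_token_price = 0.0005

# Assumed usage reported by the final stream chunk.
prompt_tokens = 1_500          # includes the cached portion
cached_prompt_tokens = 500
completion_tokens = 200

non_cached_prompt_tokens = prompt_tokens - cached_prompt_tokens             # 1000

non_cached_cost = (non_cached_prompt_tokens / 1000.0) * input_token_price   # 0.002
cached_cost = (cached_prompt_tokens / 1000.0) * cached_token_price          # 0.001
output_cost = (completion_tokens / 1000.0) * output_token_price             # 0.0001

total_cost = non_cached_cost + cached_cost + output_cost                    # 0.0031
print(f"total cost: ${total_cost:.4f}")
```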
rasa/builder/document_retrieval/inkeep_document_retrieval.py
CHANGED
```diff
@@ -17,6 +17,7 @@ from rasa.builder.document_retrieval.constants import (
 )
 from rasa.builder.document_retrieval.models import Document
 from rasa.builder.exceptions import DocumentRetrievalError
+from rasa.builder.telemetry.copilot_langfuse_telemetry import CopilotLangfuseTelemetry
 from rasa.shared.utils.io import read_json_file

 structlogger = structlog.get_logger()
@@ -88,6 +89,7 @@ class InKeepDocumentRetrieval:
             )
             raise e

+    @CopilotLangfuseTelemetry.trace_document_retrieval_generation
     async def _call_inkeep_rag_api(
         self, query: str, temperature: float, timeout: float
     ) -> ChatCompletion:
```
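Both the streaming generation in `copilot.py` and the InKeep RAG call are now traced by applying `CopilotLangfuseTelemetry` decorators to the async methods, so tracing is added without touching the call sites. The actual decorators live in the new `rasa/builder/telemetry/copilot_langfuse_telemetry.py`; the sketch below only shows the general shape of such a decorator for async methods and is not Rasa's implementation:

```python
import functools
from typing import Any, Awaitable, Callable


class TracingTelemetry:
    """Illustrative stand-in for a telemetry helper that exposes decorators."""

    @staticmethod
    def trace_async_call(
        func: Callable[..., Awaitable[Any]],
    ) -> Callable[..., Awaitable[Any]]:
        @functools.wraps(func)
        async def wrapper(*args: Any, **kwargs: Any) -> Any:
            # A real implementation would open a span here and record the
            # inputs, outputs, and timing; this sketch only delegates.
            return await func(*args, **kwargs)

        return wrapper


class DocumentRetrieval:
    @TracingTelemetry.trace_async_call
    async def call_rag_api(self, query: str) -> list:
        return []  # placeholder result for the sketch
```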
rasa/builder/download.py
CHANGED
```diff
@@ -27,7 +27,7 @@ def _get_pyproject_toml_content(project_id: str) -> str:
         version = "0.1.0"
         description = "Add your description for your Rasa bot here"
         readme = "README.md"
-        dependencies = ["rasa-pro>=3.
+        dependencies = ["rasa-pro>=3.14"]
         requires-python = ">={sys.version_info.major}.{sys.version_info.minor}"
         """
     )
```
rasa/builder/service.py
CHANGED
```diff
@@ -5,6 +5,7 @@ import time
 from http import HTTPStatus
 from typing import Any, Optional

+import langfuse
 import structlog
 from sanic import Blueprint, HTTPResponse, response
 from sanic.request import Request
@@ -41,7 +42,6 @@ from rasa.builder.copilot.signing import (
     create_signature_envelope_for_text,
     verify_signature,
 )
-from rasa.builder.copilot.telemetry import CopilotTelemetry
 from rasa.builder.download import create_bot_project_archive
 from rasa.builder.guardrails.constants import (
     BLOCK_SCOPE_PROJECT,
@@ -65,6 +65,7 @@ from rasa.builder.models import (
     ApiErrorResponse,
     AssistantInfo,
     BotData,
+    BotFiles,
     JobCreateResponse,
     JobStatus,
     JobStatusEvent,
@@ -74,6 +75,8 @@ from rasa.builder.models import (
 )
 from rasa.builder.project_generator import ProjectGenerator
 from rasa.builder.shared.tracker_context import TrackerContext
+from rasa.builder.telemetry.copilot_langfuse_telemetry import CopilotLangfuseTelemetry
+from rasa.builder.telemetry.copilot_segment_telemetry import CopilotSegmentTelemetry
 from rasa.core.agent import Agent
 from rasa.core.channels.studio_chat import StudioChatInput
 from rasa.core.exceptions import AgentNotReady
@@ -1020,6 +1023,9 @@ async def download_bot_project(request: Request) -> HTTPResponse:
     schema=str,
 )
 @protected()
+# Disable automatic input/output capture for langfuse tracing
+# This allows manual control over what data is sent to langfuse
+@langfuse.observe(capture_input=False, capture_output=False)
 async def copilot(request: Request) -> None:
     """Handle copilot requests with streaming markdown responses."""
     sse = await request.respond(content_type="text/event-stream")
@@ -1046,9 +1052,12 @@ async def copilot(request: Request) -> None:
         )
         return

-    telemetry =
+    telemetry = CopilotSegmentTelemetry(
+        project_id=HELLO_RASA_PROJECT_ID, user_id=user_id
+    )
     structlogger.debug("builder.copilot.telemetry.request.init")

+    # TODO: This can be removed once Langfuse is completed.
     if req.last_message and req.last_message.role == ROLE_USER:
         structlogger.debug("builder.copilot.telemetry.request.user_turn")
         # Offload telemetry logging to a background task
@@ -1088,26 +1097,9 @@ async def copilot(request: Request) -> None:
         return

     # 4. Get the necessary context for the copilot
-
-
-
-    )
-    if (
-        tracker_context is not None
-        and llm_service.guardrails_policy_checker is not None
-    ):
-        tracker_context = await llm_service.guardrails_policy_checker.check_assistant_chat_for_policy_violations(  # noqa: E501
-            tracker_context=tracker_context,
-            hello_rasa_user_id=user_id,
-            hello_rasa_project_id=HELLO_RASA_PROJECT_ID,
-            lakera_project_id=LAKERA_ASSISTANT_HISTORY_GUARDRAIL_PROJECT_ID,
-        )
-
-    # Copilot doesn't need to know about the docs and any file that is not a core
-    # assistant file
-    relevant_assistant_files = project_generator.get_bot_files(
-        exclude_docs_directory=True,
-        allowed_file_extensions=["yaml", "yml", "py", "jinja", "jinja2"],
+    tracker_context = await get_tracker_context_for_copilot(request, req, user_id)
+    relevant_assistant_files = get_relevant_assistant_files_for_copilot(
+        project_generator,
     )
     context = CopilotContext(
         tracker_context=tracker_context,
@@ -1162,7 +1154,7 @@ async def copilot(request: Request) -> None:
     async for token in intercepted_stream:
         await sse.send(token.to_sse_event().format())

-    #
+    # 8a. Offload metabase telemetry logging to a background task
     request.app.add_task(
         asyncio.to_thread(
             telemetry.log_copilot_from_handler,
@@ -1177,9 +1169,27 @@ async def copilot(request: Request) -> None:
                 else None
             ),
             tracker_event_attachments=generation_context.tracker_event_attachments,
-
+            model=copilot_client.usage_statistics.model or "N/A",
+            prompt_tokens=copilot_client.usage_statistics.prompt_tokens or 0,
+            cached_prompt_tokens=(
+                copilot_client.usage_statistics.cached_prompt_tokens or 0
+            ),
+            completion_tokens=(
+                copilot_client.usage_statistics.completion_tokens or 0
+            ),
+            total_tokens=copilot_client.usage_statistics.total_tokens or 0,
         )
     )
+    # 8b. Setup output trace attributes for Langfuse
+    CopilotLangfuseTelemetry.setup_copilot_endpoint_call_trace_attributes(
+        hello_rasa_project_id=HELLO_RASA_PROJECT_ID or "N/A",
+        chat_id=req.session_id or "N/A",
+        user_id=user_id,
+        request=req,
+        handler=copilot_response_handler,
+        relevant_documents=generation_context.relevant_documents,
+        copilot_context=context,
+    )

     # 9. Once the stream is over, extract and send references
     # if any documents were used
@@ -1365,3 +1375,70 @@ async def _handle_guardrail_violation_and_maybe_block(

     await sse.send(message.to_sse_event().format())
     return message
+
+
+@langfuse.observe(capture_input=False, capture_output=False)
+async def get_tracker_context_for_copilot(
+    request: Request,
+    req: CopilotRequest,
+    user_id: str,
+) -> Optional[TrackerContext]:
+    """Check the assistant chat for guardrail policy violations.
+
+    Args:
+        request: The request object.
+        req: The CopilotRequest object.
+        user_id: The user ID.
+
+    Returns:
+        The tracker context if the tracker is available.
+    """
+    tracker = await current_tracker_from_input_channel(request.app, req.session_id)
+    tracker_context = TrackerContext.from_tracker(
+        tracker, max_turns=COPILOT_ASSISTANT_TRACKER_MAX_TURNS
+    )
+    if (
+        tracker_context is not None
+        and llm_service.guardrails_policy_checker is not None
+    ):
+        tracker_context = await llm_service.guardrails_policy_checker.check_assistant_chat_for_policy_violations(  # noqa: E501
+            tracker_context=tracker_context,
+            hello_rasa_user_id=user_id,
+            hello_rasa_project_id=HELLO_RASA_PROJECT_ID,
+            lakera_project_id=LAKERA_ASSISTANT_HISTORY_GUARDRAIL_PROJECT_ID,
+        )
+
+    # Track the retrieved tracker context
+    CopilotLangfuseTelemetry.trace_copilot_tracker_context(
+        tracker_context=tracker_context,
+        max_conversation_turns=COPILOT_ASSISTANT_TRACKER_MAX_TURNS,
+        session_id=req.session_id,
+    )
+
+    return tracker_context
+
+
+@langfuse.observe(capture_input=False, capture_output=False)
+def get_relevant_assistant_files_for_copilot(
+    project_generator: ProjectGenerator,
+) -> BotFiles:
+    """Get the relevant assistant files for the copilot.
+
+    Args:
+        project_generator: The project generator.
+
+    Returns:
+        The relevant assistant files.
+    """
+    # Copilot doesn't need to know about the docs and any file that is not a core
+    # assistant file
+    files = project_generator.get_bot_files(
+        exclude_docs_directory=True,
+        allowed_file_extensions=["yaml", "yml", "py", "jinja", "jinja2"],
+    )

+    # Track the retrieved assistant files
+    CopilotLangfuseTelemetry.trace_copilot_relevant_assistant_files(
+        relevant_assistant_files=files,
+    )
+    return files
```
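The copilot endpoint and the two new helpers are decorated with `langfuse.observe(capture_input=False, capture_output=False)`: each call still produces a Langfuse span, but nothing is captured automatically, and the trace attributes are attached explicitly through `CopilotLangfuseTelemetry`. A minimal sketch of that capture-nothing pattern, mirroring the decorator usage in the diff (the function and its return value are illustrative):

```python
import langfuse


# A span is created for every call, but the arguments and the return value
# are not sent to Langfuse automatically.
@langfuse.observe(capture_input=False, capture_output=False)
def load_project_files(project_id: str) -> list:
    # Anything that should appear in the trace has to be attached explicitly;
    # in the diff this is done through the CopilotLangfuseTelemetry helpers.
    return ["domain.yml", "config.yml"]  # placeholder data for the sketch
```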