rasa-pro 3.12.5__py3-none-any.whl → 3.12.6.dev2__py3-none-any.whl
- rasa/__init__.py +6 -0
- rasa/core/channels/voice_ready/audiocodes.py +6 -0
- rasa/core/channels/voice_stream/audiocodes.py +53 -9
- rasa/core/channels/voice_stream/genesys.py +146 -16
- rasa/core/nlg/contextual_response_rephraser.py +21 -4
- rasa/core/nlg/summarize.py +15 -1
- rasa/core/policies/enterprise_search_policy.py +16 -3
- rasa/core/policies/intentless_policy.py +17 -4
- rasa/core/policies/policy.py +2 -0
- rasa/dialogue_understanding/coexistence/llm_based_router.py +18 -4
- rasa/dialogue_understanding/generator/llm_based_command_generator.py +8 -2
- rasa/dialogue_understanding/generator/llm_command_generator.py +3 -1
- rasa/dialogue_understanding/generator/single_step/compact_llm_command_generator.py +12 -1
- rasa/hooks.py +55 -0
- rasa/monkey_patches.py +91 -0
- rasa/shared/constants.py +5 -0
- rasa/shared/core/slot_mappings.py +12 -0
- rasa/shared/providers/constants.py +9 -0
- rasa/shared/providers/llm/_base_litellm_client.py +14 -4
- rasa/shared/providers/llm/litellm_router_llm_client.py +17 -7
- rasa/shared/providers/llm/llm_client.py +24 -15
- rasa/shared/providers/llm/self_hosted_llm_client.py +10 -2
- rasa/shared/utils/health_check/health_check.py +7 -1
- rasa/tracing/instrumentation/attribute_extractors.py +4 -4
- rasa/tracing/instrumentation/intentless_policy_instrumentation.py +2 -1
- rasa/utils/licensing.py +15 -0
- rasa/version.py +1 -1
- {rasa_pro-3.12.5.dist-info → rasa_pro-3.12.6.dev2.dist-info}/METADATA +5 -5
- {rasa_pro-3.12.5.dist-info → rasa_pro-3.12.6.dev2.dist-info}/RECORD +32 -33
- {rasa_pro-3.12.5.dist-info → rasa_pro-3.12.6.dev2.dist-info}/WHEEL +1 -1
- README.md +0 -38
- rasa/keys +0 -1
- {rasa_pro-3.12.5.dist-info → rasa_pro-3.12.6.dev2.dist-info}/NOTICE +0 -0
- {rasa_pro-3.12.5.dist-info → rasa_pro-3.12.6.dev2.dist-info}/entry_points.txt +0 -0
rasa/dialogue_understanding/generator/llm_based_command_generator.py
CHANGED
@@ -49,6 +49,7 @@ from rasa.shared.utils.llm import (
     llm_factory,
     resolve_model_client_config,
 )
+from rasa.utils.licensing import get_human_readable_licence_owner
 from rasa.utils.log_utils import log_llm
 
 structlogger = structlog.get_logger()
@@ -92,6 +93,8 @@ class LLMBasedCommandGenerator(
         else:
             self.flow_retrieval = None
 
+        self.user_id = get_human_readable_licence_owner()
+
     ### Abstract methods
     @staticmethod
     @abstractmethod
@@ -331,7 +334,9 @@ class LLMBasedCommandGenerator(
 
     @measure_llm_latency
     async def invoke_llm(
-        self,
+        self,
+        prompt: Union[List[dict], List[str], str],
+        metadata: Optional[Dict[str, Any]] = None,
     ) -> Optional[LLMResponse]:
         """Use LLM to generate a response.
 
@@ -344,6 +349,7 @@ class LLMBasedCommandGenerator(
             - a list of messages. Each message is a string and will be formatted
               as a user message.
             - a single message as a string which will be formatted as user message.
+            metadata: Optional metadata to be passed to the LLM call.
 
         Returns:
             An LLMResponse object.
@@ -355,7 +361,7 @@ class LLMBasedCommandGenerator(
             self.config.get(LLM_CONFIG_KEY), self.get_default_llm_config()
         )
         try:
-            return await llm.acompletion(prompt)
+            return await llm.acompletion(prompt, metadata)
         except Exception as e:
             # unfortunately, langchain does not wrap LLM exceptions which means
             # we have to catch all exceptions here
rasa/dialogue_understanding/generator/llm_command_generator.py
CHANGED
@@ -55,7 +55,9 @@ class LLMCommandGenerator(SingleStepLLMCommandGenerator):
         )
 
     async def invoke_llm(
-        self,
+        self,
+        prompt: Union[List[dict], List[str], str],
+        metadata: Optional[Dict[str, Any]] = None,
     ) -> Optional[LLMResponse]:
         try:
             return await super().invoke_llm(prompt)
rasa/dialogue_understanding/generator/single_step/compact_llm_command_generator.py
CHANGED
@@ -47,6 +47,10 @@ from rasa.shared.constants import (
     AWS_BEDROCK_PROVIDER,
     AZURE_OPENAI_PROVIDER,
     EMBEDDINGS_CONFIG_KEY,
+    LANGFUSE_CUSTOM_METADATA_DICT,
+    LANGFUSE_METADATA_SESSION_ID,
+    LANGFUSE_METADATA_USER_ID,
+    LANGFUSE_TAGS,
     MAX_TOKENS_CONFIG_KEY,
     PROMPT_TEMPLATE_CONFIG_KEY,
     ROUTE_TO_CALM_SLOT,
@@ -362,7 +366,14 @@ class CompactLLMCommandGenerator(LLMBasedCommandGenerator):
             prompt=flow_prompt,
         )
 
-
+        metadata = {
+            LANGFUSE_METADATA_USER_ID: self.user_id,
+            LANGFUSE_METADATA_SESSION_ID: tracker.sender_id if tracker else "",
+            LANGFUSE_CUSTOM_METADATA_DICT: {"component": self.__class__.__name__},
+            LANGFUSE_TAGS: [self.__class__.__name__],
+        }
+
+        response = await self.invoke_llm(flow_prompt, metadata)
         llm_response = LLMResponse.ensure_llm_response(response)
         # The check for 'None' maintains compatibility with older versions
         # of LLMCommandGenerator. In previous implementations, 'invoke_llm'
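Taken together, these generator changes attach Langfuse trace metadata to each command-generator LLM call. A minimal sketch of the dictionary that now travels with the prompt, using the literal key values added to rasa/shared/constants.py; the concrete values shown here are made-up illustration data, not output from Rasa:

    # Sketch only: key names come from rasa.shared.constants
    # (LANGFUSE_METADATA_USER_ID = "trace_user_id", etc.); values are hypothetical.
    metadata = {
        "trace_user_id": "AcmeCorp_admin@acme.com_licence-jti",  # licence owner id
        "session_id": "conversation-42",                         # tracker.sender_id
        "trace_metadata": {"component": "CompactLLMCommandGenerator"},
        "tags": ["CompactLLMCommandGenerator"],
    }
    # The dict is forwarded unchanged via invoke_llm(flow_prompt, metadata)
    # and ends up as the `metadata` argument of the LiteLLM completion call.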
rasa/hooks.py
CHANGED
@@ -1,8 +1,20 @@
 import argparse
 import logging
+import os
 from typing import TYPE_CHECKING, List, Optional, Text, Union
 
+import litellm
 import pluggy
+import structlog
+
+from rasa.shared.providers.constants import (
+    LANGFUSE_CALLBACK_NAME,
+    LANGFUSE_HOST_ENV_VAR,
+    LANGFUSE_PROJECT_ID_ENV_VAR,
+    LANGFUSE_PUBLIC_KEY_ENV_VAR,
+    LANGFUSE_SECRET_KEY_ENV_VAR,
+    RASA_LANGFUSE_INTEGRATION_ENABLED_ENV_VAR,
+)
 
 # IMPORTANT: do not import anything from rasa here - use scoped imports
 # this avoids circular imports, as the hooks are used in different places
@@ -18,6 +30,7 @@ if TYPE_CHECKING:
 
 hookimpl = pluggy.HookimplMarker("rasa")
 logger = logging.getLogger(__name__)
+structlogger = structlog.get_logger()
 
 
 @hookimpl  # type: ignore[misc]
@@ -57,6 +70,8 @@ def configure_commandline(cmdline_arguments: argparse.Namespace) -> Optional[Text]:
     config.configure_tracing(tracer_provider)
     config.configure_metrics(endpoints_file)
 
+    _init_langfuse_integration()
+
     return endpoints_file
 
 
@@ -115,3 +130,43 @@ def after_server_stop() -> None:
 
     if anon_pipeline is not None:
         anon_pipeline.stop()
+
+
+def _is_langfuse_integration_enabled() -> bool:
+    return (
+        os.environ.get(RASA_LANGFUSE_INTEGRATION_ENABLED_ENV_VAR, "false").lower()
+        == "true"
+    )
+
+
+def _init_langfuse_integration() -> None:
+    if not _is_langfuse_integration_enabled():
+        structlogger.info(
+            "hooks._init_langfuse_integration.disabled",
+            event_info="Langfuse integration is disabled.",
+        )
+        return
+
+    if (
+        not os.environ.get(LANGFUSE_HOST_ENV_VAR)
+        or not os.environ.get(LANGFUSE_PROJECT_ID_ENV_VAR)
+        or not os.environ.get(LANGFUSE_PUBLIC_KEY_ENV_VAR)
+        or not os.environ.get(LANGFUSE_SECRET_KEY_ENV_VAR)
+    ):
+        structlogger.warning(
+            "hooks._init_langfuse_integration.missing_langfuse_keys",
+            event_info=(
+                "Langfuse integration is enabled, but some environment variables"
+                "are missing. Please set LANGFUSE_HOST, LANGFUSE_PROJECT_ID, "
+                "LANGFUSE_PUBLIC_KEY and LANGFUSE_SECRET_KEY environment "
+                "variables to use Langfuse integration."
+            ),
+        )
+        return
+
+    litellm.success_callback = [LANGFUSE_CALLBACK_NAME]
+    litellm.failure_callback = [LANGFUSE_CALLBACK_NAME]
+    structlogger.info(
+        "hooks.langfuse_callbacks_initialized",
+        event_info="Langfuse integration initialized.",
+    )
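The hook is driven entirely by environment variables. A minimal sketch of the equivalent manual setup, assuming the same variables the hook checks (the key values below are placeholders, not real credentials):

    import os
    import litellm

    # Placeholders: real values come from your Langfuse project settings.
    os.environ["RASA_LANGFUSE_INTEGRATION_ENABLED"] = "true"
    os.environ["LANGFUSE_HOST"] = "https://cloud.langfuse.com"
    os.environ["LANGFUSE_PROJECT_ID"] = "my-project-id"
    os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-lf-..."
    os.environ["LANGFUSE_SECRET_KEY"] = "sk-lf-..."

    # Effectively what _init_langfuse_integration() does once the variables
    # above are set: register LiteLLM's built-in "langfuse" callback for both
    # successful and failed LLM calls.
    litellm.success_callback = ["langfuse"]
    litellm.failure_callback = ["langfuse"]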
rasa/monkey_patches.py
ADDED
@@ -0,0 +1,91 @@
+import os
+import traceback
+from typing import Any, Optional
+
+from litellm.secret_managers.main import str_to_bool
+from packaging.version import Version
+
+
+def litellm_langfuse_logger_init_fixed(
+    self: Any,  # we should not import LangfuseLogger class before we patch it
+    langfuse_public_key: Optional[str] = None,
+    langfuse_secret: Optional[str] = None,
+    langfuse_host: str = "https://cloud.langfuse.com",
+    flush_interval: int = 1,
+) -> None:
+    """Monkeypatched version of LangfuseLogger.__init__ from the LiteLLM library.
+
+    This patched version removes a call that fetched the `project_id` from
+    Langfuse Cloud even when it was already set via environment variables.
+    In the original implementation, this call was made *before* initializing
+    the LangfuseClient, which caused the application to freeze for up to 60 seconds.
+
+    By removing this premature call, the monkeypatch avoids the unnecessary network
+    request and prevents the timeout/freeze issue.
+
+    This workaround can be removed once the underlying bug is resolved in LiteLLM:
+    https://github.com/BerriAI/litellm/issues/7732
+    """
+    try:
+        import langfuse
+        from langfuse import Langfuse
+    except Exception as e:
+        raise Exception(
+            f"\033[91mLangfuse not installed, try running 'pip install langfuse' "
+            f"to fix this error: {e}\n{traceback.format_exc()}\033[0m"
+        )
+    # Instance variables
+    self.secret_key = langfuse_secret or os.getenv("LANGFUSE_SECRET_KEY", "")
+    self.public_key = langfuse_public_key or os.getenv("LANGFUSE_PUBLIC_KEY", "")
+
+    self.langfuse_host = langfuse_host or os.getenv(
+        "LANGFUSE_HOST", "https://cloud.langfuse.com"
+    )
+    self.langfuse_host.replace("http://", "https://")
+    if not self.langfuse_host.startswith("https://"):
+        self.langfuse_host = "https://" + self.langfuse_host
+
+    self.langfuse_release = os.getenv("LANGFUSE_RELEASE")
+    self.langfuse_debug = os.getenv("LANGFUSE_DEBUG")
+    self.langfuse_flush_interval = (
+        os.getenv("LANGFUSE_FLUSH_INTERVAL") or flush_interval
+    )
+
+    parameters = {
+        "public_key": self.public_key,
+        "secret_key": self.secret_key,
+        "host": self.langfuse_host,
+        "release": self.langfuse_release,
+        "debug": self.langfuse_debug,
+        "flush_interval": self.langfuse_flush_interval,  # flush interval in seconds
+    }
+
+    if Version(langfuse.version.__version__) >= Version("2.6.0"):
+        parameters["sdk_integration"] = "litellm"
+
+    self.Langfuse = Langfuse(**parameters)
+
+    if os.getenv("UPSTREAM_LANGFUSE_SECRET_KEY") is not None:
+        upstream_langfuse_debug = (
+            str_to_bool(self.upstream_langfuse_debug)
+            if self.upstream_langfuse_debug is not None
+            else None
+        )
+        self.upstream_langfuse_secret_key = os.getenv("UPSTREAM_LANGFUSE_SECRET_KEY")
+        self.upstream_langfuse_public_key = os.getenv("UPSTREAM_LANGFUSE_PUBLIC_KEY")
+        self.upstream_langfuse_host = os.getenv("UPSTREAM_LANGFUSE_HOST")
+        self.upstream_langfuse_release = os.getenv("UPSTREAM_LANGFUSE_RELEASE")
+        self.upstream_langfuse_debug = os.getenv("UPSTREAM_LANGFUSE_DEBUG")
+        self.upstream_langfuse = Langfuse(
+            public_key=self.upstream_langfuse_public_key,
+            secret_key=self.upstream_langfuse_secret_key,
+            host=self.upstream_langfuse_host,
+            release=self.upstream_langfuse_release,
+            debug=(
+                upstream_langfuse_debug
+                if upstream_langfuse_debug is not None
+                else False
+            ),
+        )
+    else:
+        self.upstream_langfuse = None
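The module only defines the replacement function; somewhere else it still has to be assigned over the original `__init__`. A minimal sketch of how such a patch is typically wired up; the LiteLLM import path below is an assumption (it differs between LiteLLM versions), and this is not necessarily how rasa-pro itself applies it:

    from rasa.monkey_patches import litellm_langfuse_logger_init_fixed

    # Assumption: the Langfuse logger class lives here in the pinned LiteLLM
    # version; other versions expose it under a different module path.
    from litellm.integrations.langfuse.langfuse import LangFuseLogger

    # Replace the slow __init__ before any LLM call triggers Langfuse logging.
    LangFuseLogger.__init__ = litellm_langfuse_logger_init_fixed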
rasa/shared/constants.py
CHANGED
@@ -338,3 +338,8 @@ ROLE_SYSTEM = "system"
 # Used for key values in ValidateSlotPatternFlowStackFrame
 REFILL_UTTER = "refill_utter"
 REJECTIONS = "rejections"
+
+LANGFUSE_METADATA_USER_ID = "trace_user_id"
+LANGFUSE_METADATA_SESSION_ID = "session_id"
+LANGFUSE_CUSTOM_METADATA_DICT = "trace_metadata"
+LANGFUSE_TAGS = "tags"
rasa/shared/core/slot_mappings.py
CHANGED
@@ -115,6 +115,18 @@ class SlotMapping(BaseModel):
             )
             data_copy[KEY_RUN_ACTION_EVERY_TURN] = deprecated_action
 
+            structlogger.warning(
+                "slot_mapping.deprecated_action_key_replaced_with_run_action_every_turn",
+                slot_name=slot_name,
+                event_info=f"The `{KEY_ACTION}` key in slot mappings "
+                f"has been replaced with "
+                f"the `{KEY_RUN_ACTION_EVERY_TURN}` key. "
+                f"This will result in the custom action "
+                f"being executed at every conversation turn "
+                f"automatically. Remove the key "
+                f"to avoid this behavior.",
+            )
+
         run_action_every_turn = data_copy.pop(KEY_RUN_ACTION_EVERY_TURN, None)
 
         coexistence_system = data_copy.pop(KEY_COEXISTENCE_SYSTEM, None)
rasa/shared/providers/constants.py
CHANGED
@@ -4,3 +4,12 @@ LITE_LLM_API_KEY_FIELD = "api_key"
 LITE_LLM_API_VERSION_FIELD = "api_version"
 LITE_LLM_MODEL_FIELD = "model"
 LITE_LLM_AZURE_AD_TOKEN = "azure_ad_token"
+
+# Enable or disable Langfuse integration
+RASA_LANGFUSE_INTEGRATION_ENABLED_ENV_VAR = "RASA_LANGFUSE_INTEGRATION_ENABLED"
+# Langfuse configuration
+LANGFUSE_CALLBACK_NAME = "langfuse"
+LANGFUSE_HOST_ENV_VAR = "LANGFUSE_HOST"
+LANGFUSE_PROJECT_ID_ENV_VAR = "LANGFUSE_PROJECT_ID"
+LANGFUSE_PUBLIC_KEY_ENV_VAR = "LANGFUSE_PUBLIC_KEY"
+LANGFUSE_SECRET_KEY_ENV_VAR = "LANGFUSE_SECRET_KEY"
rasa/shared/providers/llm/_base_litellm_client.py
CHANGED
@@ -2,7 +2,7 @@ from __future__ import annotations
 
 import logging
 from abc import abstractmethod
-from typing import Any, Dict, List, Union, cast
+from typing import Any, Dict, List, Optional, Union, cast
 
 import structlog
 from litellm import acompletion, completion, validate_environment
@@ -120,7 +120,11 @@ class _BaseLiteLLMClient:
             raise ProviderClientValidationError(event_info)
 
     @suppress_logs(log_level=logging.WARNING)
-    def completion(
+    def completion(
+        self,
+        messages: Union[List[dict], List[str], str],
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> LLMResponse:
         """Synchronously generate completions for given list of messages.
 
         Args:
@@ -132,6 +136,7 @@ class _BaseLiteLLMClient:
             - a list of messages. Each message is a string and will be formatted
               as a user message.
             - a single message as a string which will be formatted as user message.
+            metadata: Optional metadata to be passed to the LLM call.
 
         Returns:
             List of message completions.
@@ -149,7 +154,9 @@ class _BaseLiteLLMClient:
 
     @suppress_logs(log_level=logging.WARNING)
     async def acompletion(
-        self,
+        self,
+        messages: Union[List[dict], List[str], str],
+        metadata: Optional[Dict[str, Any]] = None,
     ) -> LLMResponse:
         """Asynchronously generate completions for given list of messages.
 
@@ -162,6 +169,7 @@ class _BaseLiteLLMClient:
             - a list of messages. Each message is a string and will be formatted
              as a user message.
            - a single message as a string which will be formatted as user message.
+            metadata: Optional metadata to be passed to the LLM call.
 
         Returns:
             List of message completions.
@@ -172,7 +180,9 @@ class _BaseLiteLLMClient:
         try:
             formatted_messages = self._get_formatted_messages(messages)
             arguments = resolve_environment_variables(self._completion_fn_args)
-            response = await acompletion(
+            response = await acompletion(
+                messages=formatted_messages, metadata=metadata, **arguments
+            )
             return self._format_response(response)
         except Exception as e:
             message = ""
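The net effect is that whatever metadata a caller supplies is handed straight through to LiteLLM, where the registered Langfuse callback reads it. A minimal standalone sketch of the underlying call; the model name and values are placeholders and the snippet assumes valid provider credentials in the environment:

    import asyncio
    import litellm

    async def main() -> None:
        # Placeholder model; the Langfuse callback picks up the metadata keys.
        response = await litellm.acompletion(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": "hello"}],
            metadata={
                "trace_user_id": "licence-owner",
                "session_id": "conversation-42",
                "trace_metadata": {"component": "ExampleComponent"},
                "tags": ["ExampleComponent"],
            },
        )
        print(response.choices[0].message.content)

    asyncio.run(main())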
rasa/shared/providers/llm/litellm_router_llm_client.py
CHANGED
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import logging
-from typing import Any, Dict, List, Union
+from typing import Any, Dict, List, Optional, Union
 
 import structlog
 
@@ -122,9 +122,12 @@ class LiteLLMRouterLLMClient(_BaseLiteLLMRouterClient, _BaseLiteLLMClient):
             raise ProviderClientAPIException(e)
 
     @suppress_logs(log_level=logging.WARNING)
-    def completion(
-
-
+    def completion(
+        self,
+        messages: Union[List[dict], List[str], str],
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> LLMResponse:
+        """Synchronously generate completions for given list of messages.
 
         Method overrides the base class method to call the appropriate
         completion method based on the configuration. If the chat completions
@@ -140,8 +143,11 @@ class LiteLLMRouterLLMClient(_BaseLiteLLMRouterClient, _BaseLiteLLMClient):
             - a list of messages. Each message is a string and will be formatted
               as a user message.
             - a single message as a string which will be formatted as user message.
+            metadata: Optional metadata to be passed to the LLM call.
+
         Returns:
             List of message completions.
+
         Raises:
             ProviderClientAPIException: If the API request fails.
         """
@@ -158,10 +164,11 @@ class LiteLLMRouterLLMClient(_BaseLiteLLMRouterClient, _BaseLiteLLMClient):
 
     @suppress_logs(log_level=logging.WARNING)
     async def acompletion(
-        self,
+        self,
+        messages: Union[List[dict], List[str], str],
+        metadata: Optional[Dict[str, Any]] = None,
     ) -> LLMResponse:
-        """
-        Asynchronously generate completions for given list of messages.
+        """Asynchronously generate completions for given list of messages.
 
         Method overrides the base class method to call the appropriate
         completion method based on the configuration. If the chat completions
@@ -177,8 +184,11 @@ class LiteLLMRouterLLMClient(_BaseLiteLLMRouterClient, _BaseLiteLLMClient):
             - a list of messages. Each message is a string and will be formatted
               as a user message.
             - a single message as a string which will be formatted as user message.
+            metadata: Optional metadata to be passed to the LLM call.
+
         Returns:
             List of message completions.
+
         Raises:
             ProviderClientAPIException: If the API request fails.
         """
rasa/shared/providers/llm/llm_client.py
CHANGED
@@ -1,21 +1,19 @@
 from __future__ import annotations
 
-from typing import Dict, List, Protocol, Union, runtime_checkable
+from typing import Any, Dict, List, Optional, Protocol, Union, runtime_checkable
 
 from rasa.shared.providers.llm.llm_response import LLMResponse
 
 
 @runtime_checkable
 class LLMClient(Protocol):
-    """
-    Protocol for an LLM client that specifies the interface for interacting
+    """Protocol for an LLM client that specifies the interface for interacting
     with the API.
     """
 
     @classmethod
     def from_config(cls, config: dict) -> LLMClient:
-        """
-        Initializes the llm client with the given configuration.
+        """Initializes the llm client with the given configuration.
 
         This class method should be implemented to parse the given
         configuration and create an instance of an llm client.
@@ -24,17 +22,24 @@ class LLMClient(Protocol):
 
     @property
     def config(self) -> Dict:
-        """
-        Returns the configuration for that the llm client is initialized with.
+        """Returns the configuration for that the llm client is initialized with.
 
         This property should be implemented to return a dictionary containing
         the configuration settings for the llm client.
         """
         ...
 
-    def completion(
-
-
+    def completion(
+        self,
+        messages: Union[List[dict], List[str], str],
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> LLMResponse:
+        """Synchronously generate completions for given list of messages.
+    def completion(
+        self,
+        messages: Union[List[dict], List[str], str],
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> LLMResponse:
 
         This method should be implemented to take a list of messages (as
         strings) and return a list of completions (as strings).
@@ -48,16 +53,19 @@ class LLMClient(Protocol):
             - a list of messages. Each message is a string and will be formatted
               as a user message.
             - a single message as a string which will be formatted as user message.
+            metadata: Optional metadata to be passed to the LLM call.
+
         Returns:
             LLMResponse
         """
         ...
 
     async def acompletion(
-        self,
+        self,
+        messages: Union[List[dict], List[str], str],
+        metadata: Optional[Dict[str, Any]] = None,
     ) -> LLMResponse:
-        """
-        Asynchronously generate completions for given list of messages.
+        """Asynchronously generate completions for given list of messages.
 
         This method should be implemented to take a list of messages (as
         strings) and return a list of completions (as strings).
@@ -71,14 +79,15 @@ class LLMClient(Protocol):
             - a list of messages. Each message is a string and will be formatted
               as a user message.
             - a single message as a string which will be formatted as user message.
+            metadata: Optional metadata to be passed to the LLM call.
+
         Returns:
             LLMResponse
         """
         ...
 
     def validate_client_setup(self, *args, **kwargs) -> None:  # type: ignore
-        """
-        Perform client setup validation.
+        """Perform client setup validation.
 
         This method should be implemented to validate whether the client can be
         used with the parameters provided through configuration or environment
rasa/shared/providers/llm/self_hosted_llm_client.py
CHANGED
@@ -237,7 +237,9 @@ class SelfHostedLLMClient(_BaseLiteLLMClient):
             raise ProviderClientAPIException(e)
 
     async def acompletion(
-        self,
+        self,
+        messages: Union[List[dict], List[str], str],
+        metadata: Optional[Dict[str, Any]] = None,
    ) -> LLMResponse:
         """Asynchronous completion of the model with the given messages.
 
@@ -255,6 +257,7 @@ class SelfHostedLLMClient(_BaseLiteLLMClient):
             - a list of messages. Each message is a string and will be formatted
               as a user message.
             - a single message as a string which will be formatted as user message.
+            metadata: Optional metadata to be passed to the LLM call.
 
         Returns:
             The completion response.
@@ -263,7 +266,11 @@ class SelfHostedLLMClient(_BaseLiteLLMClient):
             return await super().acompletion(messages)
         return await self._atext_completion(messages)
 
-    def completion(
+    def completion(
+        self,
+        messages: Union[List[dict], List[str], str],
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> LLMResponse:
         """Completion of the model with the given messages.
 
         Method overrides the base class method to call the appropriate
@@ -273,6 +280,7 @@ class SelfHostedLLMClient(_BaseLiteLLMClient):
 
         Args:
             messages: The messages to be used for completion.
+            metadata: Optional metadata to be passed to the LLM call.
 
         Returns:
             The completion response.
rasa/shared/utils/health_check/health_check.py
CHANGED
@@ -3,6 +3,7 @@ import sys
 from typing import Any, Dict, Optional
 
 from rasa.shared.constants import (
+    LANGFUSE_CUSTOM_METADATA_DICT,
     LLM_API_HEALTH_CHECK_DEFAULT_VALUE,
     LLM_API_HEALTH_CHECK_ENV_VAR,
     MODELS_CONFIG_KEY,
@@ -198,7 +199,12 @@ def send_test_llm_api_request(
         config=llm_client.config,
     )
     try:
-        llm_client.completion(
+        llm_client.completion(
+            "hello",
+            metadata={
+                LANGFUSE_CUSTOM_METADATA_DICT: {"component": log_source_component}
+            },
+        )
     except Exception as e:
         structlogger.error(
             f"{log_source_function}.send_test_llm_api_request_failed",
rasa/tracing/instrumentation/attribute_extractors.py
CHANGED
@@ -372,6 +372,7 @@ def extract_llm_config(
 def extract_attrs_for_llm_based_command_generator(
     self: "LLMBasedCommandGenerator",
     prompt: str,
+    metadata: Optional[Dict[str, Any]] = None,
 ) -> Dict[str, Any]:
     from rasa.dialogue_understanding.generator.flow_retrieval import (
         DEFAULT_EMBEDDINGS_CONFIG,
@@ -387,8 +388,7 @@ def extract_attrs_for_llm_based_command_generator(
 
 
 def extract_attrs_for_contextual_response_rephraser(
-    self: Any,
-    prompt: str,
+    self: Any, prompt: str, sender_id: str
 ) -> Dict[str, Any]:
     from rasa.core.nlg.contextual_response_rephraser import DEFAULT_LLM_CONFIG
 
@@ -721,7 +721,7 @@ def extract_attrs_for_intentless_policy_find_closest_response(
 
 
 def extract_attrs_for_intentless_policy_generate_llm_answer(
-    self: "IntentlessPolicy", llm: "BaseLLM", prompt: str
+    self: "IntentlessPolicy", llm: "BaseLLM", prompt: str, sender_id: str
 ) -> Dict[str, Any]:
     from rasa.core.policies.intentless_policy import (
         DEFAULT_EMBEDDINGS_CONFIG,
@@ -738,7 +738,7 @@ def extract_attrs_for_intentless_policy_generate_llm_answer(
 
 
 def extract_attrs_for_enterprise_search_generate_llm_answer(
-    self: "EnterpriseSearchPolicy", llm: "BaseLLM", prompt: str
+    self: "EnterpriseSearchPolicy", llm: "BaseLLM", prompt: str, sender_id: str
 ) -> Dict[str, Any]:
     from rasa.core.policies.enterprise_search_policy import (
         DEFAULT_EMBEDDINGS_CONFIG,
rasa/tracing/instrumentation/intentless_policy_instrumentation.py
CHANGED
@@ -121,12 +121,13 @@ def _instrument_generate_answer(
         response_examples: List[str],
         conversation_samples: List[str],
         history: str,
+        sender_id: str,
     ) -> Optional[str]:
         with tracer.start_as_current_span(
             f"{self.__class__.__name__}.{fn.__name__}"
         ) as span:
             llm_response = await fn(
-                self, response_examples, conversation_samples, history
+                self, response_examples, conversation_samples, history, sender_id
             )
             span.set_attributes(
                 {
rasa/utils/licensing.py
CHANGED
@@ -539,3 +539,18 @@ async def _count_conversations_after(
         return 0
 
     return await tracker_store.count_conversations(after_timestamp=after_timestamp)
+
+
+def get_human_readable_licence_owner() -> str:
+    user_id = "unknown"
+
+    try:
+        retrieved_license = retrieve_license_from_env()
+        if retrieved_license:
+            decoded = License.decode(retrieved_license)
+            if decoded:
+                user_id = (
+                    f"{decoded.company or ''}_{decoded.email or ''}_{decoded.jti or ''}"
+                )
+    finally:
+        return user_id
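This identifier is what the command generator stores as `self.user_id` and sends as the trace_user_id metadata value. A quick illustration of the format, mirroring the f-string above with made-up licence fields:

    # Made-up licence fields, purely for illustration.
    company, email, jti = "AcmeCorp", "bot-admin@acme.com", "1a2b3c"
    user_id = f"{company or ''}_{email or ''}_{jti or ''}"
    print(user_id)  # -> AcmeCorp_bot-admin@acme.com_1a2b3c
    # If no licence can be retrieved or decoded, the function returns "unknown".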
rasa/version.py
CHANGED