rasa-pro 3.12.0.dev13__py3-none-any.whl → 3.12.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rasa-pro might be problematic. Click here for more details.
- rasa/anonymization/anonymization_rule_executor.py +16 -10
- rasa/cli/data.py +16 -0
- rasa/cli/project_templates/calm/config.yml +2 -2
- rasa/cli/project_templates/calm/endpoints.yml +2 -2
- rasa/cli/utils.py +12 -0
- rasa/core/actions/action.py +84 -191
- rasa/core/actions/action_run_slot_rejections.py +16 -4
- rasa/core/channels/__init__.py +2 -0
- rasa/core/channels/studio_chat.py +19 -0
- rasa/core/channels/telegram.py +42 -24
- rasa/core/channels/voice_ready/utils.py +1 -1
- rasa/core/channels/voice_stream/asr/asr_engine.py +10 -4
- rasa/core/channels/voice_stream/asr/azure.py +14 -1
- rasa/core/channels/voice_stream/asr/deepgram.py +20 -4
- rasa/core/channels/voice_stream/audiocodes.py +264 -0
- rasa/core/channels/voice_stream/browser_audio.py +4 -1
- rasa/core/channels/voice_stream/call_state.py +3 -0
- rasa/core/channels/voice_stream/genesys.py +6 -2
- rasa/core/channels/voice_stream/tts/azure.py +9 -1
- rasa/core/channels/voice_stream/tts/cartesia.py +14 -8
- rasa/core/channels/voice_stream/voice_channel.py +23 -2
- rasa/core/constants.py +2 -0
- rasa/core/nlg/contextual_response_rephraser.py +18 -1
- rasa/core/nlg/generator.py +83 -15
- rasa/core/nlg/response.py +6 -3
- rasa/core/nlg/translate.py +55 -0
- rasa/core/policies/enterprise_search_prompt_with_citation_template.jinja2 +1 -1
- rasa/core/policies/flows/flow_executor.py +12 -5
- rasa/core/processor.py +72 -9
- rasa/dialogue_understanding/commands/can_not_handle_command.py +20 -2
- rasa/dialogue_understanding/commands/cancel_flow_command.py +24 -6
- rasa/dialogue_understanding/commands/change_flow_command.py +20 -2
- rasa/dialogue_understanding/commands/chit_chat_answer_command.py +20 -2
- rasa/dialogue_understanding/commands/clarify_command.py +29 -3
- rasa/dialogue_understanding/commands/command.py +1 -16
- rasa/dialogue_understanding/commands/command_syntax_manager.py +55 -0
- rasa/dialogue_understanding/commands/human_handoff_command.py +20 -2
- rasa/dialogue_understanding/commands/knowledge_answer_command.py +20 -2
- rasa/dialogue_understanding/commands/prompt_command.py +94 -0
- rasa/dialogue_understanding/commands/repeat_bot_messages_command.py +20 -2
- rasa/dialogue_understanding/commands/set_slot_command.py +24 -2
- rasa/dialogue_understanding/commands/skip_question_command.py +20 -2
- rasa/dialogue_understanding/commands/start_flow_command.py +20 -2
- rasa/dialogue_understanding/commands/utils.py +98 -4
- rasa/dialogue_understanding/generator/__init__.py +2 -0
- rasa/dialogue_understanding/generator/command_parser.py +15 -12
- rasa/dialogue_understanding/generator/constants.py +3 -0
- rasa/dialogue_understanding/generator/llm_based_command_generator.py +12 -5
- rasa/dialogue_understanding/generator/llm_command_generator.py +5 -3
- rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +16 -2
- rasa/dialogue_understanding/generator/prompt_templates/__init__.py +0 -0
- rasa/dialogue_understanding/generator/{single_step → prompt_templates}/command_prompt_template.jinja2 +2 -0
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v2_claude_3_5_sonnet_20240620_template.jinja2 +77 -0
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v2_default.jinja2 +68 -0
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v2_gpt_4o_2024_11_20_template.jinja2 +84 -0
- rasa/dialogue_understanding/generator/single_step/compact_llm_command_generator.py +460 -0
- rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +12 -310
- rasa/dialogue_understanding/patterns/collect_information.py +1 -1
- rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +16 -0
- rasa/dialogue_understanding/patterns/validate_slot.py +65 -0
- rasa/dialogue_understanding/processor/command_processor.py +39 -0
- rasa/dialogue_understanding_test/du_test_case.py +28 -8
- rasa/dialogue_understanding_test/du_test_result.py +13 -9
- rasa/dialogue_understanding_test/io.py +14 -0
- rasa/e2e_test/utils/io.py +0 -37
- rasa/engine/graph.py +1 -0
- rasa/engine/language.py +140 -0
- rasa/engine/recipes/config_files/default_config.yml +4 -0
- rasa/engine/recipes/default_recipe.py +2 -0
- rasa/engine/recipes/graph_recipe.py +2 -0
- rasa/engine/storage/local_model_storage.py +1 -0
- rasa/engine/storage/storage.py +4 -1
- rasa/model_manager/runner_service.py +7 -4
- rasa/model_manager/socket_bridge.py +7 -6
- rasa/shared/constants.py +15 -13
- rasa/shared/core/constants.py +2 -0
- rasa/shared/core/flows/constants.py +11 -0
- rasa/shared/core/flows/flow.py +83 -19
- rasa/shared/core/flows/flows_yaml_schema.json +31 -3
- rasa/shared/core/flows/steps/collect.py +1 -36
- rasa/shared/core/flows/utils.py +28 -4
- rasa/shared/core/flows/validation.py +1 -1
- rasa/shared/core/slot_mappings.py +208 -5
- rasa/shared/core/slots.py +131 -1
- rasa/shared/core/trackers.py +74 -1
- rasa/shared/importers/importer.py +50 -2
- rasa/shared/nlu/training_data/schemas/responses.yml +19 -12
- rasa/shared/providers/_configs/azure_entra_id_config.py +541 -0
- rasa/shared/providers/_configs/azure_openai_client_config.py +138 -3
- rasa/shared/providers/_configs/client_config.py +3 -1
- rasa/shared/providers/_configs/default_litellm_client_config.py +3 -1
- rasa/shared/providers/_configs/huggingface_local_embedding_client_config.py +3 -1
- rasa/shared/providers/_configs/litellm_router_client_config.py +3 -1
- rasa/shared/providers/_configs/model_group_config.py +4 -2
- rasa/shared/providers/_configs/oauth_config.py +33 -0
- rasa/shared/providers/_configs/openai_client_config.py +3 -1
- rasa/shared/providers/_configs/rasa_llm_client_config.py +3 -1
- rasa/shared/providers/_configs/self_hosted_llm_client_config.py +3 -1
- rasa/shared/providers/constants.py +6 -0
- rasa/shared/providers/embedding/azure_openai_embedding_client.py +28 -3
- rasa/shared/providers/embedding/litellm_router_embedding_client.py +3 -1
- rasa/shared/providers/llm/_base_litellm_client.py +42 -17
- rasa/shared/providers/llm/azure_openai_llm_client.py +81 -25
- rasa/shared/providers/llm/default_litellm_llm_client.py +3 -1
- rasa/shared/providers/llm/litellm_router_llm_client.py +29 -8
- rasa/shared/providers/llm/llm_client.py +23 -7
- rasa/shared/providers/llm/openai_llm_client.py +9 -3
- rasa/shared/providers/llm/rasa_llm_client.py +11 -2
- rasa/shared/providers/llm/self_hosted_llm_client.py +30 -11
- rasa/shared/providers/router/_base_litellm_router_client.py +3 -1
- rasa/shared/providers/router/router_client.py +3 -1
- rasa/shared/utils/constants.py +3 -0
- rasa/shared/utils/llm.py +30 -7
- rasa/shared/utils/pykwalify_extensions.py +24 -0
- rasa/shared/utils/schemas/domain.yml +26 -0
- rasa/telemetry.py +2 -1
- rasa/tracing/config.py +2 -0
- rasa/tracing/constants.py +12 -0
- rasa/tracing/instrumentation/instrumentation.py +36 -0
- rasa/tracing/instrumentation/metrics.py +41 -0
- rasa/tracing/metric_instrument_provider.py +40 -0
- rasa/validator.py +372 -7
- rasa/version.py +1 -1
- {rasa_pro-3.12.0.dev13.dist-info → rasa_pro-3.12.0rc1.dist-info}/METADATA +2 -1
- {rasa_pro-3.12.0.dev13.dist-info → rasa_pro-3.12.0rc1.dist-info}/RECORD +128 -113
- {rasa_pro-3.12.0.dev13.dist-info → rasa_pro-3.12.0rc1.dist-info}/NOTICE +0 -0
- {rasa_pro-3.12.0.dev13.dist-info → rasa_pro-3.12.0rc1.dist-info}/WHEEL +0 -0
- {rasa_pro-3.12.0.dev13.dist-info → rasa_pro-3.12.0rc1.dist-info}/entry_points.txt +0 -0
rasa/shared/utils/llm.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import importlib.resources
|
|
1
2
|
import json
|
|
2
3
|
from copy import deepcopy
|
|
3
4
|
from functools import wraps
|
|
@@ -19,6 +20,8 @@ import structlog
|
|
|
19
20
|
import rasa.shared.utils.io
|
|
20
21
|
from rasa.core.utils import AvailableEndpoints
|
|
21
22
|
from rasa.shared.constants import (
|
|
23
|
+
DEFAULT_PROMPT_PACKAGE_NAME,
|
|
24
|
+
MODEL_CONFIG_KEY,
|
|
22
25
|
MODEL_GROUP_CONFIG_KEY,
|
|
23
26
|
MODEL_GROUP_ID_CONFIG_KEY,
|
|
24
27
|
MODELS_CONFIG_KEY,
|
|
@@ -29,9 +32,7 @@ from rasa.shared.constants import (
|
|
|
29
32
|
)
|
|
30
33
|
from rasa.shared.core.events import BotUttered, UserUttered
|
|
31
34
|
from rasa.shared.core.slots import BooleanSlot, CategoricalSlot, Slot
|
|
32
|
-
from rasa.shared.engine.caching import
|
|
33
|
-
get_local_cache_location,
|
|
34
|
-
)
|
|
35
|
+
from rasa.shared.engine.caching import get_local_cache_location
|
|
35
36
|
from rasa.shared.exceptions import (
|
|
36
37
|
FileIOException,
|
|
37
38
|
FileNotFoundException,
|
|
@@ -355,13 +356,13 @@ def _combine_single_model_configs(
|
|
|
355
356
|
)
|
|
356
357
|
# Checks for deprecated keys, resolves aliases and returns a valid config.
|
|
357
358
|
# This is done to ensure that the custom config is valid.
|
|
358
|
-
return client_config_clazz.from_dict(custom_config).to_dict()
|
|
359
|
+
return client_config_clazz.from_dict(deepcopy(custom_config)).to_dict()
|
|
359
360
|
|
|
360
361
|
# If the provider is the same in both configs
|
|
361
362
|
# OR provider is not specified in the custom config
|
|
362
363
|
# perform MERGE by overriding the default config keys and values
|
|
363
364
|
# with custom config keys and values.
|
|
364
|
-
merged_config = {**default_config
|
|
365
|
+
merged_config = {**deepcopy(default_config), **deepcopy(custom_config)}
|
|
365
366
|
# Check for deprecated keys, resolve aliases and return a valid config.
|
|
366
367
|
# This is done to ensure that the merged config is valid.
|
|
367
368
|
default_config_clazz = get_client_config_class_from_provider(
|
|
@@ -512,7 +513,7 @@ def llm_client_factory(
|
|
|
512
513
|
Returns:
|
|
513
514
|
Instantiated LLM based on the configuration.
|
|
514
515
|
"""
|
|
515
|
-
config = combine_custom_and_default_config(custom_config, default_config)
|
|
516
|
+
config = combine_custom_and_default_config(deepcopy(custom_config), default_config)
|
|
516
517
|
|
|
517
518
|
ensure_cache()
|
|
518
519
|
|
|
@@ -641,7 +642,7 @@ def embedder_client_factory(
|
|
|
641
642
|
Returns:
|
|
642
643
|
Instantiated Embedder based on the configuration.
|
|
643
644
|
"""
|
|
644
|
-
config = combine_custom_and_default_config(custom_config, default_config)
|
|
645
|
+
config = combine_custom_and_default_config(deepcopy(custom_config), default_config)
|
|
645
646
|
|
|
646
647
|
ensure_cache()
|
|
647
648
|
|
|
@@ -675,6 +676,28 @@ def get_prompt_template(
|
|
|
675
676
|
return default_prompt_template
|
|
676
677
|
|
|
677
678
|
|
|
679
|
+
def get_default_prompt_template_based_on_model(
|
|
680
|
+
config: Dict[str, Any],
|
|
681
|
+
model_prompt_mapping: Dict[str, Any],
|
|
682
|
+
fallback_prompt_path: str,
|
|
683
|
+
) -> Text:
|
|
684
|
+
"""Returns the default prompt template based on the model name.
|
|
685
|
+
|
|
686
|
+
Args:
|
|
687
|
+
config: The model config.
|
|
688
|
+
model_prompt_mapping: The mapping of model name to prompt template.
|
|
689
|
+
fallback_prompt_path: The fallback prompt path.
|
|
690
|
+
|
|
691
|
+
Returns:
|
|
692
|
+
The default prompt template.
|
|
693
|
+
"""
|
|
694
|
+
provider = config.get(PROVIDER_CONFIG_KEY)
|
|
695
|
+
model = config.get(MODEL_CONFIG_KEY, "")
|
|
696
|
+
model_name = model if provider and provider in model else f"{provider}/{model}"
|
|
697
|
+
prompt_file_path = model_prompt_mapping.get(model_name, fallback_prompt_path)
|
|
698
|
+
return importlib.resources.read_text(DEFAULT_PROMPT_PACKAGE_NAME, prompt_file_path)
|
|
699
|
+
|
|
700
|
+
|
|
678
701
|
def allowed_values_for_slot(slot: Slot) -> Union[str, None]:
|
|
679
702
|
"""Get the allowed values for a slot."""
|
|
680
703
|
if isinstance(slot, BooleanSlot):
|
|
@@ -8,6 +8,11 @@ from typing import Any, Dict, List, Text, Union
|
|
|
8
8
|
|
|
9
9
|
from pykwalify.errors import SchemaError
|
|
10
10
|
|
|
11
|
+
from rasa.shared.utils.constants import (
|
|
12
|
+
RASA_PRO_BETA_PREDICATES_IN_RESPONSE_CONDITIONS_ENV_VAR_NAME,
|
|
13
|
+
)
|
|
14
|
+
from rasa.utils.beta import ensure_beta_feature_is_enabled
|
|
15
|
+
|
|
11
16
|
|
|
12
17
|
def require_response_keys(
|
|
13
18
|
responses: List[Dict[Text, Any]], _: Dict, __: Text
|
|
@@ -24,4 +29,23 @@ def require_response_keys(
|
|
|
24
29
|
"null 'text' value in response."
|
|
25
30
|
)
|
|
26
31
|
|
|
32
|
+
conditions = response.get("condition", [])
|
|
33
|
+
if isinstance(conditions, str):
|
|
34
|
+
ensure_beta_feature_is_enabled(
|
|
35
|
+
"predicates in response conditions",
|
|
36
|
+
RASA_PRO_BETA_PREDICATES_IN_RESPONSE_CONDITIONS_ENV_VAR_NAME,
|
|
37
|
+
)
|
|
38
|
+
continue
|
|
39
|
+
|
|
40
|
+
for condition in conditions:
|
|
41
|
+
if not isinstance(condition, dict):
|
|
42
|
+
return SchemaError("Condition must be a dictionary.")
|
|
43
|
+
if not all(key in condition for key in ("type", "name", "value")):
|
|
44
|
+
return SchemaError(
|
|
45
|
+
"Condition must have 'type', 'name', and 'value' keys."
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
if condition.get("type") != "slot":
|
|
49
|
+
return SchemaError("Condition type must be of type `slot`.")
|
|
50
|
+
|
|
27
51
|
return True
|
|
@@ -98,6 +98,11 @@ mapping:
|
|
|
98
98
|
type: "any"
|
|
99
99
|
action:
|
|
100
100
|
type: "str"
|
|
101
|
+
run_action_every_turn:
|
|
102
|
+
type: "str"
|
|
103
|
+
coexistence_system:
|
|
104
|
+
type: "str"
|
|
105
|
+
enum: ["NLU", "CALM", "SHARED"]
|
|
101
106
|
conditions:
|
|
102
107
|
type: "seq"
|
|
103
108
|
sequence:
|
|
@@ -111,6 +116,27 @@ mapping:
|
|
|
111
116
|
active_flow:
|
|
112
117
|
type: "str"
|
|
113
118
|
nullable: True
|
|
119
|
+
validation:
|
|
120
|
+
type: map
|
|
121
|
+
required: false
|
|
122
|
+
mapping:
|
|
123
|
+
rejections:
|
|
124
|
+
type: seq
|
|
125
|
+
required: true
|
|
126
|
+
sequence:
|
|
127
|
+
- type: map
|
|
128
|
+
mapping:
|
|
129
|
+
if:
|
|
130
|
+
type: str
|
|
131
|
+
required: true
|
|
132
|
+
utter:
|
|
133
|
+
type: str
|
|
134
|
+
required: true
|
|
135
|
+
refill_utter:
|
|
136
|
+
type: str
|
|
137
|
+
required: false
|
|
138
|
+
nullable: false
|
|
139
|
+
|
|
114
140
|
forms:
|
|
115
141
|
type: "map"
|
|
116
142
|
required: False
|
rasa/telemetry.py
CHANGED
|
@@ -529,7 +529,6 @@ def _send_request(url: Text, payload: Dict[Text, Any]) -> None:
|
|
|
529
529
|
"""
|
|
530
530
|
if _is_telemetry_debug_enabled():
|
|
531
531
|
print_telemetry_payload(payload)
|
|
532
|
-
return
|
|
533
532
|
|
|
534
533
|
write_key = _get_telemetry_write_key()
|
|
535
534
|
if not write_key:
|
|
@@ -1126,6 +1125,7 @@ def _get_llm_command_generator_config(config: Dict[str, Any]) -> Optional[Dict]:
|
|
|
1126
1125
|
retrieval is enabled, and flow retrieval embedding model.
|
|
1127
1126
|
"""
|
|
1128
1127
|
from rasa.dialogue_understanding.generator import (
|
|
1128
|
+
CompactLLMCommandGenerator,
|
|
1129
1129
|
LLMCommandGenerator,
|
|
1130
1130
|
MultiStepLLMCommandGenerator,
|
|
1131
1131
|
SingleStepLLMCommandGenerator,
|
|
@@ -1155,6 +1155,7 @@ def _get_llm_command_generator_config(config: Dict[str, Any]) -> Optional[Dict]:
|
|
|
1155
1155
|
LLMCommandGenerator.__name__,
|
|
1156
1156
|
SingleStepLLMCommandGenerator.__name__,
|
|
1157
1157
|
MultiStepLLMCommandGenerator.__name__,
|
|
1158
|
+
CompactLLMCommandGenerator.__name__,
|
|
1158
1159
|
]:
|
|
1159
1160
|
return component
|
|
1160
1161
|
return None
|
rasa/tracing/config.py
CHANGED
|
@@ -29,6 +29,7 @@ from rasa.dialogue_understanding.commands import (
|
|
|
29
29
|
FreeFormAnswerCommand,
|
|
30
30
|
)
|
|
31
31
|
from rasa.dialogue_understanding.generator import (
|
|
32
|
+
CompactLLMCommandGenerator,
|
|
32
33
|
LLMCommandGenerator,
|
|
33
34
|
MultiStepLLMCommandGenerator,
|
|
34
35
|
SingleStepLLMCommandGenerator,
|
|
@@ -110,6 +111,7 @@ def configure_tracing(tracer_provider: Optional[TracerProvider]) -> None:
|
|
|
110
111
|
endpoint_config_class=EndpointConfig,
|
|
111
112
|
grpc_custom_action_executor_class=GRPCCustomActionExecutor,
|
|
112
113
|
single_step_llm_command_generator_class=SingleStepLLMCommandGenerator,
|
|
114
|
+
compact_llm_command_generator_class=CompactLLMCommandGenerator,
|
|
113
115
|
multi_step_llm_command_generator_class=MultiStepLLMCommandGenerator,
|
|
114
116
|
custom_action_executor_subclasses=custom_action_executor_subclasses,
|
|
115
117
|
flow_retrieval_class=FlowRetrieval,
|
rasa/tracing/constants.py
CHANGED
|
@@ -32,6 +32,18 @@ SINGLE_STEP_LLM_COMMAND_GENERATOR_PROMPT_TOKEN_USAGE_METRIC_NAME = (
|
|
|
32
32
|
SINGLE_STEP_LLM_COMMAND_GENERATOR_LLM_RESPONSE_DURATION_METRIC_NAME = (
|
|
33
33
|
"single_step_llm_command_generator_llm_response_duration"
|
|
34
34
|
)
|
|
35
|
+
COMPACT_LLM_COMMAND_GENERATOR_CPU_USAGE_METRIC_NAME = (
|
|
36
|
+
"compact_llm_command_generator_cpu_usage"
|
|
37
|
+
)
|
|
38
|
+
COMPACT_LLM_COMMAND_GENERATOR_MEMORY_USAGE_METRIC_NAME = (
|
|
39
|
+
"compact_llm_command_generator_memory_usage"
|
|
40
|
+
)
|
|
41
|
+
COMPACT_LLM_COMMAND_GENERATOR_PROMPT_TOKEN_USAGE_METRIC_NAME = (
|
|
42
|
+
"compact_llm_command_generator_prompt_token_usage"
|
|
43
|
+
)
|
|
44
|
+
COMPACT_LLM_COMMAND_GENERATOR_LLM_RESPONSE_DURATION_METRIC_NAME = (
|
|
45
|
+
"compact_llm_command_generator_llm_response_duration"
|
|
46
|
+
)
|
|
35
47
|
MULTI_STEP_LLM_COMMAND_GENERATOR_CPU_USAGE_METRIC_NAME = (
|
|
36
48
|
"multi_step_llm_command_generator_cpu_usage"
|
|
37
49
|
)
|
|
@@ -41,6 +41,7 @@ from rasa.core.processor import MessageProcessor
|
|
|
41
41
|
from rasa.core.tracker_store import TrackerStore
|
|
42
42
|
from rasa.dialogue_understanding.commands import Command
|
|
43
43
|
from rasa.dialogue_understanding.generator import (
|
|
44
|
+
CompactLLMCommandGenerator,
|
|
44
45
|
LLMCommandGenerator,
|
|
45
46
|
MultiStepLLMCommandGenerator,
|
|
46
47
|
SingleStepLLMCommandGenerator,
|
|
@@ -64,6 +65,7 @@ from rasa.tracing.instrumentation.intentless_policy_instrumentation import (
|
|
|
64
65
|
)
|
|
65
66
|
from rasa.tracing.instrumentation.metrics import (
|
|
66
67
|
record_callable_duration_metrics,
|
|
68
|
+
record_compact_llm_command_generator_metrics,
|
|
67
69
|
record_llm_command_generator_metrics,
|
|
68
70
|
record_multi_step_llm_command_generator_metrics,
|
|
69
71
|
record_request_size_in_bytes,
|
|
@@ -286,6 +288,9 @@ LLMCommandGeneratorType = TypeVar("LLMCommandGeneratorType", bound=LLMCommandGen
|
|
|
286
288
|
SingleStepLLMCommandGeneratorType = TypeVar(
|
|
287
289
|
"SingleStepLLMCommandGeneratorType", bound=SingleStepLLMCommandGenerator
|
|
288
290
|
)
|
|
291
|
+
CompactLLMCommandGeneratorType = TypeVar(
|
|
292
|
+
"CompactLLMCommandGeneratorType", bound=CompactLLMCommandGenerator
|
|
293
|
+
)
|
|
289
294
|
MultiStepLLMCommandGeneratorType = TypeVar(
|
|
290
295
|
"MultiStepLLMCommandGeneratorType", bound=MultiStepLLMCommandGenerator
|
|
291
296
|
)
|
|
@@ -318,6 +323,9 @@ def instrument(
|
|
|
318
323
|
single_step_llm_command_generator_class: Optional[
|
|
319
324
|
Type[SingleStepLLMCommandGeneratorType]
|
|
320
325
|
] = None,
|
|
326
|
+
compact_llm_command_generator_class: Optional[
|
|
327
|
+
Type[CompactLLMCommandGeneratorType]
|
|
328
|
+
] = None,
|
|
321
329
|
multi_step_llm_command_generator_class: Optional[
|
|
322
330
|
Type[MultiStepLLMCommandGeneratorType]
|
|
323
331
|
] = None,
|
|
@@ -368,6 +376,9 @@ def instrument(
|
|
|
368
376
|
:param single_step_llm_command_generator_class: The `SingleStepLLMCommandGenerator`
|
|
369
377
|
to be instrumented. If `None` is given, no `SingleStepLLMCommandGenerator` will
|
|
370
378
|
be instrumented.
|
|
379
|
+
:param compact_llm_command_generator_class: The `CompactLLMCommandGenerator`
|
|
380
|
+
to be instrumented. If `None` is given, no `CompactLLMCommandGenerator` will
|
|
381
|
+
be instrumented.
|
|
371
382
|
:param multi_step_llm_command_generator_class: The `MultiStepLLMCommandGenerator`
|
|
372
383
|
to be instrumented. If `None` is given, no `MultiStepLLMCommandGenerator` will
|
|
373
384
|
be instrumented.
|
|
@@ -492,6 +503,30 @@ def instrument(
|
|
|
492
503
|
)
|
|
493
504
|
mark_class_as_instrumented(single_step_llm_command_generator_class)
|
|
494
505
|
|
|
506
|
+
if compact_llm_command_generator_class is not None and not class_is_instrumented(
|
|
507
|
+
compact_llm_command_generator_class
|
|
508
|
+
):
|
|
509
|
+
_instrument_method(
|
|
510
|
+
tracer_provider.get_tracer(compact_llm_command_generator_class.__module__),
|
|
511
|
+
compact_llm_command_generator_class,
|
|
512
|
+
"invoke_llm",
|
|
513
|
+
attribute_extractors.extract_attrs_for_llm_based_command_generator,
|
|
514
|
+
metrics_recorder=record_compact_llm_command_generator_metrics,
|
|
515
|
+
)
|
|
516
|
+
_instrument_method(
|
|
517
|
+
tracer_provider.get_tracer(compact_llm_command_generator_class.__module__),
|
|
518
|
+
compact_llm_command_generator_class,
|
|
519
|
+
"_check_commands_against_startable_flows",
|
|
520
|
+
attribute_extractors.extract_attrs_for_check_commands_against_startable_flows,
|
|
521
|
+
)
|
|
522
|
+
_instrument_perform_health_check_method_for_component(
|
|
523
|
+
tracer_provider.get_tracer(compact_llm_command_generator_class.__module__),
|
|
524
|
+
compact_llm_command_generator_class,
|
|
525
|
+
"perform_llm_health_check",
|
|
526
|
+
attribute_extractors.extract_attrs_for_performing_health_check,
|
|
527
|
+
)
|
|
528
|
+
mark_class_as_instrumented(compact_llm_command_generator_class)
|
|
529
|
+
|
|
495
530
|
if multi_step_llm_command_generator_class is not None and not class_is_instrumented(
|
|
496
531
|
multi_step_llm_command_generator_class
|
|
497
532
|
):
|
|
@@ -526,6 +561,7 @@ def instrument(
|
|
|
526
561
|
for llm_based_command_generator_class in (
|
|
527
562
|
llm_command_generator_class,
|
|
528
563
|
single_step_llm_command_generator_class,
|
|
564
|
+
compact_llm_command_generator_class,
|
|
529
565
|
multi_step_llm_command_generator_class,
|
|
530
566
|
)
|
|
531
567
|
)
|
|
@@ -6,11 +6,16 @@ from rasa.core.nlg.contextual_response_rephraser import ContextualResponseRephra
|
|
|
6
6
|
from rasa.core.policies.enterprise_search_policy import EnterpriseSearchPolicy
|
|
7
7
|
from rasa.core.policies.intentless_policy import IntentlessPolicy
|
|
8
8
|
from rasa.dialogue_understanding.generator import (
|
|
9
|
+
CompactLLMCommandGenerator,
|
|
9
10
|
LLMCommandGenerator,
|
|
10
11
|
MultiStepLLMCommandGenerator,
|
|
11
12
|
SingleStepLLMCommandGenerator,
|
|
12
13
|
)
|
|
13
14
|
from rasa.tracing.constants import (
|
|
15
|
+
COMPACT_LLM_COMMAND_GENERATOR_CPU_USAGE_METRIC_NAME,
|
|
16
|
+
COMPACT_LLM_COMMAND_GENERATOR_LLM_RESPONSE_DURATION_METRIC_NAME,
|
|
17
|
+
COMPACT_LLM_COMMAND_GENERATOR_MEMORY_USAGE_METRIC_NAME,
|
|
18
|
+
COMPACT_LLM_COMMAND_GENERATOR_PROMPT_TOKEN_USAGE_METRIC_NAME,
|
|
14
19
|
CONTEXTUAL_RESPONSE_REPHRASER_LLM_RESPONSE_DURATION_METRIC_NAME,
|
|
15
20
|
ENTERPRISE_SEARCH_POLICY_LLM_RESPONSE_DURATION_METRIC_NAME,
|
|
16
21
|
INTENTLESS_POLICY_LLM_RESPONSE_DURATION_METRIC_NAME,
|
|
@@ -170,6 +175,36 @@ def record_single_step_llm_command_generator_metrics(
|
|
|
170
175
|
)
|
|
171
176
|
|
|
172
177
|
|
|
178
|
+
def record_compact_llm_command_generator_metrics(
|
|
179
|
+
attributes: Dict[str, Any],
|
|
180
|
+
) -> None:
|
|
181
|
+
"""
|
|
182
|
+
Record measurements for CompactLLMCommandGenerator specific metrics.
|
|
183
|
+
|
|
184
|
+
The recording is done by the opentelemetry.metrics.Histogram instruments.
|
|
185
|
+
These instruments are registered to the MetricInstrumentProvider internal singleton.
|
|
186
|
+
|
|
187
|
+
:param attributes: Extracted tracing attributes
|
|
188
|
+
:return: None
|
|
189
|
+
"""
|
|
190
|
+
instrument_provider = MetricInstrumentProvider()
|
|
191
|
+
|
|
192
|
+
if not instrument_provider.instruments:
|
|
193
|
+
return None
|
|
194
|
+
|
|
195
|
+
record_llm_based_command_generator_cpu_usage(
|
|
196
|
+
instrument_provider, COMPACT_LLM_COMMAND_GENERATOR_CPU_USAGE_METRIC_NAME
|
|
197
|
+
)
|
|
198
|
+
record_llm_based_command_generator_memory_usage(
|
|
199
|
+
instrument_provider, COMPACT_LLM_COMMAND_GENERATOR_MEMORY_USAGE_METRIC_NAME
|
|
200
|
+
)
|
|
201
|
+
record_llm_based_command_generator_prompt_token(
|
|
202
|
+
instrument_provider,
|
|
203
|
+
attributes,
|
|
204
|
+
COMPACT_LLM_COMMAND_GENERATOR_PROMPT_TOKEN_USAGE_METRIC_NAME,
|
|
205
|
+
)
|
|
206
|
+
|
|
207
|
+
|
|
173
208
|
def record_multi_step_llm_command_generator_metrics(attributes: Dict[str, Any]) -> None:
|
|
174
209
|
"""
|
|
175
210
|
Record measurements for MultiStepLLMCommandGenerator specific metrics.
|
|
@@ -205,6 +240,7 @@ def record_callable_duration_metrics(
|
|
|
205
240
|
Record duration of instrumented method calls invoked for the following components:
|
|
206
241
|
- LLMCommandGenerator
|
|
207
242
|
- SingleStepLLMCommandGenerator
|
|
243
|
+
- CompactLLMCommandGenerator
|
|
208
244
|
- MultiStepLLMCommandGenerator
|
|
209
245
|
- EnterpriseSearchPolicy
|
|
210
246
|
- IntentlessPolicy
|
|
@@ -235,6 +271,11 @@ def record_callable_duration_metrics(
|
|
|
235
271
|
SINGLE_STEP_LLM_COMMAND_GENERATOR_LLM_RESPONSE_DURATION_METRIC_NAME
|
|
236
272
|
)
|
|
237
273
|
|
|
274
|
+
if type(self) == CompactLLMCommandGenerator:
|
|
275
|
+
metric_instrument = instrument_provider.get_instrument(
|
|
276
|
+
COMPACT_LLM_COMMAND_GENERATOR_LLM_RESPONSE_DURATION_METRIC_NAME
|
|
277
|
+
)
|
|
278
|
+
|
|
238
279
|
if type(self) == MultiStepLLMCommandGenerator:
|
|
239
280
|
metric_instrument = instrument_provider.get_instrument(
|
|
240
281
|
MULTI_STEP_LLM_COMMAND_GENERATOR_LLM_RESPONSE_DURATION_METRIC_NAME
|
|
@@ -4,6 +4,10 @@ from opentelemetry.metrics import get_meter_provider
|
|
|
4
4
|
from opentelemetry.sdk.metrics import Meter
|
|
5
5
|
|
|
6
6
|
from rasa.tracing.constants import (
|
|
7
|
+
COMPACT_LLM_COMMAND_GENERATOR_CPU_USAGE_METRIC_NAME,
|
|
8
|
+
COMPACT_LLM_COMMAND_GENERATOR_LLM_RESPONSE_DURATION_METRIC_NAME,
|
|
9
|
+
COMPACT_LLM_COMMAND_GENERATOR_MEMORY_USAGE_METRIC_NAME,
|
|
10
|
+
COMPACT_LLM_COMMAND_GENERATOR_PROMPT_TOKEN_USAGE_METRIC_NAME,
|
|
7
11
|
CONTEXTUAL_RESPONSE_REPHRASER_LLM_RESPONSE_DURATION_METRIC_NAME,
|
|
8
12
|
DURATION_UNIT_NAME,
|
|
9
13
|
ENTERPRISE_SEARCH_POLICY_LLM_RESPONSE_DURATION_METRIC_NAME,
|
|
@@ -43,6 +47,7 @@ class MetricInstrumentProvider(metaclass=Singleton):
|
|
|
43
47
|
instruments = {
|
|
44
48
|
**self._create_llm_command_generator_instruments(meter),
|
|
45
49
|
**self._create_single_step_llm_command_generator_instruments(meter),
|
|
50
|
+
**self._create_compact_llm_command_generator_instruments(meter),
|
|
46
51
|
**self._create_multi_step_llm_command_generator_instruments(meter),
|
|
47
52
|
**self._create_llm_response_duration_instruments(meter),
|
|
48
53
|
**self._create_client_request_instruments(meter),
|
|
@@ -122,6 +127,41 @@ class MetricInstrumentProvider(metaclass=Singleton):
|
|
|
122
127
|
SINGLE_STEP_LLM_COMMAND_GENERATOR_LLM_RESPONSE_DURATION_METRIC_NAME: single_step_llm_response_duration_llm_command_generator, # noqa: E501
|
|
123
128
|
}
|
|
124
129
|
|
|
130
|
+
@staticmethod
|
|
131
|
+
def _create_compact_llm_command_generator_instruments(
|
|
132
|
+
meter: Meter,
|
|
133
|
+
) -> Dict[str, Any]:
|
|
134
|
+
compact_llm_command_generator_cpu_usage = meter.create_histogram(
|
|
135
|
+
name=COMPACT_LLM_COMMAND_GENERATOR_CPU_USAGE_METRIC_NAME,
|
|
136
|
+
description="CPU percentage for CompactLLMCommandGenerator",
|
|
137
|
+
unit=LLM_BASED_COMMAND_GENERATOR_CPU_MEMORY_USAGE_UNIT_NAME,
|
|
138
|
+
)
|
|
139
|
+
|
|
140
|
+
compact_llm_command_generator_memory_usage = meter.create_histogram(
|
|
141
|
+
name=COMPACT_LLM_COMMAND_GENERATOR_MEMORY_USAGE_METRIC_NAME,
|
|
142
|
+
description="RAM memory usage for CompactLLMCommandGenerator",
|
|
143
|
+
unit=LLM_BASED_COMMAND_GENERATOR_CPU_MEMORY_USAGE_UNIT_NAME,
|
|
144
|
+
)
|
|
145
|
+
|
|
146
|
+
compact_llm_command_generator_prompt_token_usage = meter.create_histogram(
|
|
147
|
+
name=COMPACT_LLM_COMMAND_GENERATOR_PROMPT_TOKEN_USAGE_METRIC_NAME,
|
|
148
|
+
description="CompactLLMCommandGenerator prompt token length",
|
|
149
|
+
unit="1",
|
|
150
|
+
)
|
|
151
|
+
|
|
152
|
+
compact_llm_response_duration_llm_command_generator = meter.create_histogram(
|
|
153
|
+
name=COMPACT_LLM_COMMAND_GENERATOR_LLM_RESPONSE_DURATION_METRIC_NAME,
|
|
154
|
+
description="The duration of CompactLLMCommandGenerator's LLM call",
|
|
155
|
+
unit=DURATION_UNIT_NAME,
|
|
156
|
+
)
|
|
157
|
+
|
|
158
|
+
return {
|
|
159
|
+
COMPACT_LLM_COMMAND_GENERATOR_CPU_USAGE_METRIC_NAME: compact_llm_command_generator_cpu_usage, # noqa: E501
|
|
160
|
+
COMPACT_LLM_COMMAND_GENERATOR_MEMORY_USAGE_METRIC_NAME: compact_llm_command_generator_memory_usage, # noqa: E501
|
|
161
|
+
COMPACT_LLM_COMMAND_GENERATOR_PROMPT_TOKEN_USAGE_METRIC_NAME: compact_llm_command_generator_prompt_token_usage, # noqa: E501
|
|
162
|
+
COMPACT_LLM_COMMAND_GENERATOR_LLM_RESPONSE_DURATION_METRIC_NAME: compact_llm_response_duration_llm_command_generator, # noqa: E501
|
|
163
|
+
}
|
|
164
|
+
|
|
125
165
|
@staticmethod
|
|
126
166
|
def _create_multi_step_llm_command_generator_instruments(
|
|
127
167
|
meter: Meter,
|