rasa-pro 3.12.0.dev5__py3-none-any.whl → 3.12.0.dev6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rasa/cli/dialogue_understanding_test.py +40 -5
- rasa/constants.py +0 -1
- rasa/core/actions/action.py +4 -2
- rasa/core/actions/custom_action_executor.py +1 -1
- rasa/core/channels/inspector/dist/assets/Tableau10-1b767f5e.js +1 -0
- rasa/core/channels/inspector/dist/assets/arc-f0f8bd46.js +1 -0
- rasa/core/channels/inspector/dist/assets/blockDiagram-38ab4fdb-7162c77d.js +118 -0
- rasa/core/channels/inspector/dist/assets/c4Diagram-3d4e48cf-b1d0d098.js +10 -0
- rasa/core/channels/inspector/dist/assets/channel-e265ea59.js +1 -0
- rasa/core/channels/inspector/dist/assets/classDiagram-70f12bd4-807a1b27.js +2 -0
- rasa/core/channels/inspector/dist/assets/classDiagram-v2-f2320105-5238dcdb.js +2 -0
- rasa/core/channels/inspector/dist/assets/clone-21f8a43d.js +1 -0
- rasa/core/channels/inspector/dist/assets/{createText-62fc7601-89c73b31.js → createText-2e5e7dd3-75dfaa67.js} +1 -1
- rasa/core/channels/inspector/dist/assets/edges-e0da2a9e-df20501d.js +4 -0
- rasa/core/channels/inspector/dist/assets/{erDiagram-9d236eb7-907e0440.js → erDiagram-9861fffd-13cf4797.js} +4 -4
- rasa/core/channels/inspector/dist/assets/flowDb-956e92f1-a4991264.js +10 -0
- rasa/core/channels/inspector/dist/assets/flowDiagram-66a62f08-ccecf773.js +4 -0
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-96b9c2cf-5c8ce12d.js +1 -0
- rasa/core/channels/inspector/dist/assets/flowchart-elk-definition-4a651766-b5801783.js +139 -0
- rasa/core/channels/inspector/dist/assets/ganttDiagram-c361ad54-161e079a.js +257 -0
- rasa/core/channels/inspector/dist/assets/gitGraphDiagram-72cf32ee-f38e86a4.js +70 -0
- rasa/core/channels/inspector/dist/assets/graph-be6ef5d8.js +1 -0
- rasa/core/channels/inspector/dist/assets/index-3862675e-d9ce8994.js +1 -0
- rasa/core/channels/inspector/dist/assets/{index-e793d777.js → index-7794b245.js} +200 -195
- rasa/core/channels/inspector/dist/assets/{infoDiagram-736b4530-8ceba4db.js → infoDiagram-f8f76790-5000a3dc.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{journeyDiagram-df861f2b-960d3809.js → journeyDiagram-49397b02-8ef0a17a.js} +4 -4
- rasa/core/channels/inspector/dist/assets/katex-498eb57e.js +261 -0
- rasa/core/channels/inspector/dist/assets/layout-d649bc98.js +1 -0
- rasa/core/channels/inspector/dist/assets/{line-eeccc4e2.js → line-95add810.js} +1 -1
- rasa/core/channels/inspector/dist/assets/linear-f6025094.js +1 -0
- rasa/core/channels/inspector/dist/assets/mindmap-definition-fc14e90a-2e8531c4.js +312 -0
- rasa/core/channels/inspector/dist/assets/{pieDiagram-dbbf0591-dc9b5e1b.js → pieDiagram-8a3498a8-918adfdb.js} +7 -7
- rasa/core/channels/inspector/dist/assets/{quadrantDiagram-4d7f4fd6-a08cba6d.js → quadrantDiagram-120e2f19-cbd01797.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{requirementDiagram-6fc4c22a-87242b9e.js → requirementDiagram-deff3bca-6a8b877b.js} +2 -2
- rasa/core/channels/inspector/dist/assets/sankeyDiagram-04a897e0-c377c3fe.js +8 -0
- rasa/core/channels/inspector/dist/assets/sequenceDiagram-704730f1-ab9e9b7f.js +122 -0
- rasa/core/channels/inspector/dist/assets/stateDiagram-587899a1-5e6ae67d.js +1 -0
- rasa/core/channels/inspector/dist/assets/stateDiagram-v2-d93cdb3a-40643476.js +1 -0
- rasa/core/channels/inspector/dist/assets/{styles-9c745c82-cef936a6.js → styles-6aaf32cf-afb8d108.js} +1 -1
- rasa/core/channels/inspector/dist/assets/styles-9a916d00-7edc9423.js +160 -0
- rasa/core/channels/inspector/dist/assets/styles-c10674c1-c1d8f7e9.js +116 -0
- rasa/core/channels/inspector/dist/assets/svgDrawCommon-08f97a94-f494b2ef.js +1 -0
- rasa/core/channels/inspector/dist/assets/{timeline-definition-5b62e21b-0d39bdb2.js → timeline-definition-85554ec2-11c7cdd0.js} +3 -3
- rasa/core/channels/inspector/dist/assets/{xychartDiagram-2b33534f-a03fa445.js → xychartDiagram-e933f94c-3f191ec1.js} +3 -3
- rasa/core/channels/inspector/dist/index.html +1 -1
- rasa/core/channels/inspector/package.json +10 -3
- rasa/core/channels/inspector/yarn.lock +89 -99
- rasa/core/channels/studio_chat.py +14 -0
- rasa/core/nlg/contextual_response_rephraser.py +2 -1
- rasa/core/policies/enterprise_search_policy.py +2 -1
- rasa/core/processor.py +3 -3
- rasa/dialogue_understanding/commands/can_not_handle_command.py +2 -2
- rasa/dialogue_understanding/commands/cancel_flow_command.py +2 -2
- rasa/dialogue_understanding/commands/change_flow_command.py +2 -2
- rasa/dialogue_understanding/commands/chit_chat_answer_command.py +2 -2
- rasa/dialogue_understanding/commands/clarify_command.py +2 -2
- rasa/dialogue_understanding/commands/human_handoff_command.py +2 -2
- rasa/dialogue_understanding/commands/knowledge_answer_command.py +2 -2
- rasa/dialogue_understanding/commands/repeat_bot_messages_command.py +2 -2
- rasa/dialogue_understanding/commands/set_slot_command.py +2 -2
- rasa/dialogue_understanding/commands/skip_question_command.py +2 -2
- rasa/dialogue_understanding/commands/start_flow_command.py +2 -2
- rasa/dialogue_understanding/commands/utils.py +1 -1
- rasa/dialogue_understanding/generator/command_parser.py +1 -1
- rasa/dialogue_understanding/generator/llm_based_command_generator.py +3 -3
- rasa/dialogue_understanding/generator/single_step/command_prompt_template.jinja2 +41 -39
- rasa/dialogue_understanding_test/command_metric_calculation.py +8 -4
- rasa/dialogue_understanding_test/du_test_result.py +9 -0
- rasa/e2e_test/assertions.py +203 -174
- rasa/e2e_test/assertions_schema.yml +6 -0
- rasa/e2e_test/constants.py +16 -1
- rasa/e2e_test/e2e_config.py +102 -41
- rasa/e2e_test/e2e_config_schema.yml +28 -10
- rasa/e2e_test/llm_judge_prompts/answer_relevance_prompt_template.jinja2 +89 -0
- rasa/e2e_test/llm_judge_prompts/groundedness_prompt_template.jinja2 +165 -0
- rasa/e2e_test/utils/generative_assertions.py +243 -0
- rasa/server.py +3 -1
- rasa/shared/nlu/constants.py +1 -0
- rasa/shared/providers/llm/llm_response.py +21 -1
- rasa/shared/utils/llm.py +1 -1
- rasa/tracing/instrumentation/attribute_extractors.py +23 -7
- rasa/utils/common.py +0 -14
- rasa/version.py +1 -1
- {rasa_pro-3.12.0.dev5.dist-info → rasa_pro-3.12.0.dev6.dist-info}/METADATA +1 -3
- {rasa_pro-3.12.0.dev5.dist-info → rasa_pro-3.12.0.dev6.dist-info}/RECORD +88 -79
- rasa/core/channels/inspector/dist/assets/arc-632a63ec.js +0 -1
- rasa/core/channels/inspector/dist/assets/c4Diagram-d0fbc5ce-081e0df4.js +0 -10
- rasa/core/channels/inspector/dist/assets/classDiagram-936ed81e-3df0afc2.js +0 -2
- rasa/core/channels/inspector/dist/assets/classDiagram-v2-c3cb15f1-8c5ed31e.js +0 -2
- rasa/core/channels/inspector/dist/assets/edges-f2ad444c-4fc48c3e.js +0 -4
- rasa/core/channels/inspector/dist/assets/flowDb-1972c806-9ec53a3c.js +0 -6
- rasa/core/channels/inspector/dist/assets/flowDiagram-7ea5b25a-41da787a.js +0 -4
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-855bc5b3-8bea338b.js +0 -1
- rasa/core/channels/inspector/dist/assets/flowchart-elk-definition-abe16c3d-ce370633.js +0 -139
- rasa/core/channels/inspector/dist/assets/ganttDiagram-9b5ea136-90a36523.js +0 -266
- rasa/core/channels/inspector/dist/assets/gitGraphDiagram-99d0ae7c-41e1aa3f.js +0 -70
- rasa/core/channels/inspector/dist/assets/index-2c4b9a3b-e6f2af62.js +0 -1
- rasa/core/channels/inspector/dist/assets/layout-498807d8.js +0 -1
- rasa/core/channels/inspector/dist/assets/linear-8a078617.js +0 -1
- rasa/core/channels/inspector/dist/assets/mindmap-definition-beec6740-396d17dd.js +0 -109
- rasa/core/channels/inspector/dist/assets/sankeyDiagram-8f13d901-53f6f391.js +0 -8
- rasa/core/channels/inspector/dist/assets/sequenceDiagram-b655622a-715c9c20.js +0 -122
- rasa/core/channels/inspector/dist/assets/stateDiagram-59f0c015-2e8fb31f.js +0 -1
- rasa/core/channels/inspector/dist/assets/stateDiagram-v2-2b26beab-7e2d2aa0.js +0 -1
- rasa/core/channels/inspector/dist/assets/styles-080da4f6-4420cea6.js +0 -110
- rasa/core/channels/inspector/dist/assets/styles-3dcbcfbf-28676cf4.js +0 -159
- rasa/core/channels/inspector/dist/assets/svgDrawCommon-4835440b-151251e9.js +0 -1
- {rasa_pro-3.12.0.dev5.dist-info → rasa_pro-3.12.0.dev6.dist-info}/NOTICE +0 -0
- {rasa_pro-3.12.0.dev5.dist-info → rasa_pro-3.12.0.dev6.dist-info}/WHEEL +0 -0
- {rasa_pro-3.12.0.dev5.dist-info → rasa_pro-3.12.0.dev6.dist-info}/entry_points.txt +0 -0
rasa/e2e_test/e2e_config.py
CHANGED
@@ -1,26 +1,31 @@
 from __future__ import annotations
 
-import dataclasses
-from dataclasses import dataclass
 from pathlib import Path
-from typing import Any, Dict, Generator, Optional
+from typing import Any, Dict, Generator, Optional, Tuple
 
 import structlog
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 
 from rasa.e2e_test.constants import (
+    DEFAULT_E2E_TESTING_MODEL,
     E2E_CONFIG_SCHEMA_FILE_PATH,
-
+    KEY_EXTRA_PARAMETERS,
     KEY_LLM_E2E_TEST_CONVERSION,
+    KEY_LLM_JUDGE,
 )
 from rasa.shared.constants import (
     API_BASE_CONFIG_KEY,
     DEPLOYMENT_CONFIG_KEY,
+    EMBEDDINGS_CONFIG_KEY,
     MODEL_CONFIG_KEY,
+    MODELS_CONFIG_KEY,
     OPENAI_PROVIDER,
     PROVIDER_CONFIG_KEY,
 )
-from rasa.shared.
+from rasa.shared.utils.llm import (
+    combine_custom_and_default_config,
+    resolve_model_client_config,
+)
 from rasa.shared.utils.yaml import (
     parse_raw_yaml,
     read_schema_file,
@@ -32,42 +37,103 @@ structlogger = structlog.get_logger()
 CONFTEST_PATTERNS = ["conftest.yml", "conftest.yaml"]
 
 
-class
-"""
+class BaseModelConfig(BaseModel):
+    """Base class for model configurations used by generative assertions."""
 
-
-
-
+    provider: Optional[str] = None
+    model: Optional[str] = None
+    extra_parameters: Dict[str, Any] = Field(default_factory=dict)
+    model_group: Optional[str] = None
 
 
-
-
-    """Class for storing the configuration of the LLM-As-Judge.
+class LLMJudgeConfig(BaseModel):
+    """Class for storing the configuration of the LLM-Judge.
 
-    The LLM-
+    The LLM-Judge is used to measure the factual correctness
     (i.e., how grounded in the source documents the response is),
     or relevance of the generated response during E2E testing.
     """
 
-
-
+    llm_config: BaseModelConfig
+    embeddings: Optional[BaseModelConfig] = None
 
-    @
-    def
+    @classmethod
+    def get_default_llm_config(cls) -> Dict[str, Any]:
+        return {
+            PROVIDER_CONFIG_KEY: OPENAI_PROVIDER,
+            MODEL_CONFIG_KEY: DEFAULT_E2E_TESTING_MODEL,
+        }
+
+    @classmethod
+    def from_dict(cls, config_data: Dict[str, Any]) -> LLMJudgeConfig:
         """Loads the configuration from a dictionary."""
-
-
-
-
-
+        embeddings = config_data.pop(EMBEDDINGS_CONFIG_KEY, None)
+        llm_config = config_data.pop("llm", {})
+
+        llm_config = resolve_model_client_config(llm_config)
+        llm_config, extra_parameters = cls.extract_attributes(llm_config)
+        llm_config = combine_custom_and_default_config(
+            llm_config, cls.get_default_llm_config()
+        )
+        embeddings_config = resolve_model_client_config(embeddings)
 
-        return LLMJudgeConfig(
+        return LLMJudgeConfig(
+            llm_config=BaseModelConfig(extra_parameters=extra_parameters, **llm_config),
+            embeddings=BaseModelConfig(**embeddings_config)
+            if embeddings_config
+            else None,
+        )
 
-
-
+    @classmethod
+    def extract_attributes(
+        cls, llm_config: Dict[str, Any]
+    ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
+        """Extract the expected fields from the configuration."""
+        required_config = {}
+
+        expected_fields = [
+            PROVIDER_CONFIG_KEY,
+            MODEL_CONFIG_KEY,
+        ]
+
+        if PROVIDER_CONFIG_KEY in llm_config:
+            required_config = {
+                expected_field: llm_config.pop(expected_field, None)
+                for expected_field in expected_fields
+            }
+
+        elif MODELS_CONFIG_KEY in llm_config:
+            llm_config = llm_config.pop(MODELS_CONFIG_KEY)[0]
+
+            required_config = {
+                expected_field: llm_config.pop(expected_field, None)
+                for expected_field in expected_fields
+            }
+
+        clean_config = clean_up_config(required_config)
+        return clean_config, llm_config
+
+    @property
+    def llm_config_as_dict(self) -> Dict[str, Any]:
+        return extract_config(self.llm_config)
 
-
-
+    @property
+    def embeddings_config_as_dict(self) -> Dict[str, Any]:
+        if self.embeddings is None:
+            return {}
+
+        return extract_config(self.embeddings)
+
+
+def clean_up_config(config_data: Dict[str, Any]) -> Dict[str, Any]:
+    """Remove None values from the configuration."""
+    return {key: value for key, value in config_data.items() if value}
+
+
+def extract_config(config: BaseModelConfig) -> Dict[str, Any]:
+    clean_config = clean_up_config(dict(config))
+    extra_parameters = clean_config.pop(KEY_EXTRA_PARAMETERS, {})
+    return {**clean_config, **extra_parameters}
 
 
 class LLME2ETestConverterConfig(BaseModel):
@@ -99,7 +165,10 @@ class LLME2ETestConverterConfig(BaseModel):
 
     @classmethod
     def get_default_config(cls) -> Dict[str, Any]:
-        return {
+        return {
+            PROVIDER_CONFIG_KEY: OPENAI_PROVIDER,
+            MODEL_CONFIG_KEY: DEFAULT_E2E_TESTING_MODEL,
+        }
 
     @staticmethod
     def _clean_up_config(config_data: Dict[str, Any]) -> Dict[str, Any]:
@@ -160,24 +229,16 @@ def create_llm_judge_config(test_case_path: Optional[Path]) -> LLMJudgeConfig:
         structlogger.debug("e2e_config.create_llm_judge_config.no_conftest_detected")
         return LLMJudgeConfig.from_dict(config_data)
 
-    llm_judge_config_data = config_data.get(
+    llm_judge_config_data = config_data.get(KEY_LLM_JUDGE, {})
     if not llm_judge_config_data:
-        structlogger.debug("e2e_config.create_llm_judge_config.
+        structlogger.debug("e2e_config.create_llm_judge_config.no_llm_judge_key")
 
     structlogger.info(
         "e2e_config.create_llm_judge_config.success",
        llm_judge_config_data=llm_judge_config_data,
     )
 
-
-        return LLMJudgeConfig.from_dict(llm_judge_config_data)
-    except InvalidLLMConfiguration as e:
-        structlogger.error(
-            "e2e_config.create_llm_judge_config.invalid_llm_configuration",
-            error_message=str(e),
-            event_info="Falling back to default configuration.",
-        )
-        return LLMJudgeConfig()
+    return LLMJudgeConfig.from_dict(llm_judge_config_data)
 
 
 def create_llm_e2e_test_converter_config(
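The net effect of the hunks above: `LLMJudgeConfig.from_dict` now resolves the model client config, pulls `provider` and `model` out as first-class fields, and keeps everything else in the `llm:` block as `extra_parameters`. A minimal standalone sketch of that split, with no rasa imports; `split_llm_config` is a hypothetical stand-in for `LLMJudgeConfig.extract_attributes` (which additionally handles the `models:` group form), and the config values are placeholders:

```python
from typing import Any, Dict, Tuple


def split_llm_config(
    llm_config: Dict[str, Any]
) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """Pull out provider/model; everything left over becomes extra_parameters."""
    expected_fields = ["provider", "model"]
    required = {field: llm_config.pop(field, None) for field in expected_fields}
    # Mirror clean_up_config from the diff: drop unset values.
    required = {key: value for key, value in required.items() if value}
    return required, llm_config


raw = {"provider": "openai", "model": "gpt-4o", "temperature": 0.0, "timeout": 10}
required, extra_parameters = split_llm_config(dict(raw))  # copy: pop mutates
print(required)          # {'provider': 'openai', 'model': 'gpt-4o'}
print(extra_parameters)  # {'temperature': 0.0, 'timeout': 10}
```

Note also that `extract_config` later merges `extra_parameters` back into a flat dict, so the round trip through `BaseModelConfig` preserves arbitrary client options.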
rasa/e2e_test/e2e_config_schema.yml
CHANGED
@@ -1,19 +1,37 @@
 mapping:
-
+  llm_judge:
     type: map
     mapping:
-
-      type:
-
-
-
-
-
-
+      llm:
+        type: map
+        allowempty: true
+        mapping:
+          provider:
+            type: str
+            nullable: false
+          model:
+            type: str
+            nullable: false
+          model_group:
+            type: str
+            nullable: false
+      embeddings:
+        type: map
+        allowempty: true
+        mapping:
+          provider:
+            type: str
+            nullable: false
+          model:
+            type: str
+            nullable: false
+          model_group:
+            type: str
+            nullable: false
   llm_e2e_test_conversion:
     type: map
     mapping:
-
+      provider:
       type: str
       model:
         type: str
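Read together with the code change above, the widened schema lets a test suite's conftest pin separate LLM and embeddings clients for the judge under a new `llm_judge` key. A hypothetical conftest.yml that should satisfy this schema, checked for shape here with PyYAML; the provider and model values are placeholders, not defaults shipped by Rasa:

```python
import yaml  # PyYAML

# Hypothetical conftest body matching the widened schema above.
CONFTEST = """
llm_judge:
  llm:
    provider: openai
    model: gpt-4o
  embeddings:
    provider: openai
    model: text-embedding-3-small
llm_e2e_test_conversion:
  provider: openai
  model: gpt-4o
"""

config = yaml.safe_load(CONFTEST)
assert set(config["llm_judge"]) == {"llm", "embeddings"}
print(config["llm_judge"]["embeddings"]["model"])  # text-embedding-3-small
```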
rasa/e2e_test/llm_judge_prompts/answer_relevance_prompt_template.jinja2
ADDED
@@ -0,0 +1,89 @@
+You are a detail-oriented judge whose task is to generate {{ num_variations }} question variations based on the provided answer.
+Each question variation should be a question that the input answer is attempting to address.
+
+## Task Steps
+
+Follow these steps to complete the task:
+
+1. Generate {{ num_variations }} question variations that the input answer is attempting to answer.
+3. Compile all the question variations into a JSON object.
+
+### JSON Output
+
+Format your output as a JSON object with the following attribute:
+
+- `question_variations`: a list of strings, where each string represents the generated question that the input answer is addressing.
+
+Example:
+
+```json
+{
+  "question_variations": [
+    "Does Rasa Pro support building an AI assistant with LLMs?",
+    "In which version of Rasa Pro was support for LLM usage released?",
+  ]
+}
+```
+
+### Task Requirements
+
+- Ensure that each question variation is relevant to the input answer.
+
+### Examples
+
+These are a few examples of how to generate question variations and identify non-committed answers:
+
+#### Example 1
+- **Input Answer**:
+```
+You can build a CALM assistant with Rasa Pro by defining your own business logic flows.
+I could not identify the exact version of Rasa Pro which released CALM.
+```
+- **Output**:
+```json
+{
+  "question_variations": [
+    "How can you build a CALM assistant with Rasa Pro?",
+    "What is the version of Rasa Pro that released CALM?",
+    "What type of assistant can you build with Rasa Pro?",
+  ]
+}
+```
+
+#### Example 2:
+- **Input Answer**:
+```
+You can integrate external knowledge sources into your Rasa Pro assistant.
+```
+- **Output**:
+```json
+{
+  "question_variations": [
+    "Can you integrate external knowledge sources into your Rasa Pro assistant?",
+  ]
+}
+```
+
+
+#### Example 3:
+- **Input Answer**:
+```
+Rasa Pro has released CALM, its LLM-native approach to building reliable conversational AI, in version 3.7.0.
+```
+- **Output**:
+```json
+{
+  "question_variations": [
+    "What is CALM in Rasa Pro?",
+    "In which Rasa Pro version was CALM released?",
+    "What is the LLM-native approach in Rasa Pro?",
+  ]
+}
+```
+
+
+## Task Inputs
+
+- **Input Answer**: {{ user_message }}
+
+Your output:
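The template is plain Jinja2 with two variables, `{{ num_variations }}` and `{{ user_message }}`. A minimal sketch of rendering it directly; the loading path is taken from the file list above and assumes you run from an unpacked wheel, so treat it as illustrative rather than how Rasa itself loads the prompt:

```python
from jinja2 import Template

# Path comes from the changed-file list above; adjust to your environment.
TEMPLATE_PATH = (
    "rasa/e2e_test/llm_judge_prompts/answer_relevance_prompt_template.jinja2"
)

with open(TEMPLATE_PATH, encoding="utf-8") as f:
    prompt = Template(f.read()).render(
        num_variations=3,
        user_message="Rasa Pro has released CALM in version 3.7.0.",
    )

# First line of the rendered prompt, with num_variations substituted.
print(prompt.splitlines()[0])
```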
rasa/e2e_test/llm_judge_prompts/groundedness_prompt_template.jinja2
ADDED
@@ -0,0 +1,165 @@
+You are a detail-oriented impartial judge whose task is to evaluate the correctness of a given answer based on the provided ground truth.
+
+## Task Steps
+
+Follow these steps to complete the task:
+
+1. Given the input answer, break down the answer into one or more sentences.
+2. Extract from each sentence one or more fully understandable and distinct statements while also ensuring no pronouns are used in each statement.
+3. For each statement, determine whether the statement is supported by the provided ground truth. If the statement is supported,
+mark it with the score `1`. If the statement is not supported, mark it with the score `0`.
+4. Provide a justification for the score you assigned. Each justification must reference the relevant part of the ground truth.
+5. Compile all the statements, scores, and justifications into a JSON object.
+
+### JSON Output
+
+Format your output as a JSON object with the following attribute:
+
+- `statements`: a list of dictionaries, where each dictionary contains the following keys:
+  - `statement`: the extracted statement from the input answer
+  - `score`: the score assigned to the statement (0 or 1)
+  - `justification`: a brief and distinct explanation justifying the score assigned to the statement
+
+Example:
+
+```json
+{
+  "statements": [
+    {
+      "statement": "You can run Rasa Pro using Python 3.11 starting from Rasa Pro version 3.11.",
+      "score": 1,
+      "justification": "According to the ground truth source, support for Python 3.11 was added in Rasa Pro version 3.11"
+    },
+    {
+      "statement": "Rasa Pro does not support LLM usage in any version.",
+      "score": 0,
+      "justification": "According to the ground truth source, Rasa Pro has released CALM, its LLM-native approach to building reliable conversational AI, in version 3.7.0"
+    }
+  ]
+}
+```
+
+### Task Requirements
+
+- If the answer contains multiple statements, ensure that each statement is evaluated independently.
+- If the answer contains a statement that is not verifiable by the ground truth, mark the statement as unsupported with the score `0`.
+- If the answer contains a statement that is verifiable by the ground truth, mark the statement as supported with the score `1`.
+- Provide a brief justification for each score assigned to a statement.
+
+### Examples
+
+These are a few examples of how to evaluate the correctness of the answer based on the ground truth:
+
+#### Example 1
+- **Input Answer**:
+```
+You can build a CALM assistant with Rasa Pro by defining your own business logic flows.
+In addition, CALM leverages customizable default flows designed to handle various conversational repair scenarios.
+CALM contains a built-in LLM approach designed to generate predefined commands that
+reflect the user's intentions to start and stop flows, fill slots and more.
+```
+- **Ground Truth**:
+```
+Rasa Pro has released CALM, its LLM-native approach to building reliable conversational AI, in version 3.7.0.
+The CALM approach has three key elements: Business Logic, Dialogue Understanding, and Automatic Conversation Repair.
+Business logic is implemented as a set of flows. A flow describes a business process that your AI assistant can handle.
+Dialogue understanding is designed to interpret what end users are communicating to your assistant.
+This process involves generating commands that reflect the user's intentions, by starting and stopping flows, filling slots and more.
+Automatic conversation repair handles all the ways conversations can go "off script". This is implemented as a set of default flows open for customization.
+```
+- **Output**:
+```json
+{
+  "statements": [
+    {
+      "statement": "You can build a CALM assistant with Rasa Pro.",
+      "score": 1,
+      "justification": "The ground truth confirms that Rasa Pro has released CALM, its LLM-native approach to building reliable conversational AI"
+    },
+    {
+      "statement": "You can define your own business logic as flows in a CALM assistant with Rasa Pro.",
+      "score": 1,
+      "justification": "The ground truth confirms that business logic is implemented as a set of flows in CALM"
+    },
+    {
+      "statement": "Conversation repair scenarios are handled by customizable default flows in CALM.",
+      "score": 1,
+      "justification": "The ground truth confirms that automatic conversation repair is implemented as a set of default flows open for customization"
+    },
+    {
+      "statement": "CALM contains a built-in LLM approach.",
+      "score": 1,
+      "justification": "The ground truth confirms that CALM leverages an LLM-native approach"
+    },
+    {
+      "statement": "The LLM approach in CALM generates predefined commands reflecting user intentions.",
+      "score": 1,
+      "justification": "The ground truth confirms that dialogue understanding involves generating commands reflecting user intentions"
+    },
+    {
+      "statement": "The LLM approach in CALM is designed to start and stop flows, fill slots, and more.",
+      "score": 1,
+      "justification": "The ground truth confirms that dialogue understanding involves generating commands to start and stop flows, fill slots, and more"
+    }
+  ]
+}
+```
+
+#### Example 2:
+- **Input Answer**:
+```
+You cannot integrate external knowledge into your Rasa Pro assistant.
+```
+- **Ground Truth**:
+```
+The Enterprise Search Policy is part of Rasa's new Conversational AI with Language Models (CALM) approach and available starting with version 3.7.0.
+The Enterprise Search Policy uses an LLM to search external knowledge base documents in order to deliver a relevant, context-aware response from the data.
+```
+- **Output**:
+```json
+{
+  "statements": [
+    {
+      "statement": "Rasa Pro does not support integrating external knowledge.",
+      "score": 0,
+      "justification": "The provided statement is incorrect, because the ground truth confirms that the Enterprise Search Policy in Rasa Pro's CALM approach uses an LLM to search external knowledge base documents"
+    }
+  ]
+}
+```
+
+
+#### Example 3:
+- **Input Answer**:
+```
+Rasa Pro has released CALM, its LLM-native approach to building reliable conversational AI, in version 3.6.0.
+```
+- **Ground Truth**:
+```
+Rasa Pro has released CALM, its LLM-native approach to building reliable conversational AI, in version 3.7.0.
+```
+- **Output**:
+```json
+{
+  "statements": [
+    {
+      "statement": "CALM is Rasa Pro's LLM-native approach to building reliable conversational AI.",
+      "score": 1,
+      "justification": "The ground truth confirms that Rasa Pro has released CALM, its LLM-native approach to building reliable conversational AI"
+    },
+    {
+      "statement": "CALM was released in Rasa Pro version 3.6.0.",
+      "score": 0,
+      "justification": "The provided statement is incorrect, as the ground truth confirms that CALM was released in Rasa Pro version 3.7.0"
+    }
+  ]
+}
+```
+
+
+## Task Inputs
+
+- **Input Answer**: {{ bot_message }}
+- **Ground Truth**: {{ ground_truth }}
+
+Your output:
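Because the judge returns a binary score per extracted statement, a groundedness metric falls out naturally from its JSON output. The aggregation Rasa actually ships lives in `rasa/e2e_test/utils/generative_assertions.py` (not shown in this diff), so the mean-of-scores below is only an illustration of the idea, not Rasa's formula:

```python
import json

# Example judge output shaped like the template's JSON contract above.
judge_output = """
{
  "statements": [
    {"statement": "CALM is Rasa Pro's LLM-native approach.", "score": 1,
     "justification": "Confirmed by the ground truth."},
    {"statement": "CALM was released in Rasa Pro version 3.6.0.", "score": 0,
     "justification": "The ground truth says version 3.7.0."}
  ]
}
"""

statements = json.loads(judge_output)["statements"]
# Fraction of statements the judge found supported by the ground truth.
groundedness = sum(s["score"] for s in statements) / len(statements)
print(f"groundedness: {groundedness:.2f}")  # groundedness: 0.50
```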