rasa-pro 3.11.0a4.dev3__py3-none-any.whl → 3.11.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rasa-pro might be problematic. Click here for more details.
- rasa/__main__.py +22 -12
- rasa/api.py +1 -1
- rasa/cli/arguments/default_arguments.py +1 -2
- rasa/cli/arguments/shell.py +5 -1
- rasa/cli/e2e_test.py +1 -1
- rasa/cli/evaluate.py +8 -8
- rasa/cli/inspect.py +6 -4
- rasa/cli/llm_fine_tuning.py +1 -1
- rasa/cli/project_templates/calm/config.yml +5 -7
- rasa/cli/project_templates/calm/endpoints.yml +8 -0
- rasa/cli/project_templates/tutorial/config.yml +8 -5
- rasa/cli/project_templates/tutorial/data/flows.yml +1 -1
- rasa/cli/project_templates/tutorial/data/patterns.yml +5 -0
- rasa/cli/project_templates/tutorial/domain.yml +14 -0
- rasa/cli/project_templates/tutorial/endpoints.yml +7 -7
- rasa/cli/run.py +1 -1
- rasa/cli/scaffold.py +4 -2
- rasa/cli/studio/studio.py +18 -8
- rasa/cli/utils.py +5 -0
- rasa/cli/x.py +8 -8
- rasa/constants.py +1 -1
- rasa/core/actions/action_repeat_bot_messages.py +17 -0
- rasa/core/channels/channel.py +20 -0
- rasa/core/channels/inspector/dist/assets/{arc-6852c607.js → arc-bc141fb2.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{c4Diagram-d0fbc5ce-acc952b2.js → c4Diagram-d0fbc5ce-be2db283.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{classDiagram-936ed81e-848a7597.js → classDiagram-936ed81e-55366915.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{classDiagram-v2-c3cb15f1-a73d3e68.js → classDiagram-v2-c3cb15f1-bb529518.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{createText-62fc7601-e5ee049d.js → createText-62fc7601-b0ec81d6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{edges-f2ad444c-771e517e.js → edges-f2ad444c-6166330c.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{erDiagram-9d236eb7-aa347178.js → erDiagram-9d236eb7-5ccc6a8e.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{flowDb-1972c806-651fc57d.js → flowDb-1972c806-fca3bfe4.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{flowDiagram-7ea5b25a-ca67804f.js → flowDiagram-7ea5b25a-4739080f.js} +1 -1
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-855bc5b3-736177bf.js +1 -0
- rasa/core/channels/inspector/dist/assets/{flowchart-elk-definition-abe16c3d-2dbc568d.js → flowchart-elk-definition-abe16c3d-7c1b0e0f.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{ganttDiagram-9b5ea136-25a65bd8.js → ganttDiagram-9b5ea136-772fd050.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{gitGraphDiagram-99d0ae7c-fdc7378d.js → gitGraphDiagram-99d0ae7c-8eae1dc9.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{index-2c4b9a3b-6f1fd606.js → index-2c4b9a3b-f55afcdf.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{index-efdd30c1.js → index-e7cef9de.js} +68 -68
- rasa/core/channels/inspector/dist/assets/{infoDiagram-736b4530-cb1a041a.js → infoDiagram-736b4530-124d4a14.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{journeyDiagram-df861f2b-14609879.js → journeyDiagram-df861f2b-7c4fae44.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{layout-2490f52b.js → layout-b9885fb6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{line-40186f1f.js → line-7c59abb6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{linear-08814e93.js → linear-4776f780.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{mindmap-definition-beec6740-1a534584.js → mindmap-definition-beec6740-2332c46c.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{pieDiagram-dbbf0591-72397b61.js → pieDiagram-dbbf0591-8fb39303.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{quadrantDiagram-4d7f4fd6-3bb0b6a3.js → quadrantDiagram-4d7f4fd6-3c7180a2.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{requirementDiagram-6fc4c22a-57334f61.js → requirementDiagram-6fc4c22a-e910bcb8.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{sankeyDiagram-8f13d901-111e1297.js → sankeyDiagram-8f13d901-ead16c89.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{sequenceDiagram-b655622a-10bcfe62.js → sequenceDiagram-b655622a-29a02a19.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{stateDiagram-59f0c015-acaf7513.js → stateDiagram-59f0c015-042b3137.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{stateDiagram-v2-2b26beab-3ec2a235.js → stateDiagram-v2-2b26beab-2178c0f3.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-080da4f6-62730289.js → styles-080da4f6-23ffa4fc.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-3dcbcfbf-5284ee76.js → styles-3dcbcfbf-94f59763.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-9c745c82-642435e3.js → styles-9c745c82-78a6bebc.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{svgDrawCommon-4835440b-b250a350.js → svgDrawCommon-4835440b-eae2a6f6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{timeline-definition-5b62e21b-c2b147ed.js → timeline-definition-5b62e21b-5c968d92.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{xychartDiagram-2b33534f-f92cfea9.js → xychartDiagram-2b33534f-fd3db0d5.js} +1 -1
- rasa/core/channels/inspector/dist/index.html +1 -1
- rasa/core/channels/inspector/src/App.tsx +1 -1
- rasa/core/channels/inspector/src/helpers/audiostream.ts +77 -16
- rasa/core/channels/socketio.py +2 -1
- rasa/core/channels/telegram.py +1 -1
- rasa/core/channels/twilio.py +1 -1
- rasa/core/channels/voice_ready/audiocodes.py +12 -0
- rasa/core/channels/voice_ready/jambonz.py +15 -4
- rasa/core/channels/voice_ready/twilio_voice.py +6 -21
- rasa/core/channels/voice_stream/asr/asr_event.py +5 -0
- rasa/core/channels/voice_stream/asr/azure.py +122 -0
- rasa/core/channels/voice_stream/asr/deepgram.py +16 -6
- rasa/core/channels/voice_stream/audio_bytes.py +1 -0
- rasa/core/channels/voice_stream/browser_audio.py +31 -8
- rasa/core/channels/voice_stream/call_state.py +23 -0
- rasa/core/channels/voice_stream/tts/azure.py +6 -2
- rasa/core/channels/voice_stream/tts/cartesia.py +10 -6
- rasa/core/channels/voice_stream/tts/tts_engine.py +1 -0
- rasa/core/channels/voice_stream/twilio_media_streams.py +27 -18
- rasa/core/channels/voice_stream/util.py +4 -4
- rasa/core/channels/voice_stream/voice_channel.py +189 -39
- rasa/core/featurizers/single_state_featurizer.py +22 -1
- rasa/core/featurizers/tracker_featurizers.py +115 -18
- rasa/core/nlg/contextual_response_rephraser.py +32 -30
- rasa/core/persistor.py +86 -39
- rasa/core/policies/enterprise_search_policy.py +119 -60
- rasa/core/policies/flows/flow_executor.py +7 -4
- rasa/core/policies/intentless_policy.py +78 -22
- rasa/core/policies/ted_policy.py +58 -33
- rasa/core/policies/unexpected_intent_policy.py +15 -7
- rasa/core/processor.py +25 -0
- rasa/core/training/interactive.py +34 -35
- rasa/core/utils.py +8 -3
- rasa/dialogue_understanding/coexistence/llm_based_router.py +39 -12
- rasa/dialogue_understanding/commands/change_flow_command.py +6 -0
- rasa/dialogue_understanding/commands/user_silence_command.py +59 -0
- rasa/dialogue_understanding/commands/utils.py +5 -0
- rasa/dialogue_understanding/generator/constants.py +2 -0
- rasa/dialogue_understanding/generator/flow_retrieval.py +49 -4
- rasa/dialogue_understanding/generator/llm_based_command_generator.py +37 -23
- rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +57 -10
- rasa/dialogue_understanding/generator/nlu_command_adapter.py +19 -1
- rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +71 -11
- rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +39 -0
- rasa/dialogue_understanding/patterns/user_silence.py +37 -0
- rasa/dialogue_understanding/processor/command_processor.py +21 -1
- rasa/e2e_test/e2e_test_case.py +85 -6
- rasa/e2e_test/e2e_test_runner.py +4 -2
- rasa/e2e_test/utils/io.py +1 -1
- rasa/engine/validation.py +316 -10
- rasa/model_manager/config.py +15 -3
- rasa/model_manager/model_api.py +15 -7
- rasa/model_manager/runner_service.py +8 -6
- rasa/model_manager/socket_bridge.py +6 -3
- rasa/model_manager/trainer_service.py +7 -5
- rasa/model_manager/utils.py +28 -7
- rasa/model_service.py +9 -2
- rasa/model_training.py +2 -0
- rasa/nlu/classifiers/diet_classifier.py +38 -25
- rasa/nlu/classifiers/logistic_regression_classifier.py +22 -9
- rasa/nlu/classifiers/sklearn_intent_classifier.py +37 -16
- rasa/nlu/extractors/crf_entity_extractor.py +93 -50
- rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py +45 -16
- rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py +52 -17
- rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py +5 -3
- rasa/nlu/tokenizers/whitespace_tokenizer.py +3 -14
- rasa/server.py +3 -1
- rasa/shared/constants.py +36 -3
- rasa/shared/core/constants.py +7 -0
- rasa/shared/core/domain.py +26 -0
- rasa/shared/core/flows/flow.py +5 -0
- rasa/shared/core/flows/flows_list.py +5 -1
- rasa/shared/core/flows/flows_yaml_schema.json +10 -0
- rasa/shared/core/flows/utils.py +39 -0
- rasa/shared/core/flows/validation.py +96 -0
- rasa/shared/core/slots.py +5 -0
- rasa/shared/nlu/training_data/features.py +120 -2
- rasa/shared/providers/_configs/azure_openai_client_config.py +5 -3
- rasa/shared/providers/_configs/litellm_router_client_config.py +200 -0
- rasa/shared/providers/_configs/model_group_config.py +167 -0
- rasa/shared/providers/_configs/openai_client_config.py +1 -1
- rasa/shared/providers/_configs/rasa_llm_client_config.py +73 -0
- rasa/shared/providers/_configs/self_hosted_llm_client_config.py +1 -0
- rasa/shared/providers/_configs/utils.py +16 -0
- rasa/shared/providers/embedding/_base_litellm_embedding_client.py +18 -29
- rasa/shared/providers/embedding/azure_openai_embedding_client.py +54 -21
- rasa/shared/providers/embedding/litellm_router_embedding_client.py +135 -0
- rasa/shared/providers/llm/_base_litellm_client.py +37 -31
- rasa/shared/providers/llm/azure_openai_llm_client.py +50 -29
- rasa/shared/providers/llm/litellm_router_llm_client.py +127 -0
- rasa/shared/providers/llm/rasa_llm_client.py +112 -0
- rasa/shared/providers/llm/self_hosted_llm_client.py +1 -1
- rasa/shared/providers/mappings.py +19 -0
- rasa/shared/providers/router/__init__.py +0 -0
- rasa/shared/providers/router/_base_litellm_router_client.py +149 -0
- rasa/shared/providers/router/router_client.py +73 -0
- rasa/shared/utils/common.py +8 -0
- rasa/shared/utils/health_check/__init__.py +0 -0
- rasa/shared/utils/health_check/embeddings_health_check_mixin.py +31 -0
- rasa/shared/utils/health_check/health_check.py +256 -0
- rasa/shared/utils/health_check/llm_health_check_mixin.py +31 -0
- rasa/shared/utils/io.py +28 -6
- rasa/shared/utils/llm.py +353 -46
- rasa/shared/utils/yaml.py +111 -73
- rasa/studio/auth.py +3 -5
- rasa/studio/config.py +13 -4
- rasa/studio/constants.py +1 -0
- rasa/studio/data_handler.py +10 -3
- rasa/studio/upload.py +81 -26
- rasa/telemetry.py +92 -17
- rasa/tracing/config.py +2 -0
- rasa/tracing/instrumentation/attribute_extractors.py +94 -17
- rasa/tracing/instrumentation/instrumentation.py +121 -0
- rasa/utils/common.py +5 -0
- rasa/utils/io.py +7 -81
- rasa/utils/log_utils.py +9 -2
- rasa/utils/sanic_error_handler.py +32 -0
- rasa/utils/tensorflow/feature_array.py +366 -0
- rasa/utils/tensorflow/model_data.py +2 -193
- rasa/validator.py +70 -0
- rasa/version.py +1 -1
- {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc2.dist-info}/METADATA +11 -10
- {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc2.dist-info}/RECORD +183 -163
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-855bc5b3-587d82d8.js +0 -1
- {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc2.dist-info}/NOTICE +0 -0
- {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc2.dist-info}/WHEEL +0 -0
- {rasa_pro-3.11.0a4.dev3.dist-info → rasa_pro-3.11.0rc2.dist-info}/entry_points.txt +0 -0
rasa/utils/io.py
CHANGED
|
@@ -2,7 +2,6 @@ import asyncio
|
|
|
2
2
|
import filecmp
|
|
3
3
|
import logging
|
|
4
4
|
import os
|
|
5
|
-
import pickle
|
|
6
5
|
import tempfile
|
|
7
6
|
import warnings
|
|
8
7
|
import re
|
|
@@ -19,7 +18,6 @@ from typing import (
|
|
|
19
18
|
Type,
|
|
20
19
|
Callable,
|
|
21
20
|
TYPE_CHECKING,
|
|
22
|
-
Pattern,
|
|
23
21
|
)
|
|
24
22
|
|
|
25
23
|
from ruamel import yaml
|
|
@@ -98,29 +96,6 @@ def enable_async_loop_debugging(
|
|
|
98
96
|
return event_loop
|
|
99
97
|
|
|
100
98
|
|
|
101
|
-
def pickle_dump(filename: Union[Text, Path], obj: Any) -> None:
|
|
102
|
-
"""Saves object to file.
|
|
103
|
-
|
|
104
|
-
Args:
|
|
105
|
-
filename: the filename to save the object to
|
|
106
|
-
obj: the object to store
|
|
107
|
-
"""
|
|
108
|
-
with open(filename, "wb") as f:
|
|
109
|
-
pickle.dump(obj, f)
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
def pickle_load(filename: Union[Text, Path]) -> Any:
|
|
113
|
-
"""Loads an object from a file.
|
|
114
|
-
|
|
115
|
-
Args:
|
|
116
|
-
filename: the filename to load the object from
|
|
117
|
-
|
|
118
|
-
Returns: the loaded object
|
|
119
|
-
"""
|
|
120
|
-
with open(filename, "rb") as f:
|
|
121
|
-
return pickle.load(f)
|
|
122
|
-
|
|
123
|
-
|
|
124
99
|
def create_temporary_file(data: Any, suffix: Text = "", mode: Text = "w+") -> Text:
|
|
125
100
|
"""Creates a tempfile.NamedTemporaryFile object for data."""
|
|
126
101
|
encoding = None if "b" in mode else rasa.shared.utils.io.DEFAULT_ENCODING
|
|
@@ -191,63 +166,14 @@ def create_validator(
|
|
|
191
166
|
return FunctionValidator
|
|
192
167
|
|
|
193
168
|
|
|
194
|
-
def
|
|
195
|
-
|
|
196
|
-
) -> Any:
|
|
197
|
-
"""Unpickle an object from file using json.
|
|
198
|
-
|
|
199
|
-
Args:
|
|
200
|
-
file_name: the file to load the object from
|
|
201
|
-
encode_non_string_keys: If set to `True` then jsonpickle will encode non-string
|
|
202
|
-
dictionary keys instead of coercing them into strings via `repr()`.
|
|
203
|
-
|
|
204
|
-
Returns: the object
|
|
205
|
-
"""
|
|
206
|
-
import jsonpickle.ext.numpy as jsonpickle_numpy
|
|
207
|
-
import jsonpickle
|
|
208
|
-
|
|
209
|
-
jsonpickle_numpy.register_handlers()
|
|
210
|
-
|
|
211
|
-
file_content = rasa.shared.utils.io.read_file(file_name)
|
|
212
|
-
return jsonpickle.loads(file_content, keys=encode_non_string_keys)
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
def json_pickle(
|
|
216
|
-
file_name: Union[Text, Path], obj: Any, encode_non_string_keys: bool = False
|
|
217
|
-
) -> None:
|
|
218
|
-
"""Pickle an object to a file using json.
|
|
219
|
-
|
|
220
|
-
Args:
|
|
221
|
-
file_name: the file to store the object to
|
|
222
|
-
obj: the object to store
|
|
223
|
-
encode_non_string_keys: If set to `True` then jsonpickle will encode non-string
|
|
224
|
-
dictionary keys instead of coercing them into strings via `repr()`.
|
|
225
|
-
"""
|
|
226
|
-
import jsonpickle.ext.numpy as jsonpickle_numpy
|
|
227
|
-
import jsonpickle
|
|
228
|
-
|
|
229
|
-
jsonpickle_numpy.register_handlers()
|
|
169
|
+
def remove_emojis(s: str) -> str:
|
|
170
|
+
import demoji
|
|
230
171
|
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
def get_emoji_regex() -> Pattern:
|
|
237
|
-
"""Returns regex to identify emojis."""
|
|
238
|
-
return re.compile(
|
|
239
|
-
"["
|
|
240
|
-
"\U0001f600-\U0001f64f" # emoticons
|
|
241
|
-
"\U0001f300-\U0001f5ff" # symbols & pictographs
|
|
242
|
-
"\U0001f680-\U0001f6ff" # transport & map symbols
|
|
243
|
-
"\U0001f1e0-\U0001f1ff" # flags (iOS)
|
|
244
|
-
"\U00002702-\U000027b0"
|
|
245
|
-
"\U000024c2-\U0001f251"
|
|
246
|
-
"\u200d" # zero width joiner
|
|
247
|
-
"\u200c" # zero width non-joiner
|
|
248
|
-
"]+",
|
|
249
|
-
flags=re.UNICODE,
|
|
250
|
-
)
|
|
172
|
+
replaced = demoji.replace(s)
|
|
173
|
+
if replaced == s:
|
|
174
|
+
return s
|
|
175
|
+
# remove duplicate or trailing whitespaces if emojis were removed
|
|
176
|
+
return re.sub(r" +", " ", replaced).strip()
|
|
251
177
|
|
|
252
178
|
|
|
253
179
|
def are_directories_equal(dir1: Path, dir2: Path) -> bool:
|
rasa/utils/log_utils.py
CHANGED
|
@@ -78,6 +78,7 @@ def _anonymizer(
|
|
|
78
78
|
|
|
79
79
|
def configure_structlog(
|
|
80
80
|
log_level: Optional[int] = None,
|
|
81
|
+
include_time: bool = False,
|
|
81
82
|
) -> None:
|
|
82
83
|
"""Configure logging of the server."""
|
|
83
84
|
if log_level is None: # Log level NOTSET is 0 so we use `is None` here
|
|
@@ -114,6 +115,9 @@ def configure_structlog(
|
|
|
114
115
|
SentryProcessor(event_level=logging.FATAL),
|
|
115
116
|
]
|
|
116
117
|
|
|
118
|
+
if include_time:
|
|
119
|
+
shared_processors.append(structlog.processors.TimeStamper(fmt="iso"))
|
|
120
|
+
|
|
117
121
|
if not FORCE_JSON_LOGGING and sys.stderr.isatty():
|
|
118
122
|
# Pretty printing when we run in a terminal session.
|
|
119
123
|
# Automatically prints pretty tracebacks when "rich" is installed
|
|
@@ -143,11 +147,14 @@ def configure_structlog(
|
|
|
143
147
|
# logger.
|
|
144
148
|
cache_logger_on_first_use=True,
|
|
145
149
|
)
|
|
150
|
+
# doing logger creation inline, to prevent usage of unconfigured logger
|
|
151
|
+
structlog.get_logger().debug("structlog.configured")
|
|
146
152
|
|
|
147
153
|
|
|
148
154
|
def log_llm(logger: Any, log_module: str, log_event: str, **kwargs: Any) -> None:
|
|
149
|
-
"""Logs LLM-specific events depending on a flag passed through an
|
|
150
|
-
|
|
155
|
+
"""Logs LLM-specific events depending on a flag passed through an env var.
|
|
156
|
+
|
|
157
|
+
If the module's flag is set to INFO (e.g.
|
|
151
158
|
LOG_PROMPT_LLM_COMMAND_GENERATOR=INFO), its prompt is logged at INFO level,
|
|
152
159
|
overriding the general log level setting.
|
|
153
160
|
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from sanic import Sanic
|
|
2
|
+
from sanic.handlers import ErrorHandler
|
|
3
|
+
from sanic.request import Request
|
|
4
|
+
from sanic.exceptions import ServerError
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# TODO: remove custom handler when upgrading to sanic >= 24
|
|
8
|
+
# the underlying issue https://github.com/sanic-org/sanic/issues/2572
|
|
9
|
+
# has been fixed in sanic 24
|
|
10
|
+
class IgnoreWSServerErrorHandler(ErrorHandler):
    """Sanic error handler that keeps one known websocket error out of the logs.

    Every other exception is handed to ``ErrorHandler.log`` unchanged.
    """

    @staticmethod
    def log(request: Request, exception: Exception) -> None:
        """Log ``exception`` unless it is the websocket ``ServerError`` to ignore.

        The exception is skipped only when the request URL starts with ``"ws"``
        and the exception is a ``ServerError`` whose first argument equals the
        "Invalid response type None" message checked below.

        Args:
            request: the request during which the exception was raised
            exception: the exception that should be logged
        """
        try:
            is_ignored_ws_error = bool(
                request.url.startswith("ws")
                and isinstance(exception, ServerError)
                and exception.args
                and (
                    exception.args[0]
                    == "Invalid response type None (need HTTPResponse)"
                )
            )
        except Exception:
            # inspecting the request / exception failed, so fall back to the
            # regular logging below instead of hiding anything
            is_ignored_ws_error = False

        if is_ignored_ws_error:
            # in case we are in a websocket connection, we don't want to log
            # the error, as this is a bug in sanic
            return

        ErrorHandler.log(request, exception)  # type: ignore
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def register_custom_sanic_error_handler(app: Sanic) -> None:
    """Install an ``IgnoreWSServerErrorHandler`` as the error handler of ``app``.

    Args:
        app: the Sanic application whose ``error_handler`` attribute is replaced
    """
    handler = IgnoreWSServerErrorHandler()
    app.error_handler = handler
|
|
@@ -0,0 +1,366 @@
|
|
|
1
|
+
from typing import Dict, Any, List, Tuple, Optional, Union
|
|
2
|
+
|
|
3
|
+
from safetensors.numpy import save_file
|
|
4
|
+
import numpy as np
|
|
5
|
+
from safetensors.numpy import load_file
|
|
6
|
+
import scipy.sparse
|
|
7
|
+
|
|
8
|
+
import rasa.shared.utils.io
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _recursive_serialize(
    array: Any, prefix: str, data_dict: Dict[str, Any], metadata: List[Dict[str, Any]]
) -> None:
    """Recursively serialize arrays and matrices for high dimensional data.

    Numeric payloads are written into ``data_dict`` under keys derived from
    ``prefix``; a description of every payload is appended to ``metadata``:

    - ``np.ndarray`` with at most two dimensions -> ``"dense"`` entry
    - list that contains only ints / floats -> ``"list"`` entry
    - scipy sparse matrix -> ``"sparse"`` entry (COO data, row and col arrays)
    - any other list or array -> ``"group"`` entry whose items are serialized
      recursively with the prefix ``f"{prefix}_{idx}"``

    A value of any other type is skipped without adding an entry.

    Args:
        array: the value to serialize
        prefix: prefix of all keys written to ``data_dict`` for this value
        data_dict: mapping that collects the numeric arrays (modified in place)
        metadata: list that collects the structure description (modified in
            place)
    """
    if isinstance(array, np.ndarray) and array.ndim <= 2:
        data_key = f"{prefix}_array"
        data_dict[data_key] = array
        metadata.append({"type": "dense", "key": data_key, "shape": array.shape})

    elif isinstance(array, list) and all(isinstance(v, (int, float)) for v in array):
        # Non-empty lists made up only of ints are stored as int64. All other
        # numeric lists (only floats, empty, or ints mixed with floats) are
        # stored as float32. A mixed list must not reach the "group" branch
        # below: that branch has no handling for scalar items, so the values
        # would be lost.
        only_ints = bool(array) and all(isinstance(v, int) for v in array)
        data_key = f"{prefix}_list"
        data_dict[data_key] = np.array(
            array, dtype=np.int64 if only_ints else np.float32
        )
        metadata.append({"type": "list", "key": data_key})

    elif isinstance(array, scipy.sparse.spmatrix):
        # sparse matrices are stored in COO form as three separate arrays
        coo = array.tocoo()
        data_dict.update(
            {
                f"{prefix}_data": coo.data,
                f"{prefix}_row": coo.row,
                f"{prefix}_col": coo.col,
            }
        )
        metadata.append({"type": "sparse", "key": prefix, "shape": coo.shape})

    elif isinstance(array, (list, np.ndarray)):
        group_metadata: Dict[str, Any] = {"type": "group", "subcomponents": []}
        for idx, item in enumerate(array):
            _recursive_serialize(
                item, f"{prefix}_{idx}", data_dict, group_metadata["subcomponents"]
            )
        metadata.append(group_metadata)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _serialize_nested_data(
    nested_data: Dict[str, Dict[str, List["FeatureArray"]]],
    prefix: str,
    data_dict: Dict[str, np.ndarray],
    metadata: List[Dict[str, Union[str, List]]],
) -> None:
    """Handle serialization across dictionary and list levels.

    For every outer key one entry ``{"key": ..., "components": [...]}`` is
    appended to ``metadata``. Each component describes one inner key with its
    ``number_of_dimensions`` and the serialized ``features``.

    Args:
        nested_data: the feature arrays, grouped by outer and inner key
        prefix: prefix of all keys written to ``data_dict``
        data_dict: mapping that collects the numeric arrays (modified in place)
        metadata: list that collects the structure description (modified in
            place)
    """
    for outer_key, inner_dict in nested_data.items():
        components: List[Dict[str, Any]] = []

        for inner_key, feature_arrays in inner_dict.items():
            features: List[Dict[str, Any]] = []
            for idx, feature_array in enumerate(feature_arrays):
                _recursive_serialize(
                    feature_array.tolist(),
                    f"{prefix}_{outer_key}_{inner_key}_{idx}",
                    data_dict,
                    features,
                )

            components.append(
                {
                    "key": inner_key,
                    # an empty list has no first element to take the number of
                    # dimensions from; store None instead of raising IndexError
                    "number_of_dimensions": (
                        feature_arrays[0].number_of_dimensions
                        if feature_arrays
                        else None
                    ),
                    "features": features,
                }
            )

        metadata.append({"key": outer_key, "components": components})
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def serialize_nested_feature_arrays(
    nested_feature_array: Dict[str, Dict[str, List["FeatureArray"]]],
    data_filename: str,
    metadata_filename: str,
) -> None:
    """Write nested feature arrays to a data file and a JSON metadata file.

    Args:
        nested_feature_array: the feature arrays, grouped by outer and inner key
        data_filename: file the numeric arrays are saved to via ``save_file``
        metadata_filename: file the structure description is dumped to as JSON
    """
    tensors: Dict[str, np.ndarray] = {}
    description: List[Dict[str, Union[str, List]]] = []

    _serialize_nested_data(nested_feature_array, "component", tensors, description)

    # the arrays go into one file, the description of their layout into another
    save_file(tensors, data_filename)
    rasa.shared.utils.io.dump_obj_as_json_to_file(metadata_filename, description)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _recursive_deserialize(
    metadata: List[Dict[str, Any]], data: Dict[str, Any]
) -> List[Any]:
    """Recursively deserialize arrays and matrices for high dimensional data.

    Args:
        metadata: entries of type ``"dense"``, ``"list"``, ``"sparse"`` or
            ``"group"`` that describe how to rebuild each value
        data: the numeric arrays, looked up by the keys named in ``metadata``

    Returns:
        One rebuilt value per metadata entry, in the same order: a reshaped
        array for ``"dense"``, a list for ``"list"``, a COO matrix for
        ``"sparse"`` and a nested list for ``"group"``.

    Raises:
        ValueError: if an entry has an unknown type. Skipping such an entry
            would shift all following values to the wrong position.
    """
    result: List[Any] = []

    for item in metadata:
        item_type = item["type"]

        if item_type == "dense":
            result.append(np.asarray(data[item["key"]]).reshape(item["shape"]))

        elif item_type == "list":
            result.append(list(data[item["key"]]))

        elif item_type == "sparse":
            key = item["key"]
            result.append(
                scipy.sparse.coo_matrix(
                    (data[f"{key}_data"], (data[f"{key}_row"], data[f"{key}_col"])),
                    shape=item["shape"],
                )
            )

        elif item_type == "group":
            result.append(_recursive_deserialize(item["subcomponents"], data))

        else:
            raise ValueError(
                f"Cannot deserialize feature metadata of unknown type "
                f"'{item_type}'."
            )

    return result
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _deserialize_nested_data(
    metadata: List[Dict[str, Any]], data_dict: Dict[str, Any]
) -> Dict[str, Dict[str, List["FeatureArray"]]]:
    """Handle deserialization across all dictionary and list levels.

    Args:
        metadata: one entry per outer key, each listing its ``components``
            (inner key, ``number_of_dimensions`` and serialized ``features``)
        data_dict: the numeric arrays referenced by the feature metadata

    Returns:
        The feature arrays, grouped by outer and inner key.
    """
    restored: Dict[str, Dict[str, List["FeatureArray"]]] = {}

    for outer_item in metadata:
        components: Dict[str, List["FeatureArray"]] = {}
        restored[outer_item["key"]] = components

        for inner_item in outer_item["components"]:
            inner_key = inner_item["key"]
            # Each feature entry is deserialized on its own and wrapped into an
            # object array so that FeatureArray receives np.ndarray compatible
            # input.
            components[inner_key] = [
                FeatureArray(
                    np.array(
                        _recursive_deserialize([feature_item], data_dict)[0],
                        dtype=object,
                    ),
                    inner_item["number_of_dimensions"],
                )
                for feature_item in inner_item["features"]
            ]

    return restored
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def deserialize_nested_feature_arrays(
    data_filename: str, metadata_filename: str
) -> Dict[str, Dict[str, List["FeatureArray"]]]:
    """Load nested feature arrays from a data file and a JSON metadata file.

    Args:
        data_filename: file the numeric arrays are loaded from via ``load_file``
        metadata_filename: JSON file that describes the structure of the arrays

    Returns:
        The feature arrays, grouped by outer and inner key.
    """
    # the metadata is read first, then the arrays it refers to
    return _deserialize_nested_data(
        rasa.shared.utils.io.read_json_file(metadata_filename),
        load_file(data_filename),
    )
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
class FeatureArray(np.ndarray):
    """Stores any kind of features ready to be used by a RasaModel.

    Next to the input numpy array of features, it also receives the number of
    dimensions of the features.
    As our features can have 1 to 4 dimensions we might have different number of numpy
    arrays stacked. The number of dimensions helps us to figure out how to handle this
    particular feature array. Also, it is automatically determined whether the feature
    array is sparse or not and the number of units is determined as well.

    Subclassing np.array: https://numpy.org/doc/stable/user/basics.subclassing.html
    """

    def __new__(
        cls, input_array: np.ndarray, number_of_dimensions: int
    ) -> "FeatureArray":
        """Create and return a new object. See help(type) for accurate signature.

        Args:
            input_array: the array that contains features
            number_of_dimensions: number of dimensions in input_array

        Raises:
            ValueError: if the number of dimensions is not supported or does not
                match the input array
        """
        FeatureArray._validate_number_of_dimensions(number_of_dimensions, input_array)

        feature_array = np.asarray(input_array).view(cls)

        if number_of_dimensions <= 2:
            feature_array.units = input_array.shape[-1]
            feature_array.is_sparse = isinstance(input_array[0], scipy.sparse.spmatrix)
        elif number_of_dimensions == 3:
            feature_array.units = input_array[0].shape[-1]
            feature_array.is_sparse = isinstance(input_array[0], scipy.sparse.spmatrix)
        elif number_of_dimensions == 4:
            feature_array.units = input_array[0][0].shape[-1]
            feature_array.is_sparse = isinstance(
                input_array[0][0], scipy.sparse.spmatrix
            )
        else:
            raise ValueError(
                f"Number of dimensions '{number_of_dimensions}' currently not "
                f"supported."
            )

        feature_array.number_of_dimensions = number_of_dimensions

        return feature_array

    def __init__(
        self, input_array: Any, number_of_dimensions: int, **kwargs: Any
    ) -> None:
        """Initialize FeatureArray.

        Needed in order to avoid 'Invalid keyword argument number_of_dimensions
        to function FeatureArray.__init__ '
        Args:
            input_array: the array that contains features
            number_of_dimensions: number of dimensions in input_array
        """
        super().__init__(**kwargs)
        self.number_of_dimensions = number_of_dimensions

    def __array_finalize__(self, obj: Optional[np.ndarray]) -> None:
        """This method is called when the system allocates a new array from obj.

        Copies ``units``, ``number_of_dimensions`` and ``is_sparse`` from ``obj``
        (``None`` for attributes ``obj`` does not have).

        Args:
            obj: A subclass (subtype) of ndarray.
        """
        if obj is None:
            return

        self.units = getattr(obj, "units", None)
        self.number_of_dimensions = getattr(obj, "number_of_dimensions", None)  # type: ignore[assignment]
        self.is_sparse = getattr(obj, "is_sparse", None)

    # pytype: disable=attribute-error
    def __array_ufunc__(
        self, ufunc: Any, method: str, *inputs: Any, **kwargs: Any
    ) -> Any:
        """Overwrite this method as we are subclassing numpy array.

        Args:
            ufunc: The ufunc object that was called.
            method: A string indicating which Ufunc method was called
                (one of "__call__", "reduce", "reduceat", "accumulate", "outer",
                "at").
            *inputs: A tuple of the input arguments to the ufunc.
            **kwargs: Any additional arguments. An optional
                ``number_of_dimensions`` entry overrides the value carried over
                to the result and is not passed on to the ufunc.

        Returns:
            The result of the operation. Arrays that have as many numpy
            dimensions as this array are returned as FeatureArray carrying the
            attributes of this array; everything else (scalars, arrays of a
            different rank, ``None``) is returned as computed.
        """
        # numpy itself never passes this key, so fall back to our own value
        number_of_dimensions = kwargs.pop(
            "number_of_dimensions", self.number_of_dimensions
        )

        def _unwrap(value: Any) -> Any:
            # Only FeatureArrays need to be converted to np.ndarray to prevent
            # recursion; scalars and other inputs have no usable `view`.
            return value.view(np.ndarray) if isinstance(value, FeatureArray) else value

        def _wrap(value: Any) -> Any:
            if not isinstance(value, np.ndarray) or value.ndim != self.ndim:
                return value
            wrapped = value.view(FeatureArray)
            # copy (instead of share) the attributes, so that later changes on
            # one array do not leak into the other
            wrapped.__dict__.update(self.__dict__)
            wrapped.number_of_dimensions = number_of_dimensions
            return wrapped

        if kwargs.get("out") is not None:
            kwargs["out"] = tuple(_unwrap(out) for out in kwargs["out"])

        result = getattr(ufunc, method)(*(_unwrap(i) for i in inputs), **kwargs)

        if isinstance(result, tuple):
            # ufuncs with several outputs
            return tuple(_wrap(r) for r in result)
        return _wrap(result)

    def __reduce__(self) -> Tuple[Any, Any, Any]:
        """Needed in order to pickle this object.

        Returns:
            A tuple: the reconstruction callable and its arguments as returned by
            ``np.ndarray.__reduce__``, plus the array state extended by
            ``number_of_dimensions``, ``is_sparse`` and ``units``.
        """
        pickled_state = super(FeatureArray, self).__reduce__()
        if isinstance(pickled_state, str):
            raise TypeError("np array __reduce__ returned string instead of tuple.")
        new_state = pickled_state[2] + (
            self.number_of_dimensions,
            self.is_sparse,
            self.units,
        )
        return pickled_state[0], pickled_state[1], new_state

    def __setstate__(self, state: Any, **kwargs: Any) -> None:
        """Sets the state.

        Args:
            state: The state argument must be a sequence that contains the following
                   elements version, shape, dtype, isFortran, rawdata, followed by
                   the three values appended in `__reduce__`.
            **kwargs: Any additional parameter
        """
        # Needed in order to load the object
        self.number_of_dimensions = state[-3]
        self.is_sparse = state[-2]
        self.units = state[-1]
        super(FeatureArray, self).__setstate__(state[0:-3], **kwargs)

    # pytype: enable=attribute-error

    @staticmethod
    def _validate_number_of_dimensions(
        number_of_dimensions: int, input_array: np.ndarray
    ) -> None:
        """Validates if the input array has given number of dimensions.

        Args:
            number_of_dimensions: number of dimensions
            input_array: input array

        Raises: ValueError in case the dimensions do not match
        """
        # when loading the feature arrays from disk, the shape represents
        # the correct number of dimensions
        if len(input_array.shape) == number_of_dimensions:
            return

        _sub_array = input_array
        dim = 0
        # Go number_of_dimensions into the given input_array
        for i in range(1, number_of_dimensions + 1):
            _sub_array = _sub_array[0]
            if isinstance(_sub_array, scipy.sparse.spmatrix):
                dim = i
                break
            if isinstance(_sub_array, np.ndarray) and _sub_array.shape[0] == 0:
                # sequence dimension is 0, we are dealing with "fake" features
                dim = i
                break

        # If the resulting sub_array is sparse, the remaining number of dimensions
        # should be at least 2
        if isinstance(_sub_array, scipy.sparse.spmatrix):
            if dim > 2:
                raise ValueError(
                    f"Given number of dimensions '{number_of_dimensions}' does not "
                    f"match dimensions of given input array: {input_array}."
                )
        elif isinstance(_sub_array, np.ndarray) and _sub_array.shape[0] == 0:
            # sequence dimension is 0, we are dealing with "fake" features,
            # but they should be of dim 2
            if dim > 2:
                raise ValueError(
                    f"Given number of dimensions '{number_of_dimensions}' does not "
                    f"match dimensions of given input array: {input_array}."
                )
        # If the resulting sub_array is dense, the sub_array should be a single number
        elif not np.issubdtype(type(_sub_array), np.integer) and not isinstance(
            _sub_array, (np.float32, np.float64)
        ):
            raise ValueError(
                f"Given number of dimensions '{number_of_dimensions}' does not match "
                f"dimensions of given input array: {input_array}."
            )
|