rasa-pro 3.8.18__py3-none-any.whl → 3.9.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rasa-pro might be problematic. Click here for more details.
- README.md +6 -42
- rasa/__main__.py +14 -9
- rasa/anonymization/anonymization_pipeline.py +0 -1
- rasa/anonymization/anonymization_rule_executor.py +3 -3
- rasa/anonymization/utils.py +4 -3
- rasa/api.py +2 -2
- rasa/cli/arguments/default_arguments.py +1 -1
- rasa/cli/arguments/run.py +2 -2
- rasa/cli/arguments/test.py +1 -1
- rasa/cli/arguments/train.py +10 -10
- rasa/cli/e2e_test.py +27 -7
- rasa/cli/export.py +0 -1
- rasa/cli/license.py +3 -3
- rasa/cli/project_templates/calm/actions/action_template.py +1 -1
- rasa/cli/project_templates/calm/config.yml +1 -1
- rasa/cli/project_templates/calm/credentials.yml +1 -1
- rasa/cli/project_templates/calm/data/flows/add_contact.yml +1 -1
- rasa/cli/project_templates/calm/data/flows/remove_contact.yml +1 -1
- rasa/cli/project_templates/calm/domain/add_contact.yml +8 -2
- rasa/cli/project_templates/calm/domain/list_contacts.yml +3 -0
- rasa/cli/project_templates/calm/domain/remove_contact.yml +9 -2
- rasa/cli/project_templates/calm/domain/shared.yml +5 -0
- rasa/cli/project_templates/calm/endpoints.yml +4 -4
- rasa/cli/project_templates/default/actions/actions.py +1 -1
- rasa/cli/project_templates/default/config.yml +5 -5
- rasa/cli/project_templates/default/credentials.yml +1 -1
- rasa/cli/project_templates/default/endpoints.yml +4 -4
- rasa/cli/project_templates/default/tests/test_stories.yml +1 -1
- rasa/cli/project_templates/tutorial/config.yml +1 -1
- rasa/cli/project_templates/tutorial/credentials.yml +1 -1
- rasa/cli/project_templates/tutorial/data/patterns.yml +6 -0
- rasa/cli/project_templates/tutorial/domain.yml +4 -0
- rasa/cli/project_templates/tutorial/endpoints.yml +6 -6
- rasa/cli/run.py +0 -1
- rasa/cli/scaffold.py +3 -2
- rasa/cli/studio/download.py +11 -0
- rasa/cli/studio/studio.py +180 -24
- rasa/cli/studio/upload.py +0 -8
- rasa/cli/telemetry.py +18 -6
- rasa/cli/utils.py +21 -10
- rasa/cli/x.py +3 -2
- rasa/constants.py +1 -1
- rasa/core/actions/action.py +90 -315
- rasa/core/actions/action_exceptions.py +24 -0
- rasa/core/actions/constants.py +3 -0
- rasa/core/actions/custom_action_executor.py +188 -0
- rasa/core/actions/forms.py +11 -7
- rasa/core/actions/grpc_custom_action_executor.py +251 -0
- rasa/core/actions/http_custom_action_executor.py +140 -0
- rasa/core/actions/loops.py +3 -0
- rasa/core/actions/two_stage_fallback.py +1 -1
- rasa/core/agent.py +2 -4
- rasa/core/brokers/pika.py +1 -2
- rasa/core/channels/audiocodes.py +1 -1
- rasa/core/channels/botframework.py +0 -1
- rasa/core/channels/callback.py +0 -1
- rasa/core/channels/console.py +6 -8
- rasa/core/channels/development_inspector.py +1 -1
- rasa/core/channels/facebook.py +0 -3
- rasa/core/channels/hangouts.py +0 -6
- rasa/core/channels/inspector/dist/assets/{arc-5623b6dc.js → arc-b6e548fe.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{c4Diagram-d0fbc5ce-685c106a.js → c4Diagram-d0fbc5ce-fa03ac9e.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{classDiagram-936ed81e-8cbed007.js → classDiagram-936ed81e-ee67392a.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{classDiagram-v2-c3cb15f1-5889cf12.js → classDiagram-v2-c3cb15f1-9b283fae.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{createText-62fc7601-24c249d7.js → createText-62fc7601-8b6fcc2a.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{edges-f2ad444c-7dd06a75.js → edges-f2ad444c-22e77f4f.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{erDiagram-9d236eb7-62c1e54c.js → erDiagram-9d236eb7-60ffc87f.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{flowDb-1972c806-ce49b86f.js → flowDb-1972c806-9dd802e4.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{flowDiagram-7ea5b25a-4067e48f.js → flowDiagram-7ea5b25a-5fa1912f.js} +1 -1
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-855bc5b3-1844e5a5.js +1 -0
- rasa/core/channels/inspector/dist/assets/{flowchart-elk-definition-abe16c3d-59fe4051.js → flowchart-elk-definition-abe16c3d-622a1fd2.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{ganttDiagram-9b5ea136-47e3a43b.js → ganttDiagram-9b5ea136-e285a63a.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{gitGraphDiagram-99d0ae7c-5a2ac0d9.js → gitGraphDiagram-99d0ae7c-f237bdca.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{index-2c4b9a3b-dfb8efc4.js → index-2c4b9a3b-4b03d70e.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{index-268a75c0.js → index-a5d3e69d.js} +4 -4
- rasa/core/channels/inspector/dist/assets/{infoDiagram-736b4530-b0c470f2.js → infoDiagram-736b4530-72a0fa5f.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{journeyDiagram-df861f2b-2edb829a.js → journeyDiagram-df861f2b-82218c41.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{layout-b6873d69.js → layout-78cff630.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{line-1efc5781.js → line-5038b469.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{linear-661e9b94.js → linear-c4fc4098.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{mindmap-definition-beec6740-2d2e727f.js → mindmap-definition-beec6740-c33c8ea6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{pieDiagram-dbbf0591-9d3ea93d.js → pieDiagram-dbbf0591-a8d03059.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{quadrantDiagram-4d7f4fd6-06a178a2.js → quadrantDiagram-4d7f4fd6-6a0e56b2.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{requirementDiagram-6fc4c22a-0bfedffc.js → requirementDiagram-6fc4c22a-2dc7c7bd.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{sankeyDiagram-8f13d901-d76d0a04.js → sankeyDiagram-8f13d901-2360fe39.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{sequenceDiagram-b655622a-37bb4341.js → sequenceDiagram-b655622a-41b9f9ad.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{stateDiagram-59f0c015-f52f7f57.js → stateDiagram-59f0c015-0aad326f.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{stateDiagram-v2-2b26beab-4a986a20.js → stateDiagram-v2-2b26beab-9847d984.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-080da4f6-7dd9ae12.js → styles-080da4f6-564d890e.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-3dcbcfbf-46e1ca14.js → styles-3dcbcfbf-38957613.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-9c745c82-4a97439a.js → styles-9c745c82-f0fc6921.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{svgDrawCommon-4835440b-823917a3.js → svgDrawCommon-4835440b-ef3c5a77.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{timeline-definition-5b62e21b-9ea72896.js → timeline-definition-5b62e21b-bf3e91c1.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{xychartDiagram-2b33534f-b631a8b6.js → xychartDiagram-2b33534f-4d4026c0.js} +1 -1
- rasa/core/channels/inspector/dist/index.html +1 -1
- rasa/core/channels/inspector/src/components/DiagramFlow.tsx +10 -0
- rasa/core/channels/inspector/src/helpers/formatters.test.ts +4 -7
- rasa/core/channels/inspector/src/helpers/formatters.ts +3 -2
- rasa/core/channels/rest.py +36 -21
- rasa/core/channels/rocketchat.py +0 -1
- rasa/core/channels/socketio.py +1 -1
- rasa/core/channels/telegram.py +3 -3
- rasa/core/channels/webexteams.py +0 -1
- rasa/core/concurrent_lock_store.py +1 -1
- rasa/core/evaluation/marker_base.py +1 -3
- rasa/core/evaluation/marker_stats.py +1 -2
- rasa/core/featurizers/single_state_featurizer.py +3 -26
- rasa/core/featurizers/tracker_featurizers.py +18 -122
- rasa/core/information_retrieval/__init__.py +7 -0
- rasa/core/information_retrieval/faiss.py +9 -4
- rasa/core/information_retrieval/information_retrieval.py +64 -7
- rasa/core/information_retrieval/milvus.py +7 -14
- rasa/core/information_retrieval/qdrant.py +8 -15
- rasa/core/lock_store.py +0 -1
- rasa/core/migrate.py +1 -2
- rasa/core/nlg/callback.py +3 -4
- rasa/core/policies/enterprise_search_policy.py +86 -22
- rasa/core/policies/enterprise_search_prompt_template.jinja2 +4 -41
- rasa/core/policies/enterprise_search_prompt_with_citation_template.jinja2 +60 -0
- rasa/core/policies/flows/flow_executor.py +104 -2
- rasa/core/policies/intentless_policy.py +7 -9
- rasa/core/policies/memoization.py +3 -3
- rasa/core/policies/policy.py +18 -9
- rasa/core/policies/rule_policy.py +8 -11
- rasa/core/policies/ted_policy.py +61 -88
- rasa/core/policies/unexpected_intent_policy.py +8 -17
- rasa/core/processor.py +136 -47
- rasa/core/run.py +41 -25
- rasa/core/secrets_manager/endpoints.py +2 -2
- rasa/core/secrets_manager/vault.py +6 -8
- rasa/core/test.py +3 -5
- rasa/core/tracker_store.py +49 -14
- rasa/core/train.py +1 -3
- rasa/core/training/interactive.py +9 -6
- rasa/core/utils.py +5 -10
- rasa/dialogue_understanding/coexistence/intent_based_router.py +11 -4
- rasa/dialogue_understanding/coexistence/llm_based_router.py +2 -3
- rasa/dialogue_understanding/commands/__init__.py +4 -0
- rasa/dialogue_understanding/commands/can_not_handle_command.py +9 -0
- rasa/dialogue_understanding/commands/cancel_flow_command.py +9 -0
- rasa/dialogue_understanding/commands/change_flow_command.py +38 -0
- rasa/dialogue_understanding/commands/chit_chat_answer_command.py +9 -0
- rasa/dialogue_understanding/commands/clarify_command.py +9 -0
- rasa/dialogue_understanding/commands/correct_slots_command.py +9 -0
- rasa/dialogue_understanding/commands/error_command.py +12 -0
- rasa/dialogue_understanding/commands/handle_code_change_command.py +9 -0
- rasa/dialogue_understanding/commands/human_handoff_command.py +9 -0
- rasa/dialogue_understanding/commands/knowledge_answer_command.py +9 -0
- rasa/dialogue_understanding/commands/noop_command.py +9 -0
- rasa/dialogue_understanding/commands/set_slot_command.py +38 -3
- rasa/dialogue_understanding/commands/skip_question_command.py +9 -0
- rasa/dialogue_understanding/commands/start_flow_command.py +9 -0
- rasa/dialogue_understanding/generator/__init__.py +16 -1
- rasa/dialogue_understanding/generator/command_generator.py +92 -6
- rasa/dialogue_understanding/generator/constants.py +18 -0
- rasa/dialogue_understanding/generator/flow_retrieval.py +7 -5
- rasa/dialogue_understanding/generator/llm_based_command_generator.py +467 -0
- rasa/dialogue_understanding/generator/llm_command_generator.py +39 -609
- rasa/dialogue_understanding/generator/multi_step/__init__.py +0 -0
- rasa/dialogue_understanding/generator/multi_step/fill_slots_prompt.jinja2 +62 -0
- rasa/dialogue_understanding/generator/multi_step/handle_flows_prompt.jinja2 +38 -0
- rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +827 -0
- rasa/dialogue_understanding/generator/nlu_command_adapter.py +69 -8
- rasa/dialogue_understanding/generator/single_step/__init__.py +0 -0
- rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +345 -0
- rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +36 -31
- rasa/dialogue_understanding/processor/command_processor.py +112 -3
- rasa/e2e_test/constants.py +1 -0
- rasa/e2e_test/e2e_test_case.py +44 -0
- rasa/e2e_test/e2e_test_runner.py +114 -11
- rasa/e2e_test/e2e_test_schema.yml +18 -0
- rasa/engine/caching.py +0 -1
- rasa/engine/graph.py +18 -6
- rasa/engine/recipes/config_files/default_config.yml +3 -3
- rasa/engine/recipes/default_components.py +1 -1
- rasa/engine/recipes/default_recipe.py +4 -5
- rasa/engine/recipes/recipe.py +1 -1
- rasa/engine/runner/dask.py +3 -9
- rasa/engine/storage/local_model_storage.py +0 -2
- rasa/engine/validation.py +179 -145
- rasa/exceptions.py +2 -2
- rasa/graph_components/validators/default_recipe_validator.py +3 -5
- rasa/hooks.py +0 -1
- rasa/model.py +1 -1
- rasa/model_training.py +1 -0
- rasa/nlu/classifiers/diet_classifier.py +33 -52
- rasa/nlu/classifiers/logistic_regression_classifier.py +9 -22
- rasa/nlu/classifiers/sklearn_intent_classifier.py +16 -37
- rasa/nlu/extractors/crf_entity_extractor.py +54 -97
- rasa/nlu/extractors/duckling_entity_extractor.py +1 -1
- rasa/nlu/featurizers/dense_featurizer/convert_featurizer.py +1 -5
- rasa/nlu/featurizers/dense_featurizer/lm_featurizer.py +0 -4
- rasa/nlu/featurizers/featurizer.py +1 -1
- rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py +18 -49
- rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py +26 -64
- rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py +3 -5
- rasa/nlu/persistor.py +68 -26
- rasa/nlu/selectors/response_selector.py +7 -10
- rasa/nlu/test.py +0 -3
- rasa/nlu/utils/hugging_face/registry.py +1 -1
- rasa/nlu/utils/spacy_utils.py +1 -3
- rasa/server.py +22 -7
- rasa/shared/constants.py +12 -1
- rasa/shared/core/command_payload_reader.py +109 -0
- rasa/shared/core/constants.py +4 -5
- rasa/shared/core/domain.py +57 -56
- rasa/shared/core/events.py +4 -7
- rasa/shared/core/flows/flow.py +9 -0
- rasa/shared/core/flows/flows_list.py +12 -0
- rasa/shared/core/flows/steps/action.py +7 -2
- rasa/shared/core/generator.py +12 -11
- rasa/shared/core/slot_mappings.py +315 -24
- rasa/shared/core/slots.py +4 -2
- rasa/shared/core/trackers.py +32 -14
- rasa/shared/core/training_data/loading.py +0 -1
- rasa/shared/core/training_data/story_reader/story_reader.py +3 -3
- rasa/shared/core/training_data/story_reader/yaml_story_reader.py +11 -11
- rasa/shared/core/training_data/story_writer/yaml_story_writer.py +5 -3
- rasa/shared/core/training_data/structures.py +1 -1
- rasa/shared/core/training_data/visualization.py +1 -1
- rasa/shared/data.py +58 -1
- rasa/shared/exceptions.py +36 -2
- rasa/shared/importers/importer.py +1 -2
- rasa/shared/importers/rasa.py +0 -1
- rasa/shared/nlu/constants.py +2 -0
- rasa/shared/nlu/training_data/entities_parser.py +1 -2
- rasa/shared/nlu/training_data/features.py +2 -120
- rasa/shared/nlu/training_data/formats/dialogflow.py +3 -2
- rasa/shared/nlu/training_data/formats/rasa_yaml.py +3 -5
- rasa/shared/nlu/training_data/formats/readerwriter.py +0 -1
- rasa/shared/nlu/training_data/message.py +13 -0
- rasa/shared/nlu/training_data/training_data.py +0 -2
- rasa/shared/providers/openai/session_handler.py +2 -2
- rasa/shared/utils/constants.py +3 -0
- rasa/shared/utils/io.py +11 -1
- rasa/shared/utils/llm.py +1 -2
- rasa/shared/utils/pykwalify_extensions.py +1 -0
- rasa/shared/utils/schemas/domain.yml +3 -0
- rasa/shared/utils/yaml.py +44 -35
- rasa/studio/auth.py +26 -10
- rasa/studio/constants.py +2 -0
- rasa/studio/data_handler.py +114 -107
- rasa/studio/download.py +160 -27
- rasa/studio/results_logger.py +137 -0
- rasa/studio/train.py +6 -7
- rasa/studio/upload.py +159 -134
- rasa/telemetry.py +188 -34
- rasa/tracing/config.py +18 -3
- rasa/tracing/constants.py +26 -2
- rasa/tracing/instrumentation/attribute_extractors.py +50 -41
- rasa/tracing/instrumentation/instrumentation.py +290 -44
- rasa/tracing/instrumentation/intentless_policy_instrumentation.py +7 -5
- rasa/tracing/instrumentation/metrics.py +109 -21
- rasa/tracing/metric_instrument_provider.py +83 -3
- rasa/utils/cli.py +2 -1
- rasa/utils/common.py +1 -1
- rasa/utils/endpoints.py +1 -2
- rasa/utils/io.py +72 -6
- rasa/utils/licensing.py +246 -31
- rasa/utils/ml_utils.py +1 -1
- rasa/utils/tensorflow/data_generator.py +1 -1
- rasa/utils/tensorflow/environment.py +1 -1
- rasa/utils/tensorflow/model_data.py +201 -12
- rasa/utils/tensorflow/model_data_utils.py +499 -500
- rasa/utils/tensorflow/models.py +5 -6
- rasa/utils/tensorflow/rasa_layers.py +15 -15
- rasa/utils/train_utils.py +1 -1
- rasa/utils/url_tools.py +53 -0
- rasa/validator.py +305 -3
- rasa/version.py +1 -1
- {rasa_pro-3.8.18.dist-info → rasa_pro-3.9.15.dist-info}/METADATA +25 -61
- {rasa_pro-3.8.18.dist-info → rasa_pro-3.9.15.dist-info}/RECORD +276 -259
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-855bc5b3-85583a23.js +0 -1
- rasa/utils/tensorflow/feature_array.py +0 -370
- /rasa/dialogue_understanding/generator/{command_prompt_template.jinja2 → single_step/command_prompt_template.jinja2} +0 -0
- {rasa_pro-3.8.18.dist-info → rasa_pro-3.9.15.dist-info}/NOTICE +0 -0
- {rasa_pro-3.8.18.dist-info → rasa_pro-3.9.15.dist-info}/WHEEL +0 -0
- {rasa_pro-3.8.18.dist-info → rasa_pro-3.9.15.dist-info}/entry_points.txt +0 -0
|
@@ -189,7 +189,7 @@ class ConveRTFeaturizer(DenseFeaturizer, GraphComponent):
|
|
|
189
189
|
f"Parameter 'model_url' of "
|
|
190
190
|
f"'{ConveRTFeaturizer.__name__}' was "
|
|
191
191
|
f"set to '{model_url}' which is strictly reserved for pytests of "
|
|
192
|
-
f"Rasa
|
|
192
|
+
f"Rasa Pro only. Due to licensing issues you are "
|
|
193
193
|
f"not allowed to use the model from this URL. "
|
|
194
194
|
f"You can either use a community hosted URL or if you have a "
|
|
195
195
|
f"local copy of the model, pass the path to the directory "
|
|
@@ -323,13 +323,11 @@ class ConveRTFeaturizer(DenseFeaturizer, GraphComponent):
|
|
|
323
323
|
return texts
|
|
324
324
|
|
|
325
325
|
def _sentence_encoding_of_text(self, batch: List[Text]) -> np.ndarray:
|
|
326
|
-
|
|
327
326
|
return self.sentence_encoding_signature(tf.convert_to_tensor(batch))[
|
|
328
327
|
"default"
|
|
329
328
|
].numpy()
|
|
330
329
|
|
|
331
330
|
def _sequence_encoding_of_text(self, batch: List[Text]) -> np.ndarray:
|
|
332
|
-
|
|
333
331
|
return self.sequence_encoding_signature(tf.convert_to_tensor(batch))[
|
|
334
332
|
"sequence_encoding"
|
|
335
333
|
].numpy()
|
|
@@ -346,7 +344,6 @@ class ConveRTFeaturizer(DenseFeaturizer, GraphComponent):
|
|
|
346
344
|
batch_size = 64
|
|
347
345
|
|
|
348
346
|
for attribute in DENSE_FEATURIZABLE_ATTRIBUTES:
|
|
349
|
-
|
|
350
347
|
non_empty_examples = list(
|
|
351
348
|
filter(lambda x: x.get(attribute), training_data.training_examples)
|
|
352
349
|
)
|
|
@@ -410,7 +407,6 @@ class ConveRTFeaturizer(DenseFeaturizer, GraphComponent):
|
|
|
410
407
|
)
|
|
411
408
|
|
|
412
409
|
def _tokenize(self, sentence: Text) -> Any:
|
|
413
|
-
|
|
414
410
|
return self.tokenize_signature(tf.convert_to_tensor([sentence]))[
|
|
415
411
|
"default"
|
|
416
412
|
].numpy()
|
|
@@ -316,7 +316,6 @@ class LanguageModelFeaturizer(DenseFeaturizer, GraphComponent):
|
|
|
316
316
|
batch_token_ids = []
|
|
317
317
|
batch_tokens = []
|
|
318
318
|
for example in batch_examples:
|
|
319
|
-
|
|
320
319
|
example_tokens, example_token_ids = self._tokenize_example(
|
|
321
320
|
example, attribute
|
|
322
321
|
)
|
|
@@ -416,7 +415,6 @@ class LanguageModelFeaturizer(DenseFeaturizer, GraphComponent):
|
|
|
416
415
|
# This doesn't affect the computation since we compute an attention mask
|
|
417
416
|
# anyways.
|
|
418
417
|
for example_token_ids in batch_token_ids:
|
|
419
|
-
|
|
420
418
|
# Truncate any longer sequences so that they can be fed to the model
|
|
421
419
|
if len(example_token_ids) > max_sequence_length_model:
|
|
422
420
|
example_token_ids = example_token_ids[:max_sequence_length_model]
|
|
@@ -710,7 +708,6 @@ class LanguageModelFeaturizer(DenseFeaturizer, GraphComponent):
|
|
|
710
708
|
batch_size = 64
|
|
711
709
|
|
|
712
710
|
for attribute in DENSE_FEATURIZABLE_ATTRIBUTES:
|
|
713
|
-
|
|
714
711
|
non_empty_examples = list(
|
|
715
712
|
filter(lambda x: x.get(attribute), training_data.training_examples)
|
|
716
713
|
)
|
|
@@ -718,7 +715,6 @@ class LanguageModelFeaturizer(DenseFeaturizer, GraphComponent):
|
|
|
718
715
|
batch_start_index = 0
|
|
719
716
|
|
|
720
717
|
while batch_start_index < len(non_empty_examples):
|
|
721
|
-
|
|
722
718
|
batch_end_index = min(
|
|
723
719
|
batch_start_index + batch_size, len(non_empty_examples)
|
|
724
720
|
)
|
|
@@ -64,7 +64,7 @@ class Featurizer(Generic[FeatureType], ABC):
|
|
|
64
64
|
|
|
65
65
|
@staticmethod
|
|
66
66
|
def raise_if_featurizer_configs_are_not_compatible(
|
|
67
|
-
featurizer_configs: Iterable[Dict[Text, Any]]
|
|
67
|
+
featurizer_configs: Iterable[Dict[Text, Any]],
|
|
68
68
|
) -> None:
|
|
69
69
|
"""Validates that the given configurations of featurizers can be used together.
|
|
70
70
|
|
|
@@ -1,32 +1,30 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
|
|
3
2
|
import logging
|
|
4
3
|
import re
|
|
5
|
-
from typing import Any, Dict, List, Optional, Text, Tuple, Set, Type, Union
|
|
6
|
-
|
|
7
|
-
import numpy as np
|
|
8
4
|
import scipy.sparse
|
|
9
|
-
from
|
|
10
|
-
from
|
|
5
|
+
from typing import Any, Dict, List, Optional, Text, Tuple, Set, Type
|
|
6
|
+
from rasa.nlu.tokenizers.tokenizer import Tokenizer
|
|
11
7
|
|
|
12
8
|
import rasa.shared.utils.io
|
|
13
9
|
from rasa.engine.graph import GraphComponent, ExecutionContext
|
|
14
10
|
from rasa.engine.recipes.default_recipe import DefaultV1Recipe
|
|
15
11
|
from rasa.engine.storage.resource import Resource
|
|
16
12
|
from rasa.engine.storage.storage import ModelStorage
|
|
13
|
+
from rasa.nlu.featurizers.sparse_featurizer.sparse_featurizer import SparseFeaturizer
|
|
14
|
+
from rasa.nlu.utils.spacy_utils import SpacyModel
|
|
15
|
+
from rasa.shared.constants import DOCS_URL_COMPONENTS
|
|
16
|
+
import rasa.utils.io as io_utils
|
|
17
|
+
from sklearn.exceptions import NotFittedError
|
|
18
|
+
from sklearn.feature_extraction.text import CountVectorizer
|
|
19
|
+
from rasa.shared.nlu.training_data.training_data import TrainingData
|
|
20
|
+
from rasa.shared.nlu.training_data.message import Message
|
|
21
|
+
from rasa.shared.exceptions import RasaException, FileIOException
|
|
17
22
|
from rasa.nlu.constants import (
|
|
18
23
|
TOKENS_NAMES,
|
|
19
24
|
MESSAGE_ATTRIBUTES,
|
|
20
25
|
DENSE_FEATURIZABLE_ATTRIBUTES,
|
|
21
26
|
)
|
|
22
|
-
from rasa.nlu.featurizers.sparse_featurizer.sparse_featurizer import SparseFeaturizer
|
|
23
|
-
from rasa.nlu.tokenizers.tokenizer import Tokenizer
|
|
24
|
-
from rasa.nlu.utils.spacy_utils import SpacyModel
|
|
25
|
-
from rasa.shared.constants import DOCS_URL_COMPONENTS
|
|
26
|
-
from rasa.shared.exceptions import RasaException, FileIOException
|
|
27
27
|
from rasa.shared.nlu.constants import TEXT, INTENT, INTENT_RESPONSE_KEY, ACTION_NAME
|
|
28
|
-
from rasa.shared.nlu.training_data.message import Message
|
|
29
|
-
from rasa.shared.nlu.training_data.training_data import TrainingData
|
|
30
28
|
|
|
31
29
|
BUFFER_SLOTS_PREFIX = "buf_"
|
|
32
30
|
|
|
@@ -101,7 +99,6 @@ class CountVectorsFeaturizer(SparseFeaturizer, GraphComponent):
|
|
|
101
99
|
return ["sklearn"]
|
|
102
100
|
|
|
103
101
|
def _load_count_vect_params(self) -> None:
|
|
104
|
-
|
|
105
102
|
# Use shared vocabulary between text and all other attributes of Message
|
|
106
103
|
self.use_shared_vocab = self._config["use_shared_vocab"]
|
|
107
104
|
|
|
@@ -342,7 +339,7 @@ class CountVectorsFeaturizer(SparseFeaturizer, GraphComponent):
|
|
|
342
339
|
|
|
343
340
|
@staticmethod
|
|
344
341
|
def _convert_attribute_tokens_to_texts(
|
|
345
|
-
attribute_tokens: Dict[Text, List[List[Text]]]
|
|
342
|
+
attribute_tokens: Dict[Text, List[List[Text]]],
|
|
346
343
|
) -> Dict[Text, List[Text]]:
|
|
347
344
|
attribute_texts = {}
|
|
348
345
|
|
|
@@ -661,7 +658,6 @@ class CountVectorsFeaturizer(SparseFeaturizer, GraphComponent):
|
|
|
661
658
|
|
|
662
659
|
for message in messages:
|
|
663
660
|
for attribute in self._attributes:
|
|
664
|
-
|
|
665
661
|
message_tokens = self._get_processed_message_tokens_by_attribute(
|
|
666
662
|
message, attribute
|
|
667
663
|
)
|
|
@@ -687,36 +683,11 @@ class CountVectorsFeaturizer(SparseFeaturizer, GraphComponent):
|
|
|
687
683
|
|
|
688
684
|
@staticmethod
|
|
689
685
|
def _is_any_model_trained(
|
|
690
|
-
attribute_vocabularies: Dict[Text, Optional[Dict[Text, int]]]
|
|
686
|
+
attribute_vocabularies: Dict[Text, Optional[Dict[Text, int]]],
|
|
691
687
|
) -> bool:
|
|
692
688
|
"""Check if any model got trained."""
|
|
693
689
|
return any(value is not None for value in attribute_vocabularies.values())
|
|
694
690
|
|
|
695
|
-
@staticmethod
|
|
696
|
-
def convert_vocab(
|
|
697
|
-
vocab: Dict[str, Union[int, Optional[Dict[str, int]]]], to_int: bool
|
|
698
|
-
) -> Dict[str, Union[None, int, np.int64, Dict[str, Union[int, np.int64]]]]:
|
|
699
|
-
"""Converts numpy integers in the vocabulary to Python integers."""
|
|
700
|
-
|
|
701
|
-
def convert_value(value: int) -> Union[int, np.int64]:
|
|
702
|
-
"""Helper function to convert a single value based on to_int flag."""
|
|
703
|
-
return int(value) if to_int else np.int64(value)
|
|
704
|
-
|
|
705
|
-
result_dict: Dict[
|
|
706
|
-
str, Union[None, int, np.int64, Dict[str, Union[int, np.int64]]]
|
|
707
|
-
] = {}
|
|
708
|
-
for key, sub_dict in vocab.items():
|
|
709
|
-
if isinstance(sub_dict, int):
|
|
710
|
-
result_dict[key] = convert_value(sub_dict)
|
|
711
|
-
elif not sub_dict:
|
|
712
|
-
result_dict[key] = None
|
|
713
|
-
else:
|
|
714
|
-
result_dict[key] = {
|
|
715
|
-
sub_key: convert_value(value) for sub_key, value in sub_dict.items()
|
|
716
|
-
}
|
|
717
|
-
|
|
718
|
-
return result_dict
|
|
719
|
-
|
|
720
691
|
def persist(self) -> None:
|
|
721
692
|
"""Persist this model into the passed directory.
|
|
722
693
|
|
|
@@ -730,18 +701,17 @@ class CountVectorsFeaturizer(SparseFeaturizer, GraphComponent):
|
|
|
730
701
|
attribute_vocabularies = self._collect_vectorizer_vocabularies()
|
|
731
702
|
if self._is_any_model_trained(attribute_vocabularies):
|
|
732
703
|
# Definitely need to persist some vocabularies
|
|
733
|
-
featurizer_file = model_dir / "vocabularies.
|
|
704
|
+
featurizer_file = model_dir / "vocabularies.pkl"
|
|
734
705
|
|
|
735
706
|
# Only persist vocabulary from one attribute if `use_shared_vocab`.
|
|
736
707
|
# Can be loaded and distributed to all attributes.
|
|
737
|
-
|
|
708
|
+
vocab = (
|
|
738
709
|
attribute_vocabularies[TEXT]
|
|
739
710
|
if self.use_shared_vocab
|
|
740
711
|
else attribute_vocabularies
|
|
741
712
|
)
|
|
742
|
-
vocab = self.convert_vocab(loaded_vocab, to_int=True)
|
|
743
713
|
|
|
744
|
-
|
|
714
|
+
io_utils.json_pickle(featurizer_file, vocab)
|
|
745
715
|
|
|
746
716
|
# Dump OOV words separately as they might have been modified during
|
|
747
717
|
# training
|
|
@@ -816,9 +786,8 @@ class CountVectorsFeaturizer(SparseFeaturizer, GraphComponent):
|
|
|
816
786
|
"""Loads trained component (see parent class for full docstring)."""
|
|
817
787
|
try:
|
|
818
788
|
with model_storage.read_from(resource) as model_dir:
|
|
819
|
-
featurizer_file = model_dir / "vocabularies.
|
|
820
|
-
vocabulary =
|
|
821
|
-
vocabulary = cls.convert_vocab(vocabulary, to_int=False)
|
|
789
|
+
featurizer_file = model_dir / "vocabularies.pkl"
|
|
790
|
+
vocabulary = io_utils.json_unpickle(featurizer_file)
|
|
822
791
|
|
|
823
792
|
share_vocabulary = config["use_shared_vocab"]
|
|
824
793
|
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
|
|
3
2
|
import logging
|
|
4
3
|
from collections import OrderedDict
|
|
4
|
+
|
|
5
|
+
import scipy.sparse
|
|
6
|
+
import numpy as np
|
|
5
7
|
from typing import (
|
|
6
8
|
Any,
|
|
7
9
|
Dict,
|
|
@@ -15,34 +17,30 @@ from typing import (
|
|
|
15
17
|
Union,
|
|
16
18
|
)
|
|
17
19
|
|
|
18
|
-
import numpy as np
|
|
19
|
-
import scipy.sparse
|
|
20
|
-
|
|
21
|
-
import rasa.shared.utils.io
|
|
22
|
-
import rasa.utils.io
|
|
23
20
|
from rasa.engine.graph import ExecutionContext, GraphComponent
|
|
24
21
|
from rasa.engine.recipes.default_recipe import DefaultV1Recipe
|
|
25
22
|
from rasa.engine.storage.resource import Resource
|
|
26
23
|
from rasa.engine.storage.storage import ModelStorage
|
|
27
|
-
from rasa.nlu.constants import TOKENS_NAMES
|
|
28
|
-
from rasa.nlu.featurizers.sparse_featurizer.sparse_featurizer import SparseFeaturizer
|
|
29
24
|
from rasa.nlu.tokenizers.spacy_tokenizer import POS_TAG_KEY, SpacyTokenizer
|
|
30
25
|
from rasa.nlu.tokenizers.tokenizer import Token, Tokenizer
|
|
26
|
+
from rasa.nlu.featurizers.sparse_featurizer.sparse_featurizer import SparseFeaturizer
|
|
27
|
+
from rasa.nlu.constants import TOKENS_NAMES
|
|
31
28
|
from rasa.shared.constants import DOCS_URL_COMPONENTS
|
|
32
|
-
from rasa.shared.exceptions import InvalidConfigException
|
|
33
|
-
from rasa.shared.nlu.constants import TEXT
|
|
34
|
-
from rasa.shared.nlu.training_data.message import Message
|
|
35
29
|
from rasa.shared.nlu.training_data.training_data import TrainingData
|
|
30
|
+
from rasa.shared.nlu.training_data.message import Message
|
|
31
|
+
from rasa.shared.nlu.constants import TEXT
|
|
32
|
+
from rasa.shared.exceptions import InvalidConfigException
|
|
33
|
+
import rasa.shared.utils.io
|
|
34
|
+
import rasa.utils.io
|
|
36
35
|
|
|
37
36
|
logger = logging.getLogger(__name__)
|
|
38
37
|
|
|
38
|
+
|
|
39
39
|
END_OF_SENTENCE = "EOS"
|
|
40
40
|
BEGIN_OF_SENTENCE = "BOS"
|
|
41
41
|
|
|
42
42
|
FEATURES = "features"
|
|
43
43
|
|
|
44
|
-
SEPERATOR = "###"
|
|
45
|
-
|
|
46
44
|
|
|
47
45
|
@DefaultV1Recipe.register(
|
|
48
46
|
DefaultV1Recipe.ComponentType.MESSAGE_FEATURIZER, is_trainable=True
|
|
@@ -74,13 +72,11 @@ class LexicalSyntacticFeaturizer(SparseFeaturizer, GraphComponent):
|
|
|
74
72
|
of the token at position `t+1`.
|
|
75
73
|
"""
|
|
76
74
|
|
|
77
|
-
FILENAME_FEATURE_TO_IDX_DICT = "feature_to_idx_dict.
|
|
75
|
+
FILENAME_FEATURE_TO_IDX_DICT = "feature_to_idx_dict.pkl"
|
|
78
76
|
|
|
79
77
|
# NOTE: "suffix5" of the token "is" will be "is". Hence, when combining multiple
|
|
80
78
|
# prefixes, short words will be represented/encoded repeatedly.
|
|
81
|
-
_FUNCTION_DICT: Dict[
|
|
82
|
-
Text, Callable[[Token], Union[Text, bool, None]]
|
|
83
|
-
] = { # noqa: RUF012
|
|
79
|
+
_FUNCTION_DICT: Dict[Text, Callable[[Token], Union[Text, bool, None]]] = { # noqa: RUF012
|
|
84
80
|
"low": lambda token: token.text.islower(),
|
|
85
81
|
"title": lambda token: token.text.istitle(),
|
|
86
82
|
"prefix5": lambda token: token.text[:5],
|
|
@@ -331,7 +327,6 @@ class LexicalSyntacticFeaturizer(SparseFeaturizer, GraphComponent):
|
|
|
331
327
|
assert len(window_range) == window_size
|
|
332
328
|
|
|
333
329
|
for anchor in range(len(tokens)):
|
|
334
|
-
|
|
335
330
|
token_features: Dict[Tuple[int, Text], Text] = {}
|
|
336
331
|
|
|
337
332
|
for window_position, relative_position in enumerate(window_range):
|
|
@@ -343,13 +338,13 @@ class LexicalSyntacticFeaturizer(SparseFeaturizer, GraphComponent):
|
|
|
343
338
|
|
|
344
339
|
token = tokens[absolute_position]
|
|
345
340
|
for feature_name in self._feature_config[window_position]:
|
|
346
|
-
token_features[
|
|
347
|
-
(
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
341
|
+
token_features[(window_position, feature_name)] = (
|
|
342
|
+
self._extract_raw_features_from_token(
|
|
343
|
+
token=token,
|
|
344
|
+
feature_name=feature_name,
|
|
345
|
+
token_position=absolute_position,
|
|
346
|
+
num_tokens=len(tokens),
|
|
347
|
+
)
|
|
353
348
|
)
|
|
354
349
|
|
|
355
350
|
sentence_features.append(token_features)
|
|
@@ -358,7 +353,7 @@ class LexicalSyntacticFeaturizer(SparseFeaturizer, GraphComponent):
|
|
|
358
353
|
|
|
359
354
|
@staticmethod
|
|
360
355
|
def _build_feature_to_index_map(
|
|
361
|
-
feature_vocabulary: Dict[Tuple[int, Text], Set[Text]]
|
|
356
|
+
feature_vocabulary: Dict[Tuple[int, Text], Set[Text]],
|
|
362
357
|
) -> Dict[Tuple[int, Text], Dict[Text, int]]:
|
|
363
358
|
"""Creates a nested dictionary for mapping raw features to indices.
|
|
364
359
|
|
|
@@ -493,32 +488,6 @@ class LexicalSyntacticFeaturizer(SparseFeaturizer, GraphComponent):
|
|
|
493
488
|
"""Creates a new untrained component (see parent class for full docstring)."""
|
|
494
489
|
return cls(config, model_storage, resource, execution_context)
|
|
495
490
|
|
|
496
|
-
@staticmethod
|
|
497
|
-
def _restructure_feature_to_idx_dict(
|
|
498
|
-
loaded_data: Dict[str, Dict[str, int]],
|
|
499
|
-
) -> Dict[Tuple[int, str], Dict[str, int]]:
|
|
500
|
-
"""Reconstructs the feature to idx dict.
|
|
501
|
-
|
|
502
|
-
When storing the feature_to_idx_dict to disk, we need to convert the tuple (key)
|
|
503
|
-
into a string to be able to store it via json. When loading the data
|
|
504
|
-
we need to reconstruct the tuple from the stored string.
|
|
505
|
-
|
|
506
|
-
Args:
|
|
507
|
-
loaded_data: The loaded feature to idx dict from file.
|
|
508
|
-
|
|
509
|
-
Returns:
|
|
510
|
-
The reconstructed feature_to_idx_dict
|
|
511
|
-
"""
|
|
512
|
-
feature_to_idx_dict = {}
|
|
513
|
-
for tuple_string, feature_value in loaded_data.items():
|
|
514
|
-
# Example of tuple_string: "1###low"
|
|
515
|
-
index, feature_name = tuple_string.split(SEPERATOR)
|
|
516
|
-
|
|
517
|
-
feature_key = (int(index), feature_name)
|
|
518
|
-
feature_to_idx_dict[feature_key] = feature_value
|
|
519
|
-
|
|
520
|
-
return feature_to_idx_dict
|
|
521
|
-
|
|
522
491
|
@classmethod
|
|
523
492
|
def load(
|
|
524
493
|
cls,
|
|
@@ -531,13 +500,10 @@ class LexicalSyntacticFeaturizer(SparseFeaturizer, GraphComponent):
|
|
|
531
500
|
"""Loads trained component (see parent class for full docstring)."""
|
|
532
501
|
try:
|
|
533
502
|
with model_storage.read_from(resource) as model_path:
|
|
534
|
-
|
|
503
|
+
feature_to_idx_dict = rasa.utils.io.json_unpickle(
|
|
535
504
|
model_path / cls.FILENAME_FEATURE_TO_IDX_DICT,
|
|
505
|
+
encode_non_string_keys=True,
|
|
536
506
|
)
|
|
537
|
-
|
|
538
|
-
# convert the key back into tuple
|
|
539
|
-
feature_to_idx_dict = cls._restructure_feature_to_idx_dict(loaded_data)
|
|
540
|
-
|
|
541
507
|
return cls(
|
|
542
508
|
config=config,
|
|
543
509
|
model_storage=model_storage,
|
|
@@ -562,13 +528,9 @@ class LexicalSyntacticFeaturizer(SparseFeaturizer, GraphComponent):
|
|
|
562
528
|
if not self._feature_to_idx_dict:
|
|
563
529
|
return None
|
|
564
530
|
|
|
565
|
-
# as we cannot dump tuples, convert the tuple into a string
|
|
566
|
-
restructured_feature_dict = {
|
|
567
|
-
f"{k[0]}{SEPERATOR}{k[1]}": v for k, v in self._feature_to_idx_dict.items()
|
|
568
|
-
}
|
|
569
|
-
|
|
570
531
|
with self._model_storage.write_to(self._resource) as model_path:
|
|
571
|
-
rasa.
|
|
532
|
+
rasa.utils.io.json_pickle(
|
|
572
533
|
model_path / self.FILENAME_FEATURE_TO_IDX_DICT,
|
|
573
|
-
|
|
534
|
+
self._feature_to_idx_dict,
|
|
535
|
+
encode_non_string_keys=True,
|
|
574
536
|
)
|
|
@@ -1,13 +1,11 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
|
|
3
2
|
import logging
|
|
4
3
|
import re
|
|
5
4
|
from typing import Any, Dict, List, Optional, Text, Tuple, Type
|
|
6
|
-
|
|
7
5
|
import numpy as np
|
|
8
6
|
import scipy.sparse
|
|
9
|
-
|
|
10
7
|
from rasa.nlu.tokenizers.tokenizer import Tokenizer
|
|
8
|
+
|
|
11
9
|
import rasa.shared.utils.io
|
|
12
10
|
import rasa.utils.io
|
|
13
11
|
import rasa.nlu.utils.pattern_utils as pattern_utils
|
|
@@ -242,7 +240,7 @@ class RegexFeaturizer(SparseFeaturizer, GraphComponent):
|
|
|
242
240
|
|
|
243
241
|
try:
|
|
244
242
|
with model_storage.read_from(resource) as model_dir:
|
|
245
|
-
patterns_file_name = model_dir / "patterns.
|
|
243
|
+
patterns_file_name = model_dir / "patterns.pkl"
|
|
246
244
|
known_patterns = rasa.shared.utils.io.read_json_file(patterns_file_name)
|
|
247
245
|
except (ValueError, FileNotFoundError):
|
|
248
246
|
logger.warning(
|
|
@@ -260,7 +258,7 @@ class RegexFeaturizer(SparseFeaturizer, GraphComponent):
|
|
|
260
258
|
|
|
261
259
|
def _persist(self) -> None:
|
|
262
260
|
with self._model_storage.write_to(self._resource) as model_dir:
|
|
263
|
-
regex_file = model_dir / "patterns.
|
|
261
|
+
regex_file = model_dir / "patterns.pkl"
|
|
264
262
|
rasa.shared.utils.io.dump_obj_as_json_to_file(
|
|
265
263
|
regex_file, self.known_patterns
|
|
266
264
|
)
|
rasa/nlu/persistor.py
CHANGED
|
@@ -1,16 +1,18 @@
|
|
|
1
1
|
import abc
|
|
2
|
-
import
|
|
2
|
+
import structlog
|
|
3
3
|
import os
|
|
4
4
|
import shutil
|
|
5
5
|
from typing import Optional, Text, Tuple, TYPE_CHECKING
|
|
6
6
|
|
|
7
|
+
from rasa.shared.exceptions import RasaException
|
|
8
|
+
|
|
7
9
|
import rasa.shared.utils.common
|
|
8
10
|
import rasa.utils.common
|
|
9
11
|
|
|
10
12
|
if TYPE_CHECKING:
|
|
11
13
|
from azure.storage.blob import ContainerClient
|
|
12
14
|
|
|
13
|
-
|
|
15
|
+
structlogger = structlog.get_logger()
|
|
14
16
|
|
|
15
17
|
|
|
16
18
|
def get_persistor(name: Text) -> Optional["Persistor"]:
|
|
@@ -95,7 +97,6 @@ class Persistor(abc.ABC):
|
|
|
95
97
|
|
|
96
98
|
@staticmethod
|
|
97
99
|
def _tar_name(model_name: Text, include_extension: bool = True) -> Text:
|
|
98
|
-
|
|
99
100
|
ext = ".tar.gz" if include_extension else ""
|
|
100
101
|
return f"{model_name}{ext}"
|
|
101
102
|
|
|
@@ -129,20 +130,36 @@ class AWSPersistor(Persistor):
|
|
|
129
130
|
def _ensure_bucket_exists(
|
|
130
131
|
self, bucket_name: Text, region_name: Optional[Text] = None
|
|
131
132
|
) -> None:
|
|
132
|
-
import boto3
|
|
133
133
|
import botocore
|
|
134
134
|
|
|
135
|
-
if not region_name:
|
|
136
|
-
region_name = boto3.DEFAULT_SESSION.region_name
|
|
137
|
-
|
|
138
|
-
bucket_config = {"LocationConstraint": region_name}
|
|
139
135
|
# noinspection PyUnresolvedReferences
|
|
140
136
|
try:
|
|
141
|
-
self.s3.
|
|
142
|
-
|
|
143
|
-
)
|
|
144
|
-
|
|
145
|
-
|
|
137
|
+
self.s3.meta.client.head_bucket(Bucket=bucket_name)
|
|
138
|
+
except botocore.exceptions.ClientError as e:
|
|
139
|
+
error_code = int(e.response["Error"]["Code"])
|
|
140
|
+
if error_code == 403:
|
|
141
|
+
log = (
|
|
142
|
+
f"Access to the specified bucket '{bucket_name}' is forbidden. "
|
|
143
|
+
"Please make sure you have the necessary "
|
|
144
|
+
"permission to access the bucket."
|
|
145
|
+
)
|
|
146
|
+
structlogger.error(
|
|
147
|
+
"aws_persistor.ensure_bucket_exists.bucket_access_forbidden",
|
|
148
|
+
bucket_name=bucket_name,
|
|
149
|
+
event_info=log,
|
|
150
|
+
)
|
|
151
|
+
raise RasaException(log)
|
|
152
|
+
elif error_code == 404:
|
|
153
|
+
log = (
|
|
154
|
+
f"The specified bucket '{bucket_name}' does not exist. "
|
|
155
|
+
"Please make sure to create the bucket first."
|
|
156
|
+
)
|
|
157
|
+
structlogger.error(
|
|
158
|
+
"aws_persistor.ensure_bucket_exists.bucket_not_found",
|
|
159
|
+
bucket_name=bucket_name,
|
|
160
|
+
event_info=log,
|
|
161
|
+
)
|
|
162
|
+
raise RasaException(log)
|
|
146
163
|
|
|
147
164
|
def _persist_tar(self, file_key: Text, tar_path: Text) -> None:
|
|
148
165
|
"""Uploads a model persisted in the `target_dir` to s3."""
|
|
@@ -180,10 +197,30 @@ class GCSPersistor(Persistor):
|
|
|
180
197
|
from google.cloud import exceptions
|
|
181
198
|
|
|
182
199
|
try:
|
|
183
|
-
self.storage_client.
|
|
184
|
-
except exceptions.
|
|
185
|
-
|
|
186
|
-
|
|
200
|
+
self.storage_client.get_bucket(bucket_name)
|
|
201
|
+
except exceptions.NotFound:
|
|
202
|
+
log = (
|
|
203
|
+
f"The specified bucket '{bucket_name}' does not exist. "
|
|
204
|
+
"Please make sure to create the bucket first."
|
|
205
|
+
)
|
|
206
|
+
structlogger.error(
|
|
207
|
+
"gcp_persistor.ensure_bucket_exists.bucket_not_found",
|
|
208
|
+
bucket_name=bucket_name,
|
|
209
|
+
event_info=log,
|
|
210
|
+
)
|
|
211
|
+
raise RasaException(log)
|
|
212
|
+
except exceptions.Forbidden:
|
|
213
|
+
log = (
|
|
214
|
+
f"Access to the specified bucket '{bucket_name}' is forbidden. "
|
|
215
|
+
"Please make sure you have the necessary "
|
|
216
|
+
"permission to access the bucket. "
|
|
217
|
+
)
|
|
218
|
+
structlogger.error(
|
|
219
|
+
"gcp_persistor.ensure_bucket_exists.bucket_access_forbidden",
|
|
220
|
+
bucket_name=bucket_name,
|
|
221
|
+
event_info=log,
|
|
222
|
+
)
|
|
223
|
+
raise RasaException(log)
|
|
187
224
|
|
|
188
225
|
def _persist_tar(self, file_key: Text, tar_path: Text) -> None:
|
|
189
226
|
"""Uploads a model persisted in the `target_dir` to GCS."""
|
|
@@ -210,18 +247,23 @@ class AzurePersistor(Persistor):
|
|
|
210
247
|
account_url=f"https://{azure_account_name}.blob.core.windows.net/",
|
|
211
248
|
credential=azure_account_key,
|
|
212
249
|
)
|
|
213
|
-
|
|
214
|
-
self._ensure_container_exists(azure_container)
|
|
215
250
|
self.container_name = azure_container
|
|
251
|
+
self._ensure_container_exists()
|
|
216
252
|
|
|
217
|
-
def _ensure_container_exists(self
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
try:
|
|
221
|
-
self.blob_service.create_container(container_name)
|
|
222
|
-
except ResourceExistsError:
|
|
223
|
-
# no need to create the container, it already exists
|
|
253
|
+
def _ensure_container_exists(self) -> None:
|
|
254
|
+
if self._container_client().exists():
|
|
224
255
|
pass
|
|
256
|
+
else:
|
|
257
|
+
log = (
|
|
258
|
+
f"The specified container '{self.container_name}' does not exist."
|
|
259
|
+
"Please make sure to create the container first."
|
|
260
|
+
)
|
|
261
|
+
structlogger.error(
|
|
262
|
+
"azure_persistor.ensure_container_exists.container_not_found",
|
|
263
|
+
container_name=self.container_name,
|
|
264
|
+
event_info=log,
|
|
265
|
+
)
|
|
266
|
+
raise RasaException(log)
|
|
225
267
|
|
|
226
268
|
def _container_client(self) -> "ContainerClient":
|
|
227
269
|
return self.blob_service.get_container_client(self.container_name)
|
|
@@ -430,9 +430,9 @@ class ResponseSelector(DIETClassifier):
|
|
|
430
430
|
self, message: Message, prediction_dict: Dict[Text, Any], selector_key: Text
|
|
431
431
|
) -> None:
|
|
432
432
|
message_selector_properties = message.get(RESPONSE_SELECTOR_PROPERTY_NAME, {})
|
|
433
|
-
message_selector_properties[
|
|
434
|
-
|
|
435
|
-
|
|
433
|
+
message_selector_properties[RESPONSE_SELECTOR_RETRIEVAL_INTENTS] = (
|
|
434
|
+
self.all_retrieval_intents
|
|
435
|
+
)
|
|
436
436
|
message_selector_properties[selector_key] = prediction_dict
|
|
437
437
|
message.set(
|
|
438
438
|
RESPONSE_SELECTOR_PROPERTY_NAME,
|
|
@@ -505,7 +505,6 @@ class ResponseSelector(DIETClassifier):
|
|
|
505
505
|
been caught earlier and a warning should have been raised.
|
|
506
506
|
"""
|
|
507
507
|
for key, responses in self.responses.items():
|
|
508
|
-
|
|
509
508
|
# First check if the predicted label was the key itself
|
|
510
509
|
search_key = util.template_key_to_intent_response_key(key)
|
|
511
510
|
if search_key == label.get("name"):
|
|
@@ -626,7 +625,6 @@ class ResponseSelector(DIETClassifier):
|
|
|
626
625
|
config: Dict[Text, Any],
|
|
627
626
|
finetune_mode: bool = False,
|
|
628
627
|
) -> "RasaModel":
|
|
629
|
-
|
|
630
628
|
predict_data_example = RasaModelData(
|
|
631
629
|
label_key=model_data_example.label_key,
|
|
632
630
|
data={
|
|
@@ -723,7 +721,6 @@ class DIET2BOW(DIET):
|
|
|
723
721
|
logger.debug(f" {metric} ({name})")
|
|
724
722
|
|
|
725
723
|
def _update_label_metrics(self, loss: tf.Tensor, acc: tf.Tensor) -> None:
|
|
726
|
-
|
|
727
724
|
self.response_loss.update_state(loss)
|
|
728
725
|
self.response_acc.update_state(acc)
|
|
729
726
|
|
|
@@ -796,10 +793,10 @@ class DIET2DIET(DIET):
|
|
|
796
793
|
(self.text_name, self.config),
|
|
797
794
|
(self.label_name, label_config),
|
|
798
795
|
]:
|
|
799
|
-
self._tf_layers[
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
796
|
+
self._tf_layers[f"sequence_layer.{attribute}"] = (
|
|
797
|
+
rasa_layers.RasaSequenceLayer(
|
|
798
|
+
attribute, self.data_signature[attribute], config
|
|
799
|
+
)
|
|
803
800
|
)
|
|
804
801
|
|
|
805
802
|
if self.config[MASKED_LM]:
|
rasa/nlu/test.py
CHANGED
|
@@ -886,7 +886,6 @@ def evaluate_entities(
|
|
|
886
886
|
exclude_label=NO_ENTITY,
|
|
887
887
|
)
|
|
888
888
|
if output_directory:
|
|
889
|
-
|
|
890
889
|
_dump_report(output_directory, f"{extractor}_report.json", report)
|
|
891
890
|
|
|
892
891
|
if successes:
|
|
@@ -1550,7 +1549,6 @@ async def combine_result(
|
|
|
1550
1549
|
|
|
1551
1550
|
|
|
1552
1551
|
def _contains_entity_labels(entity_results: List[EntityEvaluationResult]) -> bool:
|
|
1553
|
-
|
|
1554
1552
|
for result in entity_results:
|
|
1555
1553
|
if result.entity_targets or result.entity_predictions:
|
|
1556
1554
|
return True
|
|
@@ -1791,7 +1789,6 @@ async def compare_nlu(
|
|
|
1791
1789
|
training_examples_per_run = []
|
|
1792
1790
|
|
|
1793
1791
|
for run in range(runs):
|
|
1794
|
-
|
|
1795
1792
|
logger.info("Beginning comparison run {}/{}".format(run + 1, runs))
|
|
1796
1793
|
|
|
1797
1794
|
run_path = os.path.join(output, "run_{}".format(run + 1))
|
|
@@ -25,7 +25,7 @@ from transformers import ( # noqa: E402
|
|
|
25
25
|
RobertaTokenizer,
|
|
26
26
|
CamembertTokenizer,
|
|
27
27
|
)
|
|
28
|
-
from rasa.nlu.utils.hugging_face.transformers_pre_post_processors import ( # noqa: E402
|
|
28
|
+
from rasa.nlu.utils.hugging_face.transformers_pre_post_processors import ( # noqa: E402
|
|
29
29
|
bert_tokens_pre_processor,
|
|
30
30
|
gpt_tokens_pre_processor,
|
|
31
31
|
xlnet_tokens_pre_processor,
|