rasa-pro 3.13.0.dev5__py3-none-any.whl → 3.13.0.dev8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rasa-pro might be problematic. Click here for more details.
- rasa/__main__.py +0 -3
- rasa/api.py +5 -1
- rasa/cli/arguments/default_arguments.py +13 -1
- rasa/cli/arguments/train.py +2 -0
- rasa/cli/dialogue_understanding_test.py +1 -1
- rasa/cli/e2e_test.py +1 -1
- rasa/cli/evaluate.py +2 -2
- rasa/cli/export.py +3 -3
- rasa/cli/llm_fine_tuning.py +12 -11
- rasa/cli/project_templates/defaults.py +133 -0
- rasa/cli/run.py +1 -1
- rasa/cli/studio/link.py +53 -0
- rasa/cli/studio/pull.py +78 -0
- rasa/cli/studio/push.py +78 -0
- rasa/cli/studio/studio.py +12 -0
- rasa/cli/studio/upload.py +8 -0
- rasa/cli/train.py +2 -1
- rasa/cli/utils.py +1 -1
- rasa/cli/x.py +1 -1
- rasa/constants.py +4 -0
- rasa/core/__init__.py +0 -16
- rasa/core/actions/action.py +5 -1
- rasa/core/actions/action_repeat_bot_messages.py +18 -22
- rasa/core/actions/action_run_slot_rejections.py +0 -1
- rasa/core/agent.py +18 -3
- rasa/core/available_endpoints.py +146 -0
- rasa/core/brokers/kafka.py +4 -0
- rasa/core/brokers/pika.py +5 -2
- rasa/core/brokers/sql.py +1 -1
- rasa/core/channels/botframework.py +2 -2
- rasa/core/channels/channel.py +2 -2
- rasa/core/channels/hangouts.py +8 -5
- rasa/core/channels/inspector/.eslintrc.cjs +12 -6
- rasa/core/channels/inspector/.prettierrc +5 -0
- rasa/core/channels/inspector/README.md +10 -4
- rasa/core/channels/inspector/dist/assets/{arc-9f75cc3b.js → arc-c4b064fc.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{blockDiagram-38ab4fdb-7f34db23.js → blockDiagram-38ab4fdb-215b5026.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{c4Diagram-3d4e48cf-948bab2c.js → c4Diagram-3d4e48cf-2b54a0a3.js} +1 -1
- rasa/core/channels/inspector/dist/assets/channel-3730f5fd.js +1 -0
- rasa/core/channels/inspector/dist/assets/{classDiagram-70f12bd4-53b0dd0e.js → classDiagram-70f12bd4-daacea5f.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{classDiagram-v2-f2320105-fdf789e7.js → classDiagram-v2-f2320105-930d4dc2.js} +1 -1
- rasa/core/channels/inspector/dist/assets/clone-e847561e.js +1 -0
- rasa/core/channels/inspector/dist/assets/{createText-2e5e7dd3-87c4ece5.js → createText-2e5e7dd3-83c206ba.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{edges-e0da2a9e-5a8b0749.js → edges-e0da2a9e-b0eb01d0.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{erDiagram-9861fffd-66da90e2.js → erDiagram-9861fffd-17586500.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{flowDb-956e92f1-10044f05.js → flowDb-956e92f1-be2a1776.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{flowDiagram-66a62f08-f338f66a.js → flowDiagram-66a62f08-c2120ebd.js} +1 -1
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-96b9c2cf-efbbfe00.js +1 -0
- rasa/core/channels/inspector/dist/assets/{flowchart-elk-definition-4a651766-b13140aa.js → flowchart-elk-definition-4a651766-a6ab5c48.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{ganttDiagram-c361ad54-f2b4a55a.js → ganttDiagram-c361ad54-ef613457.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{gitGraphDiagram-72cf32ee-dedc298d.js → gitGraphDiagram-72cf32ee-d59185b3.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{graph-4ede11ff.js → graph-0f155405.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{index-3862675e-65549d37.js → index-3862675e-d5f1d1b7.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{index-3a23e736.js → index-47737d3a.js} +123 -123
- rasa/core/channels/inspector/dist/assets/{infoDiagram-f8f76790-65439671.js → infoDiagram-f8f76790-b07d141f.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{journeyDiagram-49397b02-56d03d98.js → journeyDiagram-49397b02-1936d429.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{layout-dd48f7f4.js → layout-dde8d0f3.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{line-1569ad2c.js → line-0c2c7ee0.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{linear-48bf4935.js → linear-35dd89a4.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{mindmap-definition-fc14e90a-688504c1.js → mindmap-definition-fc14e90a-56192851.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{pieDiagram-8a3498a8-78b6d7e6.js → pieDiagram-8a3498a8-fc21ed78.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{quadrantDiagram-120e2f19-048b84b3.js → quadrantDiagram-120e2f19-25e98518.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{requirementDiagram-deff3bca-dd67f107.js → requirementDiagram-deff3bca-546ff1f5.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{sankeyDiagram-04a897e0-8128436e.js → sankeyDiagram-04a897e0-02d8b82d.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{sequenceDiagram-704730f1-1a0d1461.js → sequenceDiagram-704730f1-3ca5a92e.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{stateDiagram-587899a1-46d388ed.js → stateDiagram-587899a1-128ea07c.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{stateDiagram-v2-d93cdb3a-ea42951a.js → stateDiagram-v2-d93cdb3a-95f290af.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-6aaf32cf-7427ed0c.js → styles-6aaf32cf-4984898a.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-9a916d00-ff5e5a16.js → styles-9a916d00-1bf266ba.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-c10674c1-7b3680cf.js → styles-c10674c1-60521c63.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{svgDrawCommon-08f97a94-f860f2ad.js → svgDrawCommon-08f97a94-a25b6e12.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{timeline-definition-85554ec2-2eebf0c8.js → timeline-definition-85554ec2-0fc086bf.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{xychartDiagram-e933f94c-5d7f4e96.js → xychartDiagram-e933f94c-44ee592e.js} +1 -1
- rasa/core/channels/inspector/dist/index.html +1 -1
- rasa/core/channels/inspector/package.json +3 -1
- rasa/core/channels/inspector/src/App.tsx +91 -90
- rasa/core/channels/inspector/src/components/Chat.tsx +45 -41
- rasa/core/channels/inspector/src/components/DiagramFlow.tsx +40 -40
- rasa/core/channels/inspector/src/components/DialogueInformation.tsx +57 -57
- rasa/core/channels/inspector/src/components/DialogueStack.tsx +36 -27
- rasa/core/channels/inspector/src/components/ExpandIcon.tsx +4 -4
- rasa/core/channels/inspector/src/components/FullscreenButton.tsx +7 -7
- rasa/core/channels/inspector/src/components/LoadingSpinner.tsx +28 -12
- rasa/core/channels/inspector/src/components/NoActiveFlow.tsx +9 -9
- rasa/core/channels/inspector/src/components/RasaLogo.tsx +5 -5
- rasa/core/channels/inspector/src/components/RecruitmentPanel.tsx +55 -60
- rasa/core/channels/inspector/src/components/SaraDiagrams.tsx +5 -5
- rasa/core/channels/inspector/src/components/Slots.tsx +22 -22
- rasa/core/channels/inspector/src/components/Welcome.tsx +28 -31
- rasa/core/channels/inspector/src/helpers/audio/audiostream.ts +245 -0
- rasa/core/channels/inspector/src/helpers/audio/microphone-processor.js +12 -0
- rasa/core/channels/inspector/src/helpers/audio/playback-processor.js +36 -0
- rasa/core/channels/inspector/src/helpers/conversation.ts +7 -7
- rasa/core/channels/inspector/src/helpers/formatters.test.ts +181 -181
- rasa/core/channels/inspector/src/helpers/formatters.ts +111 -111
- rasa/core/channels/inspector/src/helpers/utils.ts +78 -61
- rasa/core/channels/inspector/src/main.tsx +8 -8
- rasa/core/channels/inspector/src/theme/Button/Button.ts +8 -8
- rasa/core/channels/inspector/src/theme/Heading/Heading.ts +7 -7
- rasa/core/channels/inspector/src/theme/Input/Input.ts +9 -9
- rasa/core/channels/inspector/src/theme/Link/Link.ts +6 -6
- rasa/core/channels/inspector/src/theme/Modal/Modal.ts +13 -13
- rasa/core/channels/inspector/src/theme/Table/Table.tsx +10 -10
- rasa/core/channels/inspector/src/theme/Tooltip/Tooltip.ts +5 -5
- rasa/core/channels/inspector/src/theme/base/breakpoints.ts +7 -7
- rasa/core/channels/inspector/src/theme/base/colors.ts +64 -64
- rasa/core/channels/inspector/src/theme/base/fonts/fontFaces.css +21 -18
- rasa/core/channels/inspector/src/theme/base/radii.ts +8 -8
- rasa/core/channels/inspector/src/theme/base/shadows.ts +5 -5
- rasa/core/channels/inspector/src/theme/base/sizes.ts +5 -5
- rasa/core/channels/inspector/src/theme/base/space.ts +12 -12
- rasa/core/channels/inspector/src/theme/base/styles.ts +5 -5
- rasa/core/channels/inspector/src/theme/base/typography.ts +12 -12
- rasa/core/channels/inspector/src/theme/base/zIndices.ts +3 -3
- rasa/core/channels/inspector/src/theme/index.ts +38 -38
- rasa/core/channels/inspector/src/types.ts +56 -50
- rasa/core/channels/inspector/yarn.lock +5 -0
- rasa/core/channels/mattermost.py +1 -1
- rasa/core/channels/rasa_chat.py +2 -4
- rasa/core/channels/rest.py +5 -4
- rasa/core/channels/studio_chat.py +3 -2
- rasa/core/channels/vier_cvg.py +1 -2
- rasa/core/channels/voice_ready/audiocodes.py +35 -25
- rasa/core/channels/voice_stream/audiocodes.py +7 -4
- rasa/core/channels/voice_stream/genesys.py +2 -2
- rasa/core/channels/voice_stream/twilio_media_streams.py +10 -5
- rasa/core/channels/voice_stream/voice_channel.py +33 -22
- rasa/core/evaluation/marker_tracker_loader.py +1 -1
- rasa/core/exporter.py +1 -1
- rasa/core/http_interpreter.py +3 -7
- rasa/core/jobs.py +2 -1
- rasa/core/nlg/contextual_response_rephraser.py +38 -11
- rasa/core/nlg/generator.py +0 -1
- rasa/core/nlg/interpolator.py +2 -3
- rasa/core/nlg/summarize.py +40 -6
- rasa/core/persistor.py +55 -20
- rasa/core/policies/enterprise_search_policy.py +290 -66
- rasa/core/policies/enterprise_search_prompt_with_relevancy_check_and_citation_template.jinja2 +63 -0
- rasa/core/policies/flow_policy.py +1 -1
- rasa/core/policies/flows/flow_executor.py +96 -17
- rasa/core/policies/intentless_policy.py +24 -16
- rasa/core/processor.py +106 -53
- rasa/core/run.py +40 -13
- rasa/core/tracker_stores/__init__.py +0 -0
- rasa/core/{auth_retry_tracker_store.py → tracker_stores/auth_retry_tracker_store.py} +5 -1
- rasa/core/tracker_stores/dynamo_tracker_store.py +218 -0
- rasa/core/tracker_stores/mongo_tracker_store.py +206 -0
- rasa/core/tracker_stores/redis_tracker_store.py +219 -0
- rasa/core/tracker_stores/sql_tracker_store.py +555 -0
- rasa/core/tracker_stores/tracker_store.py +805 -0
- rasa/core/training/interactive.py +1 -1
- rasa/core/utils.py +24 -91
- rasa/dialogue_understanding/coexistence/intent_based_router.py +2 -1
- rasa/dialogue_understanding/coexistence/llm_based_router.py +8 -3
- rasa/dialogue_understanding/commands/can_not_handle_command.py +2 -0
- rasa/dialogue_understanding/commands/cancel_flow_command.py +2 -0
- rasa/dialogue_understanding/commands/chit_chat_answer_command.py +2 -0
- rasa/dialogue_understanding/commands/clarify_command.py +6 -2
- rasa/dialogue_understanding/commands/command_syntax_manager.py +1 -0
- rasa/dialogue_understanding/commands/human_handoff_command.py +2 -0
- rasa/dialogue_understanding/commands/knowledge_answer_command.py +2 -0
- rasa/dialogue_understanding/commands/repeat_bot_messages_command.py +2 -0
- rasa/dialogue_understanding/commands/set_slot_command.py +11 -1
- rasa/dialogue_understanding/commands/skip_question_command.py +2 -0
- rasa/dialogue_understanding/commands/start_flow_command.py +4 -0
- rasa/dialogue_understanding/commands/utils.py +26 -2
- rasa/dialogue_understanding/generator/__init__.py +7 -1
- rasa/dialogue_understanding/generator/command_generator.py +4 -2
- rasa/dialogue_understanding/generator/command_parser.py +2 -2
- rasa/dialogue_understanding/generator/command_parser_validator.py +63 -0
- rasa/dialogue_understanding/generator/constants.py +2 -2
- rasa/dialogue_understanding/generator/llm_based_command_generator.py +1 -1
- rasa/dialogue_understanding/generator/prompt_templates/command_prompt_v3_gpt_4o_2024_11_20_template.jinja2 +78 -0
- rasa/dialogue_understanding/generator/single_step/compact_llm_command_generator.py +28 -463
- rasa/dialogue_understanding/generator/single_step/search_ready_llm_command_generator.py +147 -0
- rasa/dialogue_understanding/generator/single_step/single_step_based_llm_command_generator.py +477 -0
- rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +8 -58
- rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +37 -25
- rasa/dialogue_understanding/patterns/domain_for_patterns.py +190 -0
- rasa/dialogue_understanding/processor/command_processor.py +3 -3
- rasa/dialogue_understanding/processor/command_processor_component.py +3 -3
- rasa/dialogue_understanding/stack/frames/flow_stack_frame.py +17 -4
- rasa/dialogue_understanding/utils.py +68 -12
- rasa/dialogue_understanding_test/du_test_case.py +1 -1
- rasa/dialogue_understanding_test/du_test_runner.py +4 -22
- rasa/dialogue_understanding_test/test_case_simulation/test_case_tracker_simulator.py +2 -6
- rasa/e2e_test/e2e_test_runner.py +1 -1
- rasa/engine/constants.py +1 -1
- rasa/engine/recipes/default_recipe.py +26 -2
- rasa/engine/validation.py +3 -2
- rasa/hooks.py +2 -30
- rasa/keys +1 -0
- rasa/llm_fine_tuning/annotation_module.py +39 -9
- rasa/llm_fine_tuning/conversations.py +3 -0
- rasa/llm_fine_tuning/llm_data_preparation_module.py +66 -49
- rasa/llm_fine_tuning/paraphrasing/conversation_rephraser.py +4 -2
- rasa/llm_fine_tuning/paraphrasing/rephrase_validator.py +52 -44
- rasa/llm_fine_tuning/paraphrasing_module.py +10 -12
- rasa/llm_fine_tuning/storage.py +4 -4
- rasa/llm_fine_tuning/utils.py +63 -1
- rasa/model_manager/config.py +3 -1
- rasa/model_manager/model_api.py +89 -2
- rasa/model_manager/runner_service.py +8 -4
- rasa/model_manager/trainer_service.py +5 -4
- rasa/model_training.py +12 -3
- rasa/nlu/extractors/crf_entity_extractor.py +66 -16
- rasa/plugin.py +2 -12
- rasa/privacy/__init__.py +0 -0
- rasa/privacy/constants.py +83 -0
- rasa/privacy/event_broker_utils.py +77 -0
- rasa/privacy/privacy_config.py +281 -0
- rasa/privacy/privacy_config_schema.json +86 -0
- rasa/privacy/privacy_filter.py +340 -0
- rasa/privacy/privacy_manager.py +576 -0
- rasa/server.py +29 -4
- rasa/shared/constants.py +6 -0
- rasa/shared/core/constants.py +4 -3
- rasa/shared/core/domain.py +7 -0
- rasa/shared/core/events.py +99 -3
- rasa/shared/core/flows/flow.py +1 -2
- rasa/shared/core/flows/flows_yaml_schema.json +3 -0
- rasa/shared/core/flows/steps/collect.py +46 -2
- rasa/shared/core/slots.py +28 -0
- rasa/shared/exceptions.py +4 -0
- rasa/shared/providers/_configs/azure_openai_client_config.py +4 -0
- rasa/shared/providers/_configs/openai_client_config.py +4 -0
- rasa/shared/providers/embedding/_base_litellm_embedding_client.py +3 -0
- rasa/shared/providers/llm/_base_litellm_client.py +5 -2
- rasa/shared/utils/llm.py +161 -6
- rasa/shared/utils/yaml.py +32 -0
- rasa/studio/data_handler.py +3 -3
- rasa/studio/download/download.py +37 -60
- rasa/studio/download/flows.py +23 -31
- rasa/studio/link.py +200 -0
- rasa/studio/pull.py +94 -0
- rasa/studio/push.py +131 -0
- rasa/studio/upload.py +117 -67
- rasa/telemetry.py +84 -27
- rasa/tracing/config.py +4 -5
- rasa/tracing/constants.py +19 -1
- rasa/tracing/instrumentation/attribute_extractors.py +11 -3
- rasa/tracing/instrumentation/instrumentation.py +54 -3
- rasa/tracing/instrumentation/metrics.py +98 -15
- rasa/tracing/metric_instrument_provider.py +75 -3
- rasa/utils/common.py +1 -27
- rasa/utils/licensing.py +1 -2
- rasa/utils/log_utils.py +1 -45
- rasa/validator.py +2 -8
- rasa/version.py +1 -1
- {rasa_pro-3.13.0.dev5.dist-info → rasa_pro-3.13.0.dev8.dist-info}/METADATA +8 -9
- {rasa_pro-3.13.0.dev5.dist-info → rasa_pro-3.13.0.dev8.dist-info}/RECORD +254 -231
- rasa/anonymization/__init__.py +0 -2
- rasa/anonymization/anonymisation_rule_yaml_reader.py +0 -91
- rasa/anonymization/anonymization_pipeline.py +0 -286
- rasa/anonymization/anonymization_rule_executor.py +0 -266
- rasa/anonymization/anonymization_rule_orchestrator.py +0 -119
- rasa/anonymization/schemas/config.yml +0 -47
- rasa/anonymization/utils.py +0 -118
- rasa/core/channels/inspector/dist/assets/channel-dfa68278.js +0 -1
- rasa/core/channels/inspector/dist/assets/clone-edb7f119.js +0 -1
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-96b9c2cf-65e7c670.js +0 -1
- rasa/core/channels/inspector/src/helpers/audiostream.ts +0 -191
- rasa/core/tracker_store.py +0 -1792
- {rasa_pro-3.13.0.dev5.dist-info → rasa_pro-3.13.0.dev8.dist-info}/NOTICE +0 -0
- {rasa_pro-3.13.0.dev5.dist-info → rasa_pro-3.13.0.dev8.dist-info}/WHEEL +0 -0
- {rasa_pro-3.13.0.dev5.dist-info → rasa_pro-3.13.0.dev8.dist-info}/entry_points.txt +0 -0
rasa/llm_fine_tuning/paraphrasing_module.py
CHANGED
|
@@ -1,11 +1,9 @@
|
|
|
1
|
-
from typing import Any, Dict, List, Tuple
|
|
1
|
+
from typing import Any, Dict, List, Tuple
|
|
2
2
|
|
|
3
3
|
import structlog
|
|
4
4
|
from tqdm import tqdm
|
|
5
5
|
|
|
6
|
-
from rasa.
|
|
7
|
-
LLMBasedCommandGenerator,
|
|
8
|
-
)
|
|
6
|
+
from rasa.core.agent import Agent
|
|
9
7
|
from rasa.llm_fine_tuning.conversations import Conversation
|
|
10
8
|
from rasa.llm_fine_tuning.paraphrasing.conversation_rephraser import (
|
|
11
9
|
ConversationRephraser,
|
|
@@ -28,8 +26,7 @@ async def create_paraphrased_conversations(
|
|
|
28
26
|
rephrase_config: Dict[str, Any],
|
|
29
27
|
num_rephrases: int,
|
|
30
28
|
flows: FlowsList,
|
|
31
|
-
|
|
32
|
-
llm_command_generator_config: Dict[str, Any],
|
|
29
|
+
agent: Agent,
|
|
33
30
|
storage_context: StorageContext,
|
|
34
31
|
) -> Tuple[List[Conversation], Dict[str, Any]]:
|
|
35
32
|
"""Create paraphrased conversations.
|
|
@@ -42,7 +39,7 @@ async def create_paraphrased_conversations(
|
|
|
42
39
|
rephrase_config: The path to the rephrase configuration file.
|
|
43
40
|
num_rephrases: The number of rephrases to produce per user message.
|
|
44
41
|
flows: All flows.
|
|
45
|
-
|
|
42
|
+
agent: The Rasa agent.
|
|
46
43
|
storage_context: The storage context.
|
|
47
44
|
|
|
48
45
|
Returns:
|
|
@@ -50,7 +47,7 @@ async def create_paraphrased_conversations(
|
|
|
50
47
|
rephrasing.
|
|
51
48
|
"""
|
|
52
49
|
rephraser = ConversationRephraser(rephrase_config)
|
|
53
|
-
validator = RephraseValidator(
|
|
50
|
+
validator = RephraseValidator(flows)
|
|
54
51
|
|
|
55
52
|
if num_rephrases <= 0:
|
|
56
53
|
structlogger.info(
|
|
@@ -64,18 +61,19 @@ async def create_paraphrased_conversations(
|
|
|
64
61
|
rephrased_conversations: List[Conversation] = []
|
|
65
62
|
for i in tqdm(range(len(conversations))):
|
|
66
63
|
current_conversation = conversations[i]
|
|
67
|
-
|
|
68
64
|
try:
|
|
69
65
|
# rephrase all user messages even if rephrase=False is set
|
|
70
66
|
# to not confuse the LLM and get valid output
|
|
71
67
|
rephrasings = await rephraser.rephrase_conversation(
|
|
72
|
-
|
|
68
|
+
current_conversation, num_rephrases
|
|
73
69
|
)
|
|
74
70
|
# filter out the rephrasings for user messages that have rephrase=False set
|
|
75
|
-
rephrasings = _filter_rephrasings(rephrasings,
|
|
71
|
+
rephrasings = _filter_rephrasings(rephrasings, current_conversation)
|
|
76
72
|
# check if the rephrasings are still producing the same commands
|
|
77
73
|
rephrasings = await validator.validate_rephrasings(
|
|
78
|
-
|
|
74
|
+
agent,
|
|
75
|
+
rephrasings,
|
|
76
|
+
current_conversation,
|
|
79
77
|
)
|
|
80
78
|
except ProviderClientAPIException as e:
|
|
81
79
|
structlogger.error(
|
rasa/llm_fine_tuning/storage.py
CHANGED
|
@@ -96,9 +96,9 @@ class FileStorageStrategy(StorageStrategy):
|
|
|
96
96
|
file_path = self._get_file_path(storage_location)
|
|
97
97
|
self._create_output_dir(file_path)
|
|
98
98
|
|
|
99
|
-
with open(str(file_path), "w") as outfile:
|
|
99
|
+
with open(str(file_path), "w", encoding="utf-8") as outfile:
|
|
100
100
|
for example in llm_data:
|
|
101
|
-
json.dump(example.as_dict(), outfile)
|
|
101
|
+
json.dump(example.as_dict(), outfile, ensure_ascii=False)
|
|
102
102
|
outfile.write("\n")
|
|
103
103
|
|
|
104
104
|
def write_formatted_finetuning_data(
|
|
@@ -110,9 +110,9 @@ class FileStorageStrategy(StorageStrategy):
|
|
|
110
110
|
file_path = self._get_file_path(module_storage_location, file_name)
|
|
111
111
|
self._create_output_dir(file_path)
|
|
112
112
|
|
|
113
|
-
with open(str(file_path), "w") as file:
|
|
113
|
+
with open(str(file_path), "w", encoding="utf-8") as file:
|
|
114
114
|
for example in formatted_data:
|
|
115
|
-
json.dump(example.as_dict(), file)
|
|
115
|
+
json.dump(example.as_dict(), file, ensure_ascii=False)
|
|
116
116
|
file.write("\n")
|
|
117
117
|
|
|
118
118
|
def write_e2e_test_suite_to_yaml_file(
|
rasa/llm_fine_tuning/utils.py
CHANGED
|
@@ -1,7 +1,69 @@
|
|
|
1
|
-
from typing import List
|
|
1
|
+
from contextlib import contextmanager
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from typing import Callable, Generator, List, Union
|
|
4
|
+
|
|
5
|
+
import structlog
|
|
2
6
|
|
|
3
7
|
from rasa.dialogue_understanding.commands.prompt_command import PromptCommand
|
|
8
|
+
from rasa.dialogue_understanding.generator import LLMBasedCommandGenerator
|
|
9
|
+
from rasa.shared.providers.llm.llm_response import LLMResponse
|
|
10
|
+
|
|
11
|
+
structlogger = structlog.get_logger()
|
|
4
12
|
|
|
5
13
|
|
|
6
14
|
def commands_as_string(commands: List[PromptCommand], delimiter: str = "\n") -> str:
|
|
7
15
|
return delimiter.join([command.to_dsl() for command in commands])
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def make_mock_invoke_llm(commands: str) -> Callable:
|
|
19
|
+
"""Capture the `commands` in a closure so the resulting async function
|
|
20
|
+
can use it as its response.
|
|
21
|
+
|
|
22
|
+
Args:
|
|
23
|
+
commands: The commands to return from the mock LLM call.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
async def _mock_invoke_llm(
|
|
27
|
+
self: LLMBasedCommandGenerator, prompt: Union[List[dict], List[str], str]
|
|
28
|
+
) -> LLMResponse:
|
|
29
|
+
structlogger.debug(
|
|
30
|
+
f"LLM call intercepted, response mocked. "
|
|
31
|
+
f"Responding with the following commands: '{commands}' "
|
|
32
|
+
f"to the prompt: {prompt}"
|
|
33
|
+
)
|
|
34
|
+
fake_response_dict = {
|
|
35
|
+
"id": "",
|
|
36
|
+
"choices": [commands],
|
|
37
|
+
"created": int(datetime.now().timestamp()),
|
|
38
|
+
"model": "mocked-llm",
|
|
39
|
+
}
|
|
40
|
+
return LLMResponse.from_dict(fake_response_dict)
|
|
41
|
+
|
|
42
|
+
return _mock_invoke_llm
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@contextmanager
|
|
46
|
+
def patch_invoke_llm_in_generators(mock_impl: Callable) -> Generator:
|
|
47
|
+
"""Replace CommandGenerator.invoke_llm in the base class AND in all
|
|
48
|
+
current subclasses (recursively). Everything is restored on exit.
|
|
49
|
+
"""
|
|
50
|
+
originals = {}
|
|
51
|
+
|
|
52
|
+
def collect(cls: type[LLMBasedCommandGenerator]) -> None:
|
|
53
|
+
# store current attribute, then recurse
|
|
54
|
+
originals[cls] = cls.invoke_llm
|
|
55
|
+
for sub in cls.__subclasses__():
|
|
56
|
+
collect(sub)
|
|
57
|
+
|
|
58
|
+
# collect every existing subclass of CommandGenerator
|
|
59
|
+
collect(LLMBasedCommandGenerator) # type: ignore[type-abstract]
|
|
60
|
+
|
|
61
|
+
try:
|
|
62
|
+
# apply the monkey-patch everywhere
|
|
63
|
+
for cls in originals:
|
|
64
|
+
cls.invoke_llm = mock_impl # type: ignore[assignment]
|
|
65
|
+
yield
|
|
66
|
+
finally:
|
|
67
|
+
# restore originals (even if an exception happened)
|
|
68
|
+
for cls, orig in originals.items():
|
|
69
|
+
cls.invoke_llm = orig # type: ignore[assignment]
|
rasa/model_manager/config.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import sys
|
|
3
3
|
|
|
4
|
+
from rasa.constants import RASA_REMOTE_STORAGE_ENV_VAR_NAME
|
|
5
|
+
|
|
4
6
|
DEFAULT_SERVER_BASE_WORKING_DIRECTORY = "working-data"
|
|
5
7
|
|
|
6
8
|
SERVER_BASE_WORKING_DIRECTORY = os.environ.get(
|
|
@@ -12,7 +14,7 @@ SERVER_PORT = os.environ.get("RASA_MODEL_SERVER_PORT", 8000)
|
|
|
12
14
|
SERVER_BASE_URL = os.environ.get("RASA_MODEL_SERVER_BASE_URL", None)
|
|
13
15
|
|
|
14
16
|
# defaults to storing on the local hard drive
|
|
15
|
-
SERVER_MODEL_REMOTE_STORAGE = os.environ.get(
|
|
17
|
+
SERVER_MODEL_REMOTE_STORAGE = os.environ.get(RASA_REMOTE_STORAGE_ENV_VAR_NAME, None)
|
|
16
18
|
|
|
17
19
|
# The path to the python executable that is running this script
|
|
18
20
|
# we will use the same python to run training / bots
|
rasa/model_manager/model_api.py
CHANGED
|
@@ -7,12 +7,16 @@ from typing import Any, Callable, Dict, Optional, Union
|
|
|
7
7
|
import dotenv
|
|
8
8
|
import psutil
|
|
9
9
|
import structlog
|
|
10
|
+
from ruamel.yaml import YAMLError
|
|
10
11
|
from sanic import Blueprint, Sanic, response
|
|
11
12
|
from sanic.exceptions import NotFound
|
|
12
13
|
from sanic.request import Request
|
|
13
14
|
from sanic.response import json
|
|
14
15
|
from socketio import AsyncServer
|
|
15
16
|
|
|
17
|
+
import rasa
|
|
18
|
+
from rasa.cli.project_templates.defaults import get_rasa_defaults
|
|
19
|
+
from rasa.cli.scaffold import ProjectTemplateName, scaffold_path
|
|
16
20
|
from rasa.constants import MODEL_ARCHIVE_EXTENSION
|
|
17
21
|
from rasa.exceptions import ModelNotFound
|
|
18
22
|
from rasa.model_manager import config
|
|
@@ -45,6 +49,10 @@ from rasa.model_manager.warm_rasa_process import (
|
|
|
45
49
|
initialize_warm_rasa_process,
|
|
46
50
|
shutdown_warm_rasa_processes,
|
|
47
51
|
)
|
|
52
|
+
from rasa.server import ErrorResponse
|
|
53
|
+
from rasa.shared.exceptions import InvalidConfigException
|
|
54
|
+
from rasa.shared.utils.yaml import dump_obj_as_yaml_to_string
|
|
55
|
+
from rasa.studio.upload import build_calm_import_parts
|
|
48
56
|
|
|
49
57
|
dotenv.load_dotenv()
|
|
50
58
|
|
|
@@ -476,6 +484,86 @@ def internal_blueprint() -> Blueprint:
|
|
|
476
484
|
except ModelNotFound:
|
|
477
485
|
return response.raw(b"", status=404)
|
|
478
486
|
|
|
487
|
+
@bp.post("/defaults")
|
|
488
|
+
async def get_defaults(request: Request) -> response.HTTPResponse:
|
|
489
|
+
"""Returns the system defaults like prompts, patterns, etc."""
|
|
490
|
+
body = request.json or {}
|
|
491
|
+
config_yaml = body.get("config")
|
|
492
|
+
if config_yaml is None:
|
|
493
|
+
exc = ErrorResponse(
|
|
494
|
+
HTTPStatus.BAD_REQUEST,
|
|
495
|
+
"BadRequest",
|
|
496
|
+
"Missing `config` key in request body.",
|
|
497
|
+
)
|
|
498
|
+
return response.json(exc.error_info, status=exc.status)
|
|
499
|
+
|
|
500
|
+
endpoints_yaml = body.get("endpoints")
|
|
501
|
+
if endpoints_yaml is None:
|
|
502
|
+
exc = ErrorResponse(
|
|
503
|
+
HTTPStatus.BAD_REQUEST,
|
|
504
|
+
"BadRequest",
|
|
505
|
+
"Missing `endpoints` key in request body.",
|
|
506
|
+
)
|
|
507
|
+
return response.json(exc.error_info, status=exc.status)
|
|
508
|
+
|
|
509
|
+
try:
|
|
510
|
+
defaults = get_rasa_defaults(config_yaml, endpoints_yaml)
|
|
511
|
+
except (YAMLError, InvalidConfigException) as e:
|
|
512
|
+
exc = ErrorResponse(
|
|
513
|
+
HTTPStatus.INTERNAL_SERVER_ERROR,
|
|
514
|
+
"InitDataError",
|
|
515
|
+
f"Failed to load defaults. Error: {e!s}",
|
|
516
|
+
)
|
|
517
|
+
return response.json(exc.error_info, status=exc.status)
|
|
518
|
+
return response.json(defaults.model_dump(exclude_none=True))
|
|
519
|
+
|
|
520
|
+
@bp.get("/project_template")
|
|
521
|
+
async def get_project_template(request: Request) -> response.HTTPResponse:
|
|
522
|
+
"""Return initial project template data."""
|
|
523
|
+
template = request.args.get("template", ProjectTemplateName.DEFAULT.value)
|
|
524
|
+
|
|
525
|
+
try:
|
|
526
|
+
template_enum = ProjectTemplateName(template)
|
|
527
|
+
except ValueError:
|
|
528
|
+
valid_templates = ", ".join([t.value for t in ProjectTemplateName])
|
|
529
|
+
exc = ErrorResponse(
|
|
530
|
+
HTTPStatus.BAD_REQUEST,
|
|
531
|
+
"BadRequest",
|
|
532
|
+
f"Unknown template '{template}'. Valid templates: "
|
|
533
|
+
f"{valid_templates}",
|
|
534
|
+
)
|
|
535
|
+
return response.json(exc.error_info, status=exc.status)
|
|
536
|
+
|
|
537
|
+
template_dir = scaffold_path(template_enum)
|
|
538
|
+
if not os.path.isdir(template_dir):
|
|
539
|
+
exc = ErrorResponse(
|
|
540
|
+
HTTPStatus.INTERNAL_SERVER_ERROR,
|
|
541
|
+
"InitDataError",
|
|
542
|
+
f"Template directory '{template_dir}' not found.",
|
|
543
|
+
)
|
|
544
|
+
return response.json(exc.error_info, status=exc.status)
|
|
545
|
+
|
|
546
|
+
assistant_name, parts = build_calm_import_parts(
|
|
547
|
+
data_path=f"{template_dir}/data",
|
|
548
|
+
domain_path=f"{template_dir}/domain",
|
|
549
|
+
config_path=f"{template_dir}/config.yml",
|
|
550
|
+
endpoints_path=f"{template_dir}/endpoints.yml",
|
|
551
|
+
assistant_name=template_enum.value,
|
|
552
|
+
)
|
|
553
|
+
|
|
554
|
+
defaults = get_rasa_defaults(
|
|
555
|
+
config_yaml=dump_obj_as_yaml_to_string(parts.config),
|
|
556
|
+
endpoints_yaml=dump_obj_as_yaml_to_string(parts.endpoints),
|
|
557
|
+
)
|
|
558
|
+
return response.json(
|
|
559
|
+
{
|
|
560
|
+
**parts.model_dump(exclude_none=True),
|
|
561
|
+
"assistantName": assistant_name,
|
|
562
|
+
"defaults": defaults.model_dump(exclude_none=True),
|
|
563
|
+
"version": rasa.__version__,
|
|
564
|
+
}
|
|
565
|
+
)
|
|
566
|
+
|
|
479
567
|
return bp
|
|
480
568
|
|
|
481
569
|
|
|
@@ -532,8 +620,7 @@ def size_of_model(model_name: str) -> Optional[int]:
|
|
|
532
620
|
model_name=model_file_name,
|
|
533
621
|
)
|
|
534
622
|
return fetch_size_of_remote_model(
|
|
535
|
-
model_file_name,
|
|
536
|
-
config.SERVER_MODEL_REMOTE_STORAGE,
|
|
623
|
+
model_file_name, config.SERVER_MODEL_REMOTE_STORAGE, model_path
|
|
537
624
|
)
|
|
538
625
|
raise ModelNotFound("Model not found.")
|
|
539
626
|
|
rasa/model_manager/runner_service.py
CHANGED
|
@@ -166,13 +166,13 @@ def fetch_remote_model_to_dir(
|
|
|
166
166
|
) -> str:
|
|
167
167
|
"""Fetch the model from remote storage.
|
|
168
168
|
|
|
169
|
-
Returns the path to the model
|
|
169
|
+
Returns the path to the model directory.
|
|
170
170
|
"""
|
|
171
171
|
from rasa.core.persistor import get_persistor
|
|
172
172
|
|
|
173
173
|
persistor = get_persistor(storage_type)
|
|
174
174
|
|
|
175
|
-
# we
|
|
175
|
+
# we know there must be a persistor, because the config is set
|
|
176
176
|
# this is here to please the type checker for the call below
|
|
177
177
|
assert persistor is not None
|
|
178
178
|
|
|
@@ -182,7 +182,9 @@ def fetch_remote_model_to_dir(
|
|
|
182
182
|
raise ModelNotFound() from e
|
|
183
183
|
|
|
184
184
|
|
|
185
|
-
def fetch_size_of_remote_model(model_name: str, storage_type: str) -> int:
|
|
185
|
+
def fetch_size_of_remote_model(
|
|
186
|
+
model_name: str, storage_type: str, model_path: str
|
|
187
|
+
) -> int:
|
|
186
188
|
"""Fetch the size of the model from remote storage."""
|
|
187
189
|
from rasa.core.persistor import get_persistor
|
|
188
190
|
|
|
@@ -192,7 +194,9 @@ def fetch_size_of_remote_model(model_name: str, storage_type: str) -> int:
|
|
|
192
194
|
# this is here to please the type checker for the call below
|
|
193
195
|
assert persistor is not None
|
|
194
196
|
|
|
195
|
-
return persistor.size_of_persisted_model(
|
|
197
|
+
return persistor.size_of_persisted_model(
|
|
198
|
+
model_name=model_name, target_path=model_path
|
|
199
|
+
)
|
|
196
200
|
|
|
197
201
|
|
|
198
202
|
def start_bot_process(
|
rasa/model_manager/trainer_service.py
CHANGED
|
@@ -7,7 +7,7 @@ from typing import Any, Dict, Optional
|
|
|
7
7
|
import structlog
|
|
8
8
|
from pydantic import BaseModel, ConfigDict
|
|
9
9
|
|
|
10
|
-
from rasa.constants import MODEL_ARCHIVE_EXTENSION
|
|
10
|
+
from rasa.constants import MODEL_ARCHIVE_EXTENSION, RASA_DIR_NAME
|
|
11
11
|
from rasa.model_manager import config
|
|
12
12
|
from rasa.model_manager.utils import (
|
|
13
13
|
ensure_base_directory_exists,
|
|
@@ -171,7 +171,7 @@ def seed_training_directory_with_rasa_cache(
|
|
|
171
171
|
training_base_path=training_base_path,
|
|
172
172
|
)
|
|
173
173
|
# copy the cache to the training directory
|
|
174
|
-
shutil.copytree(src=cache_path, dst=subpath(training_base_path,
|
|
174
|
+
shutil.copytree(src=cache_path, dst=subpath(training_base_path, RASA_DIR_NAME))
|
|
175
175
|
|
|
176
176
|
|
|
177
177
|
def persist_rasa_cache(assistant_id: str, training_base_path: str) -> None:
|
|
@@ -184,12 +184,12 @@ def persist_rasa_cache(assistant_id: str, training_base_path: str) -> None:
|
|
|
184
184
|
cache_path = cache_for_assistant_path(assistant_id)
|
|
185
185
|
|
|
186
186
|
# if the training failed and didn't create a cache, skip this step
|
|
187
|
-
if not os.path.exists(subpath(training_base_path,
|
|
187
|
+
if not os.path.exists(subpath(training_base_path, RASA_DIR_NAME)):
|
|
188
188
|
return
|
|
189
189
|
|
|
190
190
|
# clean up the cache directory first
|
|
191
191
|
shutil.rmtree(cache_path, ignore_errors=True)
|
|
192
|
-
shutil.copytree(src=subpath(training_base_path,
|
|
192
|
+
shutil.copytree(src=subpath(training_base_path, RASA_DIR_NAME), dst=cache_path)
|
|
193
193
|
|
|
194
194
|
|
|
195
195
|
def write_training_data_to_files(
|
|
@@ -278,6 +278,7 @@ def start_training_process(
|
|
|
278
278
|
"--keep-local-model-copy",
|
|
279
279
|
"--remote-storage",
|
|
280
280
|
config.SERVER_MODEL_REMOTE_STORAGE,
|
|
281
|
+
"--remote-root-only",
|
|
281
282
|
]
|
|
282
283
|
)
|
|
283
284
|
|
rasa/model_training.py
CHANGED
|
@@ -160,6 +160,7 @@ async def train(
|
|
|
160
160
|
remote_storage: Optional[StorageType] = None,
|
|
161
161
|
file_importer: Optional[TrainingDataImporter] = None,
|
|
162
162
|
keep_local_model_copy: bool = False,
|
|
163
|
+
remote_root_only: bool = False,
|
|
163
164
|
) -> TrainingResult:
|
|
164
165
|
"""Trains a Rasa model (Core and NLU).
|
|
165
166
|
|
|
@@ -187,6 +188,8 @@ async def train(
|
|
|
187
188
|
If it is not provided, a new instance will be created.
|
|
188
189
|
keep_local_model_copy: If `True` the model will be stored locally even if
|
|
189
190
|
remote storage is configured.
|
|
191
|
+
remote_root_only: If `True`, the model will be stored in the root of the
|
|
192
|
+
remote model storage.
|
|
190
193
|
|
|
191
194
|
Returns:
|
|
192
195
|
An instance of `TrainingResult`.
|
|
@@ -269,6 +272,7 @@ async def train(
|
|
|
269
272
|
dry_run=dry_run,
|
|
270
273
|
remote_storage=remote_storage,
|
|
271
274
|
keep_local_model_copy=keep_local_model_copy,
|
|
275
|
+
remote_root_only=remote_root_only,
|
|
272
276
|
**(core_additional_arguments or {}),
|
|
273
277
|
**(nlu_additional_arguments or {}),
|
|
274
278
|
)
|
|
@@ -284,6 +288,7 @@ async def _train_graph(
|
|
|
284
288
|
dry_run: bool = False,
|
|
285
289
|
remote_storage: Optional[StorageType] = None,
|
|
286
290
|
keep_local_model_copy: bool = False,
|
|
291
|
+
remote_root_only: bool = False,
|
|
287
292
|
**kwargs: Any,
|
|
288
293
|
) -> TrainingResult:
|
|
289
294
|
if model_to_finetune:
|
|
@@ -363,7 +368,9 @@ async def _train_graph(
|
|
|
363
368
|
is_finetuning=is_finetuning,
|
|
364
369
|
)
|
|
365
370
|
if remote_storage:
|
|
366
|
-
push_model_to_remote_storage(
|
|
371
|
+
push_model_to_remote_storage(
|
|
372
|
+
full_model_path, remote_storage, remote_root_only
|
|
373
|
+
)
|
|
367
374
|
if not keep_local_model_copy:
|
|
368
375
|
full_model_path.unlink()
|
|
369
376
|
structlogger.info(
|
|
@@ -581,14 +588,16 @@ async def train_nlu(
|
|
|
581
588
|
).model
|
|
582
589
|
|
|
583
590
|
|
|
584
|
-
def push_model_to_remote_storage(
|
|
591
|
+
def push_model_to_remote_storage(
|
|
592
|
+
model_path: Path, remote_storage: StorageType, remote_root_only: bool = False
|
|
593
|
+
) -> None:
|
|
585
594
|
"""Push model to remote storage."""
|
|
586
595
|
from rasa.core.persistor import get_persistor
|
|
587
596
|
|
|
588
597
|
persistor = get_persistor(remote_storage)
|
|
589
598
|
|
|
590
599
|
if persistor is not None:
|
|
591
|
-
persistor.persist(str(model_path))
|
|
600
|
+
persistor.persist(str(model_path), remote_root_only)
|
|
592
601
|
|
|
593
602
|
else:
|
|
594
603
|
raise RasaException(
|
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
+
import shutil
|
|
4
5
|
import typing
|
|
5
6
|
from collections import OrderedDict
|
|
6
7
|
from enum import Enum
|
|
8
|
+
from pathlib import Path
|
|
7
9
|
from typing import Any, Callable, Dict, List, Optional, Text, Tuple, Type
|
|
8
10
|
|
|
9
11
|
import numpy as np
|
|
@@ -43,6 +45,10 @@ if typing.TYPE_CHECKING:
|
|
|
43
45
|
|
|
44
46
|
CONFIG_FEATURES = "features"
|
|
45
47
|
|
|
48
|
+
TAGGERS_DIR = "taggers"
|
|
49
|
+
CRFSUITE_MODEL_FILE_NAME = "model.crfsuite"
|
|
50
|
+
PLAIN_CRF_MODEL_FILE_NAME = "model.txt"
|
|
51
|
+
|
|
46
52
|
|
|
47
53
|
class CRFToken:
|
|
48
54
|
def __init__(
|
|
@@ -419,19 +425,11 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
|
|
|
419
425
|
"""Loads trained component (see parent class for full docstring)."""
|
|
420
426
|
try:
|
|
421
427
|
with model_storage.read_from(resource) as model_dir:
|
|
422
|
-
dataset = rasa.shared.utils.io.read_json_file(
|
|
423
|
-
model_dir / "crf_dataset.json"
|
|
424
|
-
)
|
|
425
428
|
crf_order = rasa.shared.utils.io.read_json_file(
|
|
426
429
|
model_dir / "crf_order.json"
|
|
427
430
|
)
|
|
428
431
|
|
|
429
|
-
|
|
430
|
-
[CRFToken.create_from_dict(token_data) for token_data in sub_list]
|
|
431
|
-
for sub_list in dataset
|
|
432
|
-
]
|
|
433
|
-
|
|
434
|
-
entity_taggers = cls.train_model(dataset, config, crf_order)
|
|
432
|
+
entity_taggers = cls._load_taggers(model_dir, config)
|
|
435
433
|
|
|
436
434
|
entity_extractor = cls(config, model_storage, resource, entity_taggers)
|
|
437
435
|
entity_extractor.crf_order = crf_order
|
|
@@ -443,19 +441,71 @@ class CRFEntityExtractor(GraphComponent, EntityExtractorMixin):
|
|
|
443
441
|
)
|
|
444
442
|
return cls(config, model_storage, resource)
|
|
445
443
|
|
|
444
|
+
@classmethod
|
|
445
|
+
def _load_taggers(
|
|
446
|
+
cls, model_dir: Path, config: Dict[Text, Any]
|
|
447
|
+
) -> Dict[str, "CRF"]:
|
|
448
|
+
"""
|
|
449
|
+
Load taggers from model directory that persists trained binary
|
|
450
|
+
`model.crfsuite` files.
|
|
451
|
+
"""
|
|
452
|
+
|
|
453
|
+
import pycrfsuite
|
|
454
|
+
import sklearn_crfsuite
|
|
455
|
+
|
|
456
|
+
# Get tagger directories
|
|
457
|
+
taggers_base = model_dir / TAGGERS_DIR
|
|
458
|
+
if not taggers_base.exists():
|
|
459
|
+
return {}
|
|
460
|
+
|
|
461
|
+
taggers_dirs = [
|
|
462
|
+
directory for directory in taggers_base.iterdir() if directory.is_dir()
|
|
463
|
+
]
|
|
464
|
+
|
|
465
|
+
entity_taggers: Dict[str, "CRF"] = {}
|
|
466
|
+
|
|
467
|
+
for tagger_dir in taggers_dirs:
|
|
468
|
+
# Instantiate sklearns CRF wrapper for the pycrfsuite's Tagger
|
|
469
|
+
entity_tagger = sklearn_crfsuite.CRF(
|
|
470
|
+
algorithm="lbfgs",
|
|
471
|
+
# coefficient for L1 penalty
|
|
472
|
+
c1=config["L1_c"],
|
|
473
|
+
# coefficient for L2 penalty
|
|
474
|
+
c2=config["L2_c"],
|
|
475
|
+
# stop earlier
|
|
476
|
+
max_iterations=config["max_iterations"],
|
|
477
|
+
# include transitions that are possible, but not observed
|
|
478
|
+
all_possible_transitions=True,
|
|
479
|
+
)
|
|
480
|
+
|
|
481
|
+
# Load pycrfsuite tagger from the persisted binary model.crfsuite file
|
|
482
|
+
entity_tagger._tagger = pycrfsuite.Tagger()
|
|
483
|
+
entity_tagger._tagger.open(str(tagger_dir / CRFSUITE_MODEL_FILE_NAME))
|
|
484
|
+
|
|
485
|
+
entity_taggers[tagger_dir.name] = entity_tagger
|
|
486
|
+
|
|
487
|
+
return entity_taggers
|
|
488
|
+
|
|
446
489
|
def persist(self, dataset: List[List[CRFToken]]) -> None:
|
|
447
490
|
"""Persist this model into the passed directory."""
|
|
448
491
|
with self._model_storage.write_to(self._resource) as model_dir:
|
|
449
|
-
data_to_store = [
|
|
450
|
-
[token.to_dict() for token in sub_list] for sub_list in dataset
|
|
451
|
-
]
|
|
452
|
-
|
|
453
|
-
rasa.shared.utils.io.dump_obj_as_json_to_file(
|
|
454
|
-
model_dir / "crf_dataset.json", data_to_store
|
|
455
|
-
)
|
|
456
492
|
rasa.shared.utils.io.dump_obj_as_json_to_file(
|
|
457
493
|
model_dir / "crf_order.json", self.crf_order
|
|
458
494
|
)
|
|
495
|
+
if self.entity_taggers is not None:
|
|
496
|
+
for tag_name, entity_tagger in self.entity_taggers.items():
|
|
497
|
+
# Create the directories for storing the CRF model
|
|
498
|
+
tagger_dir = model_dir / TAGGERS_DIR / tag_name
|
|
499
|
+
tagger_dir.mkdir(parents=True, exist_ok=True)
|
|
500
|
+
# Create a plain text version of the CRF model
|
|
501
|
+
entity_tagger.tagger_.dump(
|
|
502
|
+
str(tagger_dir / PLAIN_CRF_MODEL_FILE_NAME)
|
|
503
|
+
)
|
|
504
|
+
# Persist binary version of the model.crfsuite
|
|
505
|
+
shutil.copy2(
|
|
506
|
+
src=entity_tagger.modelfile.name,
|
|
507
|
+
dst=tagger_dir / CRFSUITE_MODEL_FILE_NAME,
|
|
508
|
+
)
|
|
459
509
|
|
|
460
510
|
@classmethod
|
|
461
511
|
def _crf_tokens_to_features(
|
rasa/plugin.py
CHANGED
|
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
import argparse
|
|
4
4
|
import functools
|
|
5
5
|
import sys
|
|
6
|
-
from typing import TYPE_CHECKING,
|
|
6
|
+
from typing import TYPE_CHECKING, List, Optional, Text, Union
|
|
7
7
|
|
|
8
8
|
import pluggy
|
|
9
9
|
|
|
@@ -11,7 +11,7 @@ from rasa.cli import SubParsersAction
|
|
|
11
11
|
|
|
12
12
|
if TYPE_CHECKING:
|
|
13
13
|
from rasa.core.brokers.broker import EventBroker
|
|
14
|
-
from rasa.core.tracker_store import TrackerStore
|
|
14
|
+
from rasa.core.tracker_stores.tracker_store import TrackerStore
|
|
15
15
|
from rasa.shared.core.domain import Domain
|
|
16
16
|
from rasa.shared.core.trackers import DialogueStateTracker
|
|
17
17
|
from rasa.utils.endpoints import EndpointConfig
|
|
@@ -69,16 +69,6 @@ def create_tracker_store( # type: ignore[empty-body]
|
|
|
69
69
|
"""Hook specification for wrapping with AuthRetryTrackerStore."""
|
|
70
70
|
|
|
71
71
|
|
|
72
|
-
@hookspec(firstresult=True) # type: ignore[misc]
|
|
73
|
-
def init_anonymization_pipeline(endpoints_file: Optional[Text]) -> None:
|
|
74
|
-
"""Hook specification for initialising the anonymization pipeline."""
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
@hookspec(firstresult=True) # type: ignore[misc]
|
|
78
|
-
def get_anonymization_pipeline() -> Optional[Any]:
|
|
79
|
-
"""Hook specification for getting the anonymization pipeline."""
|
|
80
|
-
|
|
81
|
-
|
|
82
72
|
@hookspec # type: ignore[misc]
|
|
83
73
|
def after_server_stop() -> None:
|
|
84
74
|
"""Hook specification for stopping the server.
|
rasa/privacy/__init__.py
ADDED
|
File without changes
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
PRIVACY_CONFIG_SCHEMA = "privacy/privacy_config_schema.json"
|
|
2
|
+
REDACTION_CHAR_KEY = "redaction_char"
|
|
3
|
+
KEEP_LEFT_KEY = "keep_left"
|
|
4
|
+
KEEP_RIGHT_KEY = "keep_right"
|
|
5
|
+
DELETION_KEY = "deletion"
|
|
6
|
+
ANONYMIZATION_KEY = "anonymization"
|
|
7
|
+
TRACKER_STORE_SETTINGS = "tracker_store_settings"
|
|
8
|
+
SLOT_KEY = "slot"
|
|
9
|
+
TEXT_KEY = "text"
|
|
10
|
+
ENTITIES_KEY = "entities"
|
|
11
|
+
VALUE_KEY = "value"
|
|
12
|
+
ENTITY_LABEL_KEY = "label"
|
|
13
|
+
|
|
14
|
+
USER_CHAT_INACTIVITY_IN_MINUTES_ENV_VAR_NAME = "USER_CHAT_INACTIVITY_IN_MINUTES"
|
|
15
|
+
GLINER_MODEL_PATH_ENV_VAR_NAME = "GLINER_MODEL_PATH"
|
|
16
|
+
HUGGINGFACE_CACHE_DIR_ENV_VAR_NAME = "HUGGINGFACE_HUB_CACHE_DIR"
|
|
17
|
+
|
|
18
|
+
DEFAULT_PII_MODEL = "urchade/gliner_multi_pii-v1"
|
|
19
|
+
GLINER_LABELS = [
|
|
20
|
+
"person",
|
|
21
|
+
"organization",
|
|
22
|
+
"company",
|
|
23
|
+
"phone number",
|
|
24
|
+
"address",
|
|
25
|
+
"full address",
|
|
26
|
+
"postcode",
|
|
27
|
+
"zip code",
|
|
28
|
+
"passport number",
|
|
29
|
+
"email",
|
|
30
|
+
"credit card number",
|
|
31
|
+
"social security number",
|
|
32
|
+
"health insurance id number",
|
|
33
|
+
"date of birth",
|
|
34
|
+
"mobile phone number",
|
|
35
|
+
"bank account number",
|
|
36
|
+
"medication",
|
|
37
|
+
"cpf",
|
|
38
|
+
"driver's license number",
|
|
39
|
+
"tax identification number",
|
|
40
|
+
"medical condition",
|
|
41
|
+
"identity card number",
|
|
42
|
+
"national id number",
|
|
43
|
+
"ip address",
|
|
44
|
+
"email address",
|
|
45
|
+
"iban",
|
|
46
|
+
"credit card expiration date",
|
|
47
|
+
"username",
|
|
48
|
+
"health insurance number",
|
|
49
|
+
"registration number",
|
|
50
|
+
"student id number",
|
|
51
|
+
"insurance number",
|
|
52
|
+
"membership number",
|
|
53
|
+
"booking number",
|
|
54
|
+
"landline phone number",
|
|
55
|
+
"blood type",
|
|
56
|
+
"cvv",
|
|
57
|
+
"reservation number",
|
|
58
|
+
"digital signature",
|
|
59
|
+
"social media handle",
|
|
60
|
+
"license plate number",
|
|
61
|
+
"cnpj",
|
|
62
|
+
"postal code",
|
|
63
|
+
"passport_number",
|
|
64
|
+
"serial number",
|
|
65
|
+
"vehicle registration number",
|
|
66
|
+
"fax number",
|
|
67
|
+
"visa number",
|
|
68
|
+
"insurance company",
|
|
69
|
+
"identity document number",
|
|
70
|
+
"transaction number",
|
|
71
|
+
"national health insurance number",
|
|
72
|
+
"cvc",
|
|
73
|
+
"birth certificate number",
|
|
74
|
+
"train ticket number",
|
|
75
|
+
"passport expiration date",
|
|
76
|
+
"social_security_number",
|
|
77
|
+
"personally identifiable information",
|
|
78
|
+
"banking routing number",
|
|
79
|
+
"sort code",
|
|
80
|
+
"routing number",
|
|
81
|
+
"tax number",
|
|
82
|
+
"swift code",
|
|
83
|
+
]
|