rasa-pro 3.10.15__py3-none-any.whl → 3.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of rasa-pro might be problematic.
- rasa/__main__.py +31 -15
- rasa/api.py +12 -2
- rasa/cli/arguments/default_arguments.py +24 -4
- rasa/cli/arguments/run.py +15 -0
- rasa/cli/arguments/shell.py +5 -1
- rasa/cli/arguments/train.py +17 -9
- rasa/cli/evaluate.py +7 -7
- rasa/cli/inspect.py +19 -7
- rasa/cli/interactive.py +1 -0
- rasa/cli/project_templates/calm/config.yml +5 -7
- rasa/cli/project_templates/calm/endpoints.yml +15 -2
- rasa/cli/project_templates/tutorial/config.yml +8 -5
- rasa/cli/project_templates/tutorial/data/flows.yml +1 -1
- rasa/cli/project_templates/tutorial/data/patterns.yml +5 -0
- rasa/cli/project_templates/tutorial/domain.yml +14 -0
- rasa/cli/project_templates/tutorial/endpoints.yml +5 -0
- rasa/cli/run.py +7 -0
- rasa/cli/scaffold.py +4 -2
- rasa/cli/studio/upload.py +0 -15
- rasa/cli/train.py +14 -53
- rasa/cli/utils.py +14 -11
- rasa/cli/x.py +7 -7
- rasa/constants.py +3 -1
- rasa/core/actions/action.py +77 -33
- rasa/core/actions/action_hangup.py +29 -0
- rasa/core/actions/action_repeat_bot_messages.py +89 -0
- rasa/core/actions/e2e_stub_custom_action_executor.py +5 -1
- rasa/core/actions/http_custom_action_executor.py +4 -0
- rasa/core/agent.py +2 -2
- rasa/core/brokers/kafka.py +3 -1
- rasa/core/brokers/pika.py +3 -1
- rasa/core/channels/__init__.py +10 -6
- rasa/core/channels/channel.py +41 -4
- rasa/core/channels/development_inspector.py +150 -46
- rasa/core/channels/inspector/README.md +1 -1
- rasa/core/channels/inspector/dist/assets/{arc-b6e548fe.js → arc-bc141fb2.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{c4Diagram-d0fbc5ce-fa03ac9e.js → c4Diagram-d0fbc5ce-be2db283.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{classDiagram-936ed81e-ee67392a.js → classDiagram-936ed81e-55366915.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{classDiagram-v2-c3cb15f1-9b283fae.js → classDiagram-v2-c3cb15f1-bb529518.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{createText-62fc7601-8b6fcc2a.js → createText-62fc7601-b0ec81d6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{edges-f2ad444c-22e77f4f.js → edges-f2ad444c-6166330c.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{erDiagram-9d236eb7-60ffc87f.js → erDiagram-9d236eb7-5ccc6a8e.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{flowDb-1972c806-9dd802e4.js → flowDb-1972c806-fca3bfe4.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{flowDiagram-7ea5b25a-5fa1912f.js → flowDiagram-7ea5b25a-4739080f.js} +1 -1
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-855bc5b3-736177bf.js +1 -0
- rasa/core/channels/inspector/dist/assets/{flowchart-elk-definition-abe16c3d-622a1fd2.js → flowchart-elk-definition-abe16c3d-7c1b0e0f.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{ganttDiagram-9b5ea136-e285a63a.js → ganttDiagram-9b5ea136-772fd050.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{gitGraphDiagram-99d0ae7c-f237bdca.js → gitGraphDiagram-99d0ae7c-8eae1dc9.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{index-2c4b9a3b-4b03d70e.js → index-2c4b9a3b-f55afcdf.js} +1 -1
- rasa/core/channels/inspector/dist/assets/index-e7cef9de.js +1317 -0
- rasa/core/channels/inspector/dist/assets/{infoDiagram-736b4530-72a0fa5f.js → infoDiagram-736b4530-124d4a14.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{journeyDiagram-df861f2b-82218c41.js → journeyDiagram-df861f2b-7c4fae44.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{layout-78cff630.js → layout-b9885fb6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{line-5038b469.js → line-7c59abb6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{linear-c4fc4098.js → linear-4776f780.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{mindmap-definition-beec6740-c33c8ea6.js → mindmap-definition-beec6740-2332c46c.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{pieDiagram-dbbf0591-a8d03059.js → pieDiagram-dbbf0591-8fb39303.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{quadrantDiagram-4d7f4fd6-6a0e56b2.js → quadrantDiagram-4d7f4fd6-3c7180a2.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{requirementDiagram-6fc4c22a-2dc7c7bd.js → requirementDiagram-6fc4c22a-e910bcb8.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{sankeyDiagram-8f13d901-2360fe39.js → sankeyDiagram-8f13d901-ead16c89.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{sequenceDiagram-b655622a-41b9f9ad.js → sequenceDiagram-b655622a-29a02a19.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{stateDiagram-59f0c015-0aad326f.js → stateDiagram-59f0c015-042b3137.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{stateDiagram-v2-2b26beab-9847d984.js → stateDiagram-v2-2b26beab-2178c0f3.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-080da4f6-564d890e.js → styles-080da4f6-23ffa4fc.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-3dcbcfbf-38957613.js → styles-3dcbcfbf-94f59763.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{styles-9c745c82-f0fc6921.js → styles-9c745c82-78a6bebc.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{svgDrawCommon-4835440b-ef3c5a77.js → svgDrawCommon-4835440b-eae2a6f6.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{timeline-definition-5b62e21b-bf3e91c1.js → timeline-definition-5b62e21b-5c968d92.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{xychartDiagram-2b33534f-4d4026c0.js → xychartDiagram-2b33534f-fd3db0d5.js} +1 -1
- rasa/core/channels/inspector/dist/index.html +18 -15
- rasa/core/channels/inspector/index.html +17 -14
- rasa/core/channels/inspector/package.json +5 -1
- rasa/core/channels/inspector/src/App.tsx +118 -68
- rasa/core/channels/inspector/src/components/Chat.tsx +95 -0
- rasa/core/channels/inspector/src/components/DiagramFlow.tsx +11 -10
- rasa/core/channels/inspector/src/components/DialogueStack.tsx +10 -25
- rasa/core/channels/inspector/src/components/LoadingSpinner.tsx +6 -3
- rasa/core/channels/inspector/src/helpers/audiostream.ts +165 -0
- rasa/core/channels/inspector/src/helpers/formatters.test.ts +10 -0
- rasa/core/channels/inspector/src/helpers/formatters.ts +107 -41
- rasa/core/channels/inspector/src/helpers/utils.ts +92 -7
- rasa/core/channels/inspector/src/types.ts +21 -1
- rasa/core/channels/inspector/yarn.lock +94 -1
- rasa/core/channels/rest.py +51 -46
- rasa/core/channels/socketio.py +28 -1
- rasa/core/channels/telegram.py +1 -1
- rasa/core/channels/twilio.py +1 -1
- rasa/core/channels/{audiocodes.py → voice_ready/audiocodes.py} +122 -69
- rasa/core/channels/{voice_aware → voice_ready}/jambonz.py +26 -8
- rasa/core/channels/{voice_aware → voice_ready}/jambonz_protocol.py +57 -5
- rasa/core/channels/{twilio_voice.py → voice_ready/twilio_voice.py} +64 -28
- rasa/core/channels/voice_ready/utils.py +37 -0
- rasa/core/channels/voice_stream/asr/__init__.py +0 -0
- rasa/core/channels/voice_stream/asr/asr_engine.py +89 -0
- rasa/core/channels/voice_stream/asr/asr_event.py +18 -0
- rasa/core/channels/voice_stream/asr/azure.py +129 -0
- rasa/core/channels/voice_stream/asr/deepgram.py +90 -0
- rasa/core/channels/voice_stream/audio_bytes.py +8 -0
- rasa/core/channels/voice_stream/browser_audio.py +107 -0
- rasa/core/channels/voice_stream/call_state.py +23 -0
- rasa/core/channels/voice_stream/tts/__init__.py +0 -0
- rasa/core/channels/voice_stream/tts/azure.py +106 -0
- rasa/core/channels/voice_stream/tts/cartesia.py +118 -0
- rasa/core/channels/voice_stream/tts/tts_cache.py +27 -0
- rasa/core/channels/voice_stream/tts/tts_engine.py +58 -0
- rasa/core/channels/voice_stream/twilio_media_streams.py +173 -0
- rasa/core/channels/voice_stream/util.py +57 -0
- rasa/core/channels/voice_stream/voice_channel.py +427 -0
- rasa/core/information_retrieval/qdrant.py +1 -0
- rasa/core/nlg/contextual_response_rephraser.py +45 -17
- rasa/{nlu → core}/persistor.py +203 -68
- rasa/core/policies/enterprise_search_policy.py +119 -63
- rasa/core/policies/flows/flow_executor.py +15 -22
- rasa/core/policies/intentless_policy.py +83 -28
- rasa/core/processor.py +25 -0
- rasa/core/run.py +12 -2
- rasa/core/secrets_manager/constants.py +4 -0
- rasa/core/secrets_manager/factory.py +8 -0
- rasa/core/secrets_manager/vault.py +11 -1
- rasa/core/training/interactive.py +33 -34
- rasa/core/utils.py +47 -21
- rasa/dialogue_understanding/coexistence/llm_based_router.py +41 -14
- rasa/dialogue_understanding/commands/__init__.py +6 -0
- rasa/dialogue_understanding/commands/repeat_bot_messages_command.py +60 -0
- rasa/dialogue_understanding/commands/session_end_command.py +61 -0
- rasa/dialogue_understanding/commands/user_silence_command.py +59 -0
- rasa/dialogue_understanding/commands/utils.py +5 -0
- rasa/dialogue_understanding/generator/constants.py +2 -0
- rasa/dialogue_understanding/generator/flow_retrieval.py +47 -9
- rasa/dialogue_understanding/generator/llm_based_command_generator.py +38 -15
- rasa/dialogue_understanding/generator/llm_command_generator.py +1 -1
- rasa/dialogue_understanding/generator/multi_step/multi_step_llm_command_generator.py +35 -13
- rasa/dialogue_understanding/generator/single_step/command_prompt_template.jinja2 +3 -0
- rasa/dialogue_understanding/generator/single_step/single_step_llm_command_generator.py +60 -13
- rasa/dialogue_understanding/patterns/default_flows_for_patterns.yml +53 -0
- rasa/dialogue_understanding/patterns/repeat.py +37 -0
- rasa/dialogue_understanding/patterns/user_silence.py +37 -0
- rasa/dialogue_understanding/processor/command_processor.py +21 -1
- rasa/e2e_test/aggregate_test_stats_calculator.py +1 -11
- rasa/e2e_test/assertions.py +136 -61
- rasa/e2e_test/assertions_schema.yml +23 -0
- rasa/e2e_test/e2e_test_case.py +85 -6
- rasa/e2e_test/e2e_test_runner.py +2 -3
- rasa/engine/graph.py +0 -1
- rasa/engine/loader.py +12 -0
- rasa/engine/recipes/config_files/default_config.yml +0 -3
- rasa/engine/recipes/default_recipe.py +0 -1
- rasa/engine/recipes/graph_recipe.py +0 -1
- rasa/engine/runner/dask.py +2 -2
- rasa/engine/storage/local_model_storage.py +12 -42
- rasa/engine/storage/storage.py +1 -5
- rasa/engine/validation.py +527 -74
- rasa/model_manager/__init__.py +0 -0
- rasa/model_manager/config.py +40 -0
- rasa/model_manager/model_api.py +559 -0
- rasa/model_manager/runner_service.py +286 -0
- rasa/model_manager/socket_bridge.py +146 -0
- rasa/model_manager/studio_jwt_auth.py +86 -0
- rasa/model_manager/trainer_service.py +325 -0
- rasa/model_manager/utils.py +87 -0
- rasa/model_manager/warm_rasa_process.py +187 -0
- rasa/model_service.py +112 -0
- rasa/model_training.py +42 -23
- rasa/nlu/tokenizers/whitespace_tokenizer.py +3 -14
- rasa/server.py +4 -2
- rasa/shared/constants.py +60 -8
- rasa/shared/core/constants.py +13 -0
- rasa/shared/core/domain.py +107 -50
- rasa/shared/core/events.py +29 -0
- rasa/shared/core/flows/flow.py +5 -0
- rasa/shared/core/flows/flows_list.py +19 -6
- rasa/shared/core/flows/flows_yaml_schema.json +10 -0
- rasa/shared/core/flows/utils.py +39 -0
- rasa/shared/core/flows/validation.py +121 -0
- rasa/shared/core/flows/yaml_flows_io.py +15 -27
- rasa/shared/core/slots.py +5 -0
- rasa/shared/importers/importer.py +59 -41
- rasa/shared/importers/multi_project.py +23 -11
- rasa/shared/importers/rasa.py +12 -3
- rasa/shared/importers/remote_importer.py +196 -0
- rasa/shared/importers/utils.py +3 -1
- rasa/shared/nlu/training_data/formats/rasa_yaml.py +18 -3
- rasa/shared/nlu/training_data/training_data.py +18 -19
- rasa/shared/providers/_configs/litellm_router_client_config.py +220 -0
- rasa/shared/providers/_configs/model_group_config.py +167 -0
- rasa/shared/providers/_configs/openai_client_config.py +1 -1
- rasa/shared/providers/_configs/rasa_llm_client_config.py +73 -0
- rasa/shared/providers/_configs/self_hosted_llm_client_config.py +1 -0
- rasa/shared/providers/_configs/utils.py +16 -0
- rasa/shared/providers/_utils.py +79 -0
- rasa/shared/providers/embedding/_base_litellm_embedding_client.py +13 -29
- rasa/shared/providers/embedding/azure_openai_embedding_client.py +54 -21
- rasa/shared/providers/embedding/default_litellm_embedding_client.py +24 -0
- rasa/shared/providers/embedding/litellm_router_embedding_client.py +135 -0
- rasa/shared/providers/llm/_base_litellm_client.py +34 -22
- rasa/shared/providers/llm/azure_openai_llm_client.py +50 -29
- rasa/shared/providers/llm/default_litellm_llm_client.py +24 -0
- rasa/shared/providers/llm/litellm_router_llm_client.py +182 -0
- rasa/shared/providers/llm/rasa_llm_client.py +112 -0
- rasa/shared/providers/llm/self_hosted_llm_client.py +5 -29
- rasa/shared/providers/mappings.py +19 -0
- rasa/shared/providers/router/__init__.py +0 -0
- rasa/shared/providers/router/_base_litellm_router_client.py +183 -0
- rasa/shared/providers/router/router_client.py +73 -0
- rasa/shared/utils/common.py +40 -24
- rasa/shared/utils/health_check/__init__.py +0 -0
- rasa/shared/utils/health_check/embeddings_health_check_mixin.py +31 -0
- rasa/shared/utils/health_check/health_check.py +258 -0
- rasa/shared/utils/health_check/llm_health_check_mixin.py +31 -0
- rasa/shared/utils/io.py +27 -6
- rasa/shared/utils/llm.py +353 -43
- rasa/shared/utils/schemas/events.py +2 -0
- rasa/shared/utils/schemas/model_config.yml +0 -10
- rasa/shared/utils/yaml.py +181 -38
- rasa/studio/data_handler.py +3 -1
- rasa/studio/upload.py +160 -74
- rasa/telemetry.py +94 -17
- rasa/tracing/config.py +3 -1
- rasa/tracing/instrumentation/attribute_extractors.py +95 -18
- rasa/tracing/instrumentation/instrumentation.py +121 -0
- rasa/utils/common.py +5 -0
- rasa/utils/endpoints.py +27 -1
- rasa/utils/io.py +8 -16
- rasa/utils/log_utils.py +9 -2
- rasa/utils/sanic_error_handler.py +32 -0
- rasa/validator.py +110 -4
- rasa/version.py +1 -1
- {rasa_pro-3.10.15.dist-info → rasa_pro-3.11.0.dist-info}/METADATA +14 -12
- {rasa_pro-3.10.15.dist-info → rasa_pro-3.11.0.dist-info}/RECORD +234 -183
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-855bc5b3-1844e5a5.js +0 -1
- rasa/core/channels/inspector/dist/assets/index-a5d3e69d.js +0 -1040
- rasa/core/channels/voice_aware/utils.py +0 -20
- rasa/llm_fine_tuning/notebooks/unsloth_finetuning.ipynb +0 -407
- /rasa/core/channels/{voice_aware → voice_ready}/__init__.py +0 -0
- /rasa/core/channels/{voice_native → voice_stream}/__init__.py +0 -0
- {rasa_pro-3.10.15.dist-info → rasa_pro-3.11.0.dist-info}/NOTICE +0 -0
- {rasa_pro-3.10.15.dist-info → rasa_pro-3.11.0.dist-info}/WHEEL +0 -0
- {rasa_pro-3.10.15.dist-info → rasa_pro-3.11.0.dist-info}/entry_points.txt +0 -0
rasa/core/channels/voice_aware/utils.py (removed)
@@ -1,20 +0,0 @@
-import structlog
-
-from rasa.utils.licensing import (
-    PRODUCT_AREA,
-    VOICE_SCOPE,
-    validate_license_from_env,
-)
-
-structlogger = structlog.get_logger()
-
-
-def validate_voice_license_scope() -> None:
-    """Validate that the correct license scope is present."""
-    structlogger.info(
-        f"Validating current Rasa Pro license scope which must include "
-        f"the '{VOICE_SCOPE}' scope to use the voice channel."
-    )
-
-    voice_product_scope = PRODUCT_AREA + " " + VOICE_SCOPE
-    validate_license_from_env(product_area=voice_product_scope)
rasa/llm_fine_tuning/notebooks/unsloth_finetuning.ipynb (removed)
@@ -1,407 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Fine-tuning a Hugging Face base model using Unsloth and TRL\n",
-    "\n",
-    "This is a worked example of how to efficiently fine-tune a base language model from [Hugging Face Hub](https://huggingface.co/models) using the [Unsloth](https://docs.unsloth.ai) and [TRL](https://huggingface.co/docs/trl/en/index) libraries on an instruction-based task.\n",
-    "\n",
-    "Unsloth integrates with TRL in order to reduce the time and GPU memory required to fine-tune LLMs, when compared to using TRL exclusively.\n",
-    "\n",
-    "To run fine-tuning, you must have first [generated the dataset](https://rasa.com/rasa-pro/docs/operating/fine-tuning-recipe) files `train.jsonl` and `val.jsonl`, which must be in the [TRL instruction format](https://huggingface.co/docs/trl/en/sft_trainer#dataset-format-support)."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 1. Configure fine-tuning environment\n",
-    "\n",
-    "This notebook has been tested on on a [GCP Vertex AI Workbench instance](https://cloud.google.com/vertex-ai/docs/workbench/instances/introduction) with machine type `a2-highgpu-1g` in the `asia-southeast1-b` zone, which had the following hardware:\n",
-    "- Single NVIDIA A100 GPU (40GB VRAM)\n",
-    "- 12 core CPU with 85B RAM\n",
-    "- 256GB disk\n",
-    "\n",
-    "It has also been tested on on a [AWS Sagemaker Notebook instance](https://docs.aws.amazon.com/sagemaker/latest/dg/nbi.html) with machine type `ml.g4dn.4xlarge` in the `eu-central-1` region, which had the following hardware:\n",
-    "- Single NVIDIA T4 GPU (16GB VRAM)\n",
-    "- 16 core CPU with 64GB RAM\n",
-    "- 256GB disk\n",
-    "\n",
-    "In both setups, the notebook was executed in a Linux environment with the following software already installed:\n",
-    "- Python 3.10\n",
-    "- CUDA Toolkit 12.1\n",
-    "- PyTorch 2.2\n",
-    "\n",
-    "In order to run fine-tuning yourself, upload this notebook along with your dataset files to your own environment.\n",
-    "\n",
-    "It is highly recommended that you restart your notebook kernel and re-run all notebook cells every time you wish to perform fine-tuning.\n",
-    "\n",
-    "> Despite the fact that this notebook will work with a relatively underpowered GPU, such as the NVIDIA T4, running LLM fine-tuning and inference will be very slow.\n",
-    ">\n",
-    "> It is highly recommended that you use an NVIDIA A100 or other similar GPU types.\n",
-    ">\n",
-    "> The code presented here has been configured for use with an NVIDIA A100, please take note of later comments on the changes you should make to the code when using a different type of GPU.\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 2. Install Python requirements\n",
-    "\n",
-    "The following `pip` commands will install Unsloth and other required Python packages when [Conda](https://anaconda.org/anaconda/conda) is used to manage your Python environment, which is the case for many Jupyter notebook runtimes.\n",
-    "\n",
-    "If Conda is not used in your environment, please follow [these alternative instructions](https://github.com/unslothai/unsloth?tab=readme-ov-file#pip-installation) for installing Unsloth."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%%sh\n",
-    "# install unsloth and other dependencies\n",
-    "pip install \"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git\"\n",
-    "pip install --no-deps \"xformers<=0.0.26\" \"trl<0.9.0\" peft accelerate bitsandbytes huggingface_hub[cli]\n",
-    "# remove tpu-only package that is installed by default on gcp runtimes, even when only using gpu\n",
-    "pip uninstall torch-xla -y"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 3. Download base model\n",
-    "\n",
-    "You can download the model you want to fine-tune from Hugging Face Hub using the [official CLI](https://huggingface.co/docs/huggingface_hub/en/guides/cli) with an [API access token](https://huggingface.co/docs/transformers.js/en/guides/private#step-1-generating-a-user-access-token) as per the code below. Make sure you first update the `HUGGINGFACE_TOKEN` and `BASE_MODEL` environment variables with your own values.\n",
-    "\n",
-    "When testing this notebook, the [Llama 3.1 8B Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct) model was used. Note that `meta-llama/Meta-Llama-3.1-8B-Instruct` is a [gated model](https://huggingface.co/docs/hub/en/models-gated) that you must first request access to before using. \n",
-    "\n",
-    "You can use any other PyTorch model available on [Hugging Face Hub](https://huggingface.co/models). It is recommended that you use a model that has been pre-trained on instructional tasks, such as the [CodeLlama 13B Instruct](https://huggingface.co/codellama/CodeLlama-13b-Instruct-hf) model.\n",
-    "\n",
-    "Pre-trained models with more parameters will generally perform better at tasks than models with fewer parameters. However, the size of model you can use is limited by how much memory your GPU has.\n",
-    "\n",
-    "Alternatively, if you already have a PyTorch model directory to hand, you can upload it to your notebook environment manually."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%%sh\n",
-    "# TODO: update with your values\n",
-    "export HUGGINGFACE_TOKEN=\"CHANGEME\"\n",
-    "export BASE_MODEL=\"meta-llama/Meta-Llama-3.1-8B-Instruct\"\n",
-    "\n",
-    "# download model\n",
-    "huggingface-cli download \"${BASE_MODEL}\" \\\n",
-    "    --token \"${HUGGINGFACE_TOKEN}\" \\\n",
-    "    --local-dir \"./base_model\" \\\n",
-    "    --exclude \"*.bin*\""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 4. Load and quantize base model\n",
-    "\n",
-    "The [quantization of model parameters](https://huggingface.co/docs/optimum/en/concept_guides/quantization) can significantly reduce the GPU memory required to run model fine-tuning and inference, at the cost of model accuracy.\n",
-    "\n",
-    "Here, the base model is loaded from disk and quantized into an 8-bit representation on the fly using the [BitsAndBytes](https://huggingface.co/docs/transformers/main/en/quantization/bitsandbytes) library.\n",
-    "\n",
-    "If you are using a GPU with relatively little memory, such as the NVIDIA T4, or if you are using a base model larger than `meta-llama/Meta-Llama-3.1-8B-Instruct`, you may be required to use 4-bit quantization (e.g. `load_in_4bit = True`) in order to avoid \n",
-    "out of memory (OOM) errors."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from transformers import BitsAndBytesConfig\n",
-    "from unsloth import FastLanguageModel\n",
-    "\n",
-    "# configure quantization method for base model\n",
-    "quantization_config = BitsAndBytesConfig(\n",
-    "    load_in_8bit=True,\n",
-    ")\n",
-    "\n",
-    "# load quantized model and tokenizer from disk\n",
-    "model, tokenizer = FastLanguageModel.from_pretrained(\n",
-    "    model_name=\"./base_model\",\n",
-    "    max_seq_length=2048,\n",
-    "    quantization_config=quantization_config,\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 5. Load training and validation datasets\n",
-    "\n",
-    "The following code loads the training and validation datasets from the `train.jsonl` and `val.json` files, respectively\n",
-    "\n",
-    "As the files use the TRL instruction format, the TRL trainer used later will be able to [automatically parse](https://huggingface.co/docs/trl/en/sft_trainer#dataset-format-support) the datasets and [generate the prompts from a template](https://huggingface.co/docs/transformers/en/chat_templating) configured in the tokenizer.\n",
-    "\n",
-    "Prompt templates vary between models and TRL will infer the correct template from your base model. If this is not available in your base model or if you wish to change it, you can set your own [template string](https://huggingface.co/docs/transformers/en/chat_templating#advanced-adding-and-editing-chat-templates).\n",
-    "\n",
-    "You can also define your own [prompt formatting function](https://huggingface.co/docs/trl/en/sft_trainer#format-your-input-prompts) in order to have full control of how the prompts are constructed."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import datasets\n",
-    "from trl.extras.dataset_formatting import get_formatting_func_from_dataset\n",
-    "\n",
-    "# load datasets from disk\n",
-    "train_dataset = datasets.load_dataset(\n",
-    "    \"json\", data_files={\"train\": \"train.jsonl\"}, split=\"train\"\n",
-    ")\n",
-    "eval_dataset = datasets.load_dataset(\n",
-    "    \"json\", data_files={\"eval\": \"val.jsonl\"}, split=\"eval\"\n",
-    ")\n",
-    "\n",
-    "# test prompt templating on example from dataset\n",
-    "print(get_formatting_func_from_dataset(train_dataset, tokenizer)(eval_dataset[0]))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 6. Configure trainer\n",
-    "\n",
-    "Below, the arguments for the supervised fine-tuning (SFT) trainer are configured. Their values were chosen somewhat arbitrarily and resulted in satisfactory results during testing.\n",
-    "\n",
-    "It is recommended that you read the official documentation and experiment with the arguments passed to `SFTConfig` (see [here](https://huggingface.co/docs/trl/main/en/sft_trainer#trl.SFTTrainer)) and `SFTTrainer` (see [here](https://huggingface.co/docs/trl/main/en/sft_trainer#trl.SFTTrainer)).\n",
-    "\n",
-    "For example:\n",
-    "- If you get an OOM error when running fine-tuning, you can reduce `per_device_train_batch_size` in order to reduce the memory footprint. However, if your GPU has sufficient memory, you can try increasing it in order to reduce the total number of training steps.\n",
-    "- Consider setting `max_steps`, as you may not need to perform all epochs in order to achieve optimal model accuracy. Conversely, you may see better model accuracy by increasing `num_train_epochs`.\n",
-    "- If fine-tuning is taking too long, you can increase `eval_steps` in order to reduce how often validation is performed. "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import torch\n",
-    "from transformers import TrainingArguments\n",
-    "from trl import SFTTrainer\n",
-    "\n",
-    "# configure training args\n",
-    "args = TrainingArguments(\n",
-    "    # training\n",
-    "    per_device_train_batch_size=8,\n",
-    "    warmup_steps=50,\n",
-    "    num_train_epochs=4,\n",
-    "    learning_rate=0.0001,\n",
-    "    lr_scheduler_type=\"cosine\",\n",
-    "    optim=\"adamw_bnb_8bit\",\n",
-    "    weight_decay=0.0,\n",
-    "    logging_steps=1,\n",
-    "    # datatypes\n",
-    "    fp16=not torch.cuda.is_bf16_supported(),\n",
-    "    bf16=torch.cuda.is_bf16_supported(),\n",
-    "    # evaluation\n",
-    "    eval_strategy=\"steps\",\n",
-    "    eval_steps=50,\n",
-    "    per_device_eval_batch_size=8,\n",
-    "    output_dir=\"outputs\",\n",
-    ")\n",
-    "\n",
-    "# setup trainer\n",
-    "trainer = SFTTrainer(\n",
-    "    model=model,\n",
-    "    tokenizer=tokenizer,\n",
-    "    train_dataset=train_dataset,\n",
-    "    eval_dataset=eval_dataset,\n",
-    "    args=args,\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 7. Perform supervised fine-tuning\n",
-    "\n",
-    "In the code below, fine-tuning is performed using the previously congfigured trainer.\n",
-    "\n",
-    "When testing this step on an NVIDIA A100 using the configuration defined above, it took around 12 minutes to perform fine-tuning with a training dataset containing around 500 examples.\n",
-    "\n",
-    "After fine-tuning, the base model and fine-tuned adapters are [merged together and saved to disk](https://docs.unsloth.ai/basics/saving-models/saving-to-vllm) in 16-bit for future compatibility with the [vLLM](https://github.com/vllm-project/vllm) model serving library.\n",
-    "\n",
-    "If you are using a relatively small GPU, such as the NVIDIA T4, you may have to save the model in 4-bit instead (e.g. `save_method = \"merged_4bit_forced\"`)."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# run fine-tuning\n",
-    "finetune_metrics = trainer.train()\n",
-    "\n",
-    "# save model to disk in 16-bit\n",
-    "model.save_pretrained_merged(\n",
-    "    \"./finetune_model\", tokenizer, save_method=\"merged_16bit\"\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 8. Visualize fine-tuning metrics\n",
-    "\n",
-    "Some of the metrics collected during fine-tuning are visualised below in order for you to diagnose any potential issues with the model.\n",
-    "\n",
-    "Specifically, the training and validation losses are plotted against the training step number. Please check the plot for the following:\n",
-    "- Ideally, as the fine-tuning steps increase, the training and validation losses should decrease and converge. \n",
-    "- If both loss curves do not converge, it may be worth performing more fine-tuning steps or epochs. This is known as [underfitting](https://www.ibm.com/topics/underfitting).\n",
-    "- If the validation loss suddenly starts to increase while the training loss continues to decrease or converge, you should decrease your total number of steps or epochs. This is known as [overfitting](https://www.ibm.com/topics/overfitting)."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pandas as pd\n",
-    "import matplotlib.pyplot as plt\n",
-    "\n",
-    "# plot step against train and val losses\n",
-    "fig, ax = plt.subplots()\n",
-    "log_history = pd.DataFrame(trainer.state.log_history)\n",
-    "eval_loss = (\n",
-    "    log_history[[\"step\", \"eval_loss\"]]\n",
-    "    .dropna()\n",
-    "    .plot(x=\"step\", ax=ax)\n",
-    ")\n",
-    "train_loss = (\n",
-    "    log_history[[\"step\", \"loss\"]]\n",
-    "    .dropna()\n",
-    "    .plot(x=\"step\", ax=ax)\n",
-    ")\n",
-    "fig.show()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 9. Run ad hoc inference\n",
-    "\n",
-    "You can load your fine-tuned model from disk using Unsloth and use it to run optimized inference on individual inputs of your choosing using the code below.\n",
-    "\n",
-    "Note that the inputs passed to model are in the [TRL convertsational format](https://huggingface.co/docs/trl/en/sft_trainer#dataset-format-support) as the Hugging Face [chat template requires them to be](https://huggingface.co/docs/transformers/main/en/chat_templating#how-do-i-use-chat-templates). During training TRL will [automatically convert the instruction format to the conversational format](https://github.com/huggingface/trl/blob/main/trl/extras/dataset_formatting.py). However, you have to do this yourself when applying chat templates manually for inference."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from transformers import TextStreamer\n",
-    "from unsloth import FastLanguageModel\n",
-    "\n",
-    "model, tokenizer = FastLanguageModel.from_pretrained(\"./finetune_model\")\n",
-    "FastLanguageModel.for_inference(model) # enable inference optimizations\n",
-    "streamer = TextStreamer(tokenizer) # stream model outputs as they are generated\n",
-    "\n",
-    "# the content to include in the input prompt\n",
-    "# by default, a value from the validation dataset as example\n",
-    "content = eval_dataset[\"prompt\"][0]\n",
-    "\n",
-    "# apply prompt template and tokenize\n",
-    "input_ids = tokenizer.apply_chat_template(\n",
-    "    [{\"role\": \"user\", \"content\": content}], # in the TRL conversational format\n",
-    "    tokenize=True,\n",
-    "    add_generation_prompt=True,\n",
-    "    return_tensors=\"pt\",\n",
-    ").to(\"cuda\")\n",
-    "\n",
-    "# generate model output from user input\n",
-    "_ = model.generate(\n",
-    "    input_ids=input_ids,\n",
-    "    streamer=streamer, # remove streamer if you want whole output at end\n",
-    "    max_new_tokens=64, # set the limit on how many tokens are generated\n",
-    "    do_sample=False, # disable random sampling for deterministic outputs\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 10. Export fine-tuned model\n",
-    "\n",
-    "Lastly, export your fine-tuned model directory to an appropriate storage location that can be easily accessed later for [deployment](https://rasa.com/rasa-pro/docs/building-assistants/self-hosted-llm).\n",
-    "\n",
-    "It is recommended that you use a cloud object store, such as [Amazon S3](https://aws.amazon.com/s3/) or [Google Cloud Storage](https://cloud.google.com/storage).\n",
-    "\n",
-    "Uncomment and run the corresponding commands below for your cloud provider, making sure to first update the environment variables with your own values. It is assumed that:\n",
-    "- your bucket already exists\n",
-    "- you have already installed the CLI tool for your cloud provider\n",
-    "- you have already authenticated with your cloud provider and have sufficient permissions to write to your bucket"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%%sh\n",
-    "export LOCAL_MODEL_PATH=\"./finetune_model\"\n",
-    "\n",
-    "# if using amazon\n",
-    "# export S3_MODEL_URI=\"s3://CHANGEME\" # update with your value\n",
-    "# aws s3 cp \"${LOCAL_MODEL_PATH}\" \"${S3_MODEL_URI}\" --recursive\n",
-    "\n",
-    "# if using google\n",
-    "# export GCS_MODEL_URI=\"gs://CHANGEME\" # update with your value\n",
-    "# gsutil cp -r \"${LOCAL_MODEL_PATH}\" \"${GCS_MODEL_URI}\""
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.14"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}