rasa-pro 3.11.0rc3__py3-none-any.whl → 3.11.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- rasa/__main__.py +9 -3
- rasa/cli/studio/upload.py +0 -15
- rasa/cli/utils.py +1 -1
- rasa/core/channels/development_inspector.py +4 -1
- rasa/core/channels/voice_stream/asr/asr_engine.py +19 -1
- rasa/core/channels/voice_stream/asr/azure.py +11 -2
- rasa/core/channels/voice_stream/asr/deepgram.py +4 -3
- rasa/core/channels/voice_stream/tts/azure.py +3 -1
- rasa/core/channels/voice_stream/tts/cartesia.py +3 -3
- rasa/core/channels/voice_stream/tts/tts_engine.py +10 -1
- rasa/core/information_retrieval/qdrant.py +1 -0
- rasa/core/persistor.py +93 -49
- rasa/core/policies/flows/flow_executor.py +18 -8
- rasa/core/processor.py +7 -5
- rasa/e2e_test/aggregate_test_stats_calculator.py +11 -1
- rasa/e2e_test/assertions.py +133 -16
- rasa/e2e_test/assertions_schema.yml +23 -0
- rasa/e2e_test/e2e_test_runner.py +2 -2
- rasa/engine/loader.py +12 -0
- rasa/engine/validation.py +291 -79
- rasa/model_manager/config.py +8 -0
- rasa/model_manager/model_api.py +166 -61
- rasa/model_manager/runner_service.py +31 -26
- rasa/model_manager/trainer_service.py +14 -23
- rasa/model_manager/warm_rasa_process.py +187 -0
- rasa/model_service.py +3 -5
- rasa/model_training.py +3 -1
- rasa/shared/constants.py +22 -0
- rasa/shared/core/domain.py +8 -5
- rasa/shared/core/flows/yaml_flows_io.py +13 -4
- rasa/shared/importers/importer.py +19 -2
- rasa/shared/importers/rasa.py +5 -1
- rasa/shared/nlu/training_data/formats/rasa_yaml.py +18 -3
- rasa/shared/providers/_utils.py +79 -0
- rasa/shared/providers/embedding/default_litellm_embedding_client.py +24 -0
- rasa/shared/providers/llm/default_litellm_llm_client.py +24 -0
- rasa/shared/utils/common.py +29 -2
- rasa/shared/utils/health_check/health_check.py +26 -24
- rasa/shared/utils/yaml.py +116 -31
- rasa/studio/data_handler.py +3 -1
- rasa/studio/upload.py +119 -57
- rasa/validator.py +40 -4
- rasa/version.py +1 -1
- {rasa_pro-3.11.0rc3.dist-info → rasa_pro-3.11.1.dist-info}/METADATA +2 -2
- {rasa_pro-3.11.0rc3.dist-info → rasa_pro-3.11.1.dist-info}/RECORD +48 -46
- {rasa_pro-3.11.0rc3.dist-info → rasa_pro-3.11.1.dist-info}/NOTICE +0 -0
- {rasa_pro-3.11.0rc3.dist-info → rasa_pro-3.11.1.dist-info}/WHEEL +0 -0
- {rasa_pro-3.11.0rc3.dist-info → rasa_pro-3.11.1.dist-info}/entry_points.txt +0 -0
rasa/model_manager/model_api.py
CHANGED
@@ -1,8 +1,10 @@
 import asyncio
+from functools import wraps
 import os
 from http import HTTPStatus
-from typing import Any, Dict, Optional
+from typing import Any, Callable, Dict, Optional, Union
 import dotenv
+import psutil
 from sanic import Blueprint, Sanic, response
 from sanic.response import json
 from sanic.exceptions import NotFound
@@ -18,6 +20,7 @@ from rasa.model_manager.runner_service import (
     BotSession,
     BotSessionStatus,
     fetch_remote_model_to_dir,
+    fetch_size_of_remote_model,
     run_bot,
     terminate_bot,
     update_bot_status,
@@ -37,6 +40,10 @@ from rasa.model_manager.utils import (
     models_base_path,
     subpath,
 )
+from rasa.model_manager.warm_rasa_process import (
+    initialize_warm_rasa_process,
+    shutdown_warm_rasa_processes,
+)
 
 dotenv.load_dotenv()
 
@@ -111,7 +118,7 @@ async def continuously_update_process_status() -> None:
         except Exception as e:
             structlogger.error("model_api.update_process_status.error", error=str(e))
         finally:
-            await asyncio.sleep(1)
+            await asyncio.sleep(0.1)
 
 
 def internal_blueprint() -> Blueprint:
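Dropping the poll interval from one second to 100 ms makes the manager notice finished or crashed subprocesses almost immediately, at the cost of ten times as many wakeups. A minimal sketch of this polling pattern, with simplified names rather than the actual rasa loop:

    import asyncio

    async def poll_forever(update_status, interval: float = 0.1) -> None:
        # Re-check child process status every `interval` seconds; the
        # interval bounds how stale the reported status can get.
        while True:
            try:
                update_status()
            except Exception as exc:
                print(f"status update failed: {exc}")
            finally:
                await asyncio.sleep(interval)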
@@ -124,61 +131,112 @@ def internal_blueprint() -> Blueprint:
         structlogger.debug("model_api.cleanup_processes.started")
         cleanup_training_processes()
         cleanup_bot_processes()
+        shutdown_warm_rasa_processes()
 
-    @bp.
-    async def
-
-
-
-
-
-
-        running_requests = len(
-            [
-                training
-                for training in trainings.values()
-                if training.status == TrainingSessionStatus.RUNNING
-                and training.process.poll() is None
-            ]
-        )
-
-        if running_requests >= int(MAX_PARALLEL_TRAININGS):
-            return response.json(
-                {
-                    "message": f"Too many parallel training requests, above "
-                    f"the limit of {MAX_PARALLEL_TRAININGS}. "
-                    f"Retry later or increase your server's "
-                    f"memory and CPU resources."
-                },
-                status=HTTPStatus.TOO_MANY_REQUESTS,
-            )
+    @bp.after_server_start
+    async def create_warm_rasa_processes(
+        app: Sanic, loop: asyncio.AbstractEventLoop
+    ) -> None:
+        """Create warm Rasa processes to speed up future training and bot runs."""
+        structlogger.debug("model_api.create_warm_rasa_processes.started")
+        initialize_warm_rasa_process()
 
-
-
-        """Limit the number of parallel bot runs."""
-        from rasa.model_manager.config import MAX_PARALLEL_BOT_RUNS
-
-        if not request.url.endswith("/bot"):
-            return None
+    def limit_parallel_training_requests() -> Callable[[Callable], Callable[..., Any]]:
+        """Limit the number of parallel training requests."""
 
-
-
-
-
-
-
-
+        def decorator(f: Callable) -> Callable:
+            @wraps(f)
+            def decorated(*args: Any, **kwargs: Any) -> Any:
+                running_requests = len(
+                    [
+                        training
+                        for training in trainings.values()
+                        if training.status == TrainingSessionStatus.RUNNING
+                        and training.process.poll() is None
+                    ]
+                )
+
+                if running_requests >= int(config.MAX_PARALLEL_TRAININGS):
+                    return response.json(
+                        {
+                            "message": f"Too many parallel training requests, above "
+                            f"the limit of {config.MAX_PARALLEL_TRAININGS}. "
+                            f"Retry later or increase your server's "
+                            f"memory and CPU resources."
+                        },
+                        status=HTTPStatus.TOO_MANY_REQUESTS,
+                    )
+                return f(*args, **kwargs)
+
+            return decorated
+
+        return decorator
+
+    def limit_parallel_bot_runs() -> Callable[[Callable], Callable[..., Any]]:
+        """Limit the number of parallel training requests."""
 
-
-
-
-
-
-
-
-
-
+        def decorator(f: Callable) -> Callable:
+            @wraps(f)
+            def decorated(*args: Any, **kwargs: Any) -> Any:
+                running_requests = len(
+                    [
+                        bot
+                        for bot in running_bots.values()
+                        if bot.status
+                        in {BotSessionStatus.RUNNING, BotSessionStatus.QUEUED}
+                    ]
+                )
+
+                if running_requests >= int(config.MAX_PARALLEL_BOT_RUNS):
+                    return response.json(
+                        {
+                            "message": f"Too many parallel bot runs, above "
+                            f"the limit of {config.MAX_PARALLEL_BOT_RUNS}. "
+                            f"Retry later or increase your server's "
+                            f"memory and CPU resources."
+                        },
+                        status=HTTPStatus.TOO_MANY_REQUESTS,
+                    )
+
+                return f(*args, **kwargs)
+
+            return decorated
+
+        return decorator
+
+    def ensure_minimum_disk_space() -> Callable[[Callable], Callable[..., Any]]:
+        """Ensure that there is enough disk space before starting a new process."""
+        min_required_disk_space = 1024 * 1024 * config.MIN_REQUIRED_DISCSPACE_MB
+
+        def decorator(f: Callable) -> Callable:
+            @wraps(f)
+            def decorated(*args: Any, **kwargs: Any) -> Any:
+                if os.path.exists(config.SERVER_BASE_WORKING_DIRECTORY):
+                    free_space_bytes = psutil.disk_usage(
+                        config.SERVER_BASE_WORKING_DIRECTORY
+                    ).free
+                    structlogger.debug(
+                        "model_api.storage.available_disk_space",
+                        available_space_mb=free_space_bytes / 1024 / 1024,
+                    )
+
+                    if free_space_bytes < min_required_disk_space:
+                        return response.json(
+                            {
+                                "message": (
+                                    f"Less than {config.MIN_REQUIRED_DISCSPACE_MB} MB "
+                                    f"of free disk space available. "
+                                    f"Please free up some space on the model service."
+                                )
+                            },
+                            status=HTTPStatus.INSUFFICIENT_STORAGE,
+                        )
+
+                return f(*args, **kwargs)
+
+            return decorated
+
+        return decorator
 
     @bp.get("/")
     async def health(request: Request) -> response.HTTPResponse:
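All three guards above share one decorator-factory shape: a zero-argument factory returns a decorator, and functools.wraps preserves the wrapped handler's name and docstring, which helps Sanic keep route names unique. A stripped-down sketch of the pattern with generic names (not the rasa code itself):

    from functools import wraps
    from typing import Any, Callable

    def reject_when(over_limit: Callable[[], bool], message: str) -> Callable:
        """Factory: build a decorator that short-circuits gated handlers."""

        def decorator(f: Callable) -> Callable:
            @wraps(f)  # keep f's name so route registration stays unique
            def decorated(*args: Any, **kwargs: Any) -> Any:
                if over_limit():
                    # framework-neutral stand-in for an HTTP 429 response
                    return {"message": message}, 429
                return f(*args, **kwargs)

            return decorated

        return decorator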
@@ -190,6 +248,7 @@ def internal_blueprint() -> Blueprint:
                 "deployment_id": bot.deployment_id,
                 "status": bot.status,
                 "internal_url": bot.internal_url,
+                "returncode": bot.returncode,
                 "url": bot.url,
             }
             for bot in running_bots.values()
@@ -227,6 +286,8 @@ def internal_blueprint() -> Blueprint:
         return json({"training_sessions": sessions, "total_number": len(sessions)})
 
     @bp.post("/training")
+    @limit_parallel_training_requests()
+    @ensure_minimum_disk_space()
     async def start_training(request: Request) -> response.HTTPResponse:
         """Start a new training session."""
         data = request.json
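With the guards applied, POST /training can now fail fast with 429 (parallel-training limit) or 507 (insufficient disk) before any work starts. A hedged client-side sketch; the endpoint and response shape are taken from this diff, and requests is just one possible HTTP client:

    import requests

    def start_training(base_url: str, payload: dict) -> str:
        resp = requests.post(f"{base_url}/training", json=payload)
        if resp.status_code == 429:
            raise RuntimeError("parallel-training limit reached; retry later")
        if resp.status_code == 507:
            raise RuntimeError("model service is low on disk space")
        resp.raise_for_status()
        # on success the server answers with {"training_id": ...}
        return resp.json()["training_id"]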
@@ -277,7 +338,7 @@ def internal_blueprint() -> Blueprint:
                     "progress": training.progress,
                     "model_name": training.model_name,
                     "status": training.status,
-                    "logs": get_logs_content(
+                    "logs": get_logs_content(training.log_id),
                 }
             )
         else:
@@ -295,6 +356,8 @@ def internal_blueprint() -> Blueprint:
         return json({"training_id": training_id})
 
     @bp.post("/bot")
+    @limit_parallel_bot_runs()
+    @ensure_minimum_disk_space()
     async def start_bot(request: Request) -> response.HTTPResponse:
         data = request.json
         deployment_id: Optional[str] = data.get("deployment_id")
@@ -359,8 +422,9 @@ def internal_blueprint() -> Blueprint:
             {
                 "deployment_id": deployment_id,
                 "status": bot.status,
+                "returncode": bot.returncode,
                 "url": bot.url,
-                "logs": get_logs_content(
+                "logs": get_logs_content(bot.log_id),
             }
         )
 
@@ -370,26 +434,47 @@ def internal_blueprint() -> Blueprint:
             {
                 "deployment_id": bot.deployment_id,
                 "status": bot.status,
+                "returncode": bot.returncode,
                 "url": bot.url,
             }
             for bot in running_bots.values()
         ]
         return json({"deployment_sessions": bots, "total_number": len(bots)})
 
-    @bp.route("/models/<model_name>")
-    async def send_model(
+    @bp.route("/models/<model_name>", methods=["GET"])
+    async def send_model(
+        request: Request, model_name: str
+    ) -> Union[response.ResponseStream, response.HTTPResponse]:
         try:
             model_path = path_to_model(model_name)
 
-
-
+            # get size of model file
+            model_size = os.stat(model_path)
 
-            return await response.
+            return await response.file_stream(
+                model_path, headers={"Content-Length": str(model_size.st_size)}
+            )
         except NotFound:
             return json({"message": "Model not found"}, status=404)
         except ModelNotFound:
             return json({"message": "Model not found"}, status=404)
 
+    @bp.route("/models/<model_name>", methods=["HEAD"])
+    async def head_model(request: Request, model_name: str) -> response.HTTPResponse:
+        try:
+            model_size = size_of_model(model_name)
+
+            structlogger.debug(
+                "model_api.internal.head_model",
+                model_name=model_name,
+                size=model_size,
+            )
+            return response.raw(
+                b"", status=200, headers={"Content-Length": str(model_size)}
+            )
+        except ModelNotFound:
+            return response.raw(b"", status=404)
+
     return bp
 
 
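The new HEAD handler exposes a model's size through the Content-Length header without shipping the archive, and the reworked GET now streams the file with an explicit Content-Length taken from os.stat. A hedged client sketch of the HEAD side:

    import requests

    def remote_model_size(base_url: str, model_name: str) -> int:
        # HEAD /models/<model_name> carries the size in Content-Length
        # and returns 404 with an empty body if the model is missing.
        resp = requests.head(f"{base_url}/models/{model_name}")
        resp.raise_for_status()
        return int(resp.headers["Content-Length"])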
@@ -432,6 +517,26 @@ def external_blueprint() -> Blueprint:
     return bp
 
 
+def size_of_model(model_name: str) -> Optional[int]:
+    """Return the size of a model."""
+    model_file_name = f"{model_name}.{MODEL_ARCHIVE_EXTENSION}"
+    model_path = subpath(models_base_path(), model_file_name)
+
+    if os.path.exists(model_path):
+        return os.path.getsize(model_path)
+
+    if config.SERVER_MODEL_REMOTE_STORAGE:
+        structlogger.debug(
+            "model_api.storage.fetching_remote_model_size",
+            model_name=model_file_name,
+        )
+        return fetch_size_of_remote_model(
+            model_file_name,
+            config.SERVER_MODEL_REMOTE_STORAGE,
+        )
+    raise ModelNotFound("Model not found.")
+
+
 def path_to_model(model_name: str) -> Optional[str]:
     """Return the path to a local model."""
     model_file_name = f"{model_name}.{MODEL_ARCHIVE_EXTENSION}"
@@ -451,4 +556,4 @@ def path_to_model(model_name: str) -> Optional[str]:
             config.SERVER_MODEL_REMOTE_STORAGE,
         )
 
-
+    raise ModelNotFound("Model not found.")
rasa/model_manager/runner_service.py
CHANGED

@@ -1,6 +1,6 @@
 import os
 import shutil
-from typing import Dict
+from typing import Dict, Optional
 import aiohttp
 import structlog
 import subprocess
@@ -16,7 +16,8 @@ from rasa.model_manager.utils import (
 from rasa.constants import MODEL_ARCHIVE_EXTENSION
 
 from rasa.model_manager import config
-from rasa.model_manager.utils import logs_path
+from rasa.model_manager.utils import logs_path
+from rasa.model_manager.warm_rasa_process import start_rasa_process
 
 structlogger = structlog.get_logger()
 
@@ -40,6 +41,8 @@ class BotSession(BaseModel):
     url: str
     internal_url: str
     port: int
+    log_id: str
+    returncode: Optional[int] = None
 
     def is_alive(self) -> bool:
         """Check if the bot is alive."""
@@ -81,9 +84,6 @@ async def is_bot_startup_finished(bot: BotSession) -> bool:
         async with session.get(f"{bot.internal_url}/license") as resp:
             return resp.status == 200
     except aiohttp.client_exceptions.ClientConnectorError:
-        structlogger.debug(
-            "model_runner.bot.not_running_yet", deployment_id=bot.deployment_id
-        )
         return False
 
 
@@ -95,6 +95,7 @@ def set_bot_status_to_stopped(bot: BotSession) -> None:
         status=bot.process.returncode,
     )
     bot.status = BotSessionStatus.STOPPED
+    bot.returncode = bot.process.returncode
 
 
 def set_bot_status_to_running(bot: BotSession) -> None:
@@ -181,18 +182,25 @@ def fetch_remote_model_to_dir(
         raise ModelNotFound() from e
 
 
+def fetch_size_of_remote_model(model_name: str, storage_type: str) -> int:
+    """Fetch the size of the model from remote storage."""
+    from rasa.core.persistor import get_persistor
+
+    persistor = get_persistor(storage_type)
+
+    # we now there must be a persistor, because the config is set
+    # this is here to please the type checker for the call below
+    assert persistor is not None
+
+    return persistor.size_of_persisted_model(model_name=model_name)
+
+
 def start_bot_process(
     deployment_id: str, bot_base_path: str, base_url_path: str
 ) -> BotSession:
     port = get_open_port()
-    log_path = logs_path(deployment_id)
-
-    ensure_base_directory_exists(log_path)
 
-
-        config.RASA_PYTHON_PATH,
-        "-m",
-        "rasa.__main__",
+    arguments = [
         "run",
         "--endpoints",
         f"{bot_base_path}/endpoints.yml",
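fetch_size_of_remote_model defers to the persistor's size_of_persisted_model, added to rasa/core/persistor.py in this same release (see the file list above). For an S3-style backend such a lookup can be metadata-only; a hypothetical sketch, not the actual persistor code:

    import boto3

    def size_of_persisted_model_s3(bucket: str, model_file_name: str) -> int:
        # HEAD the object instead of downloading it; ContentLength is
        # the stored archive size in bytes.
        s3 = boto3.client("s3")
        head = s3.head_object(Bucket=bucket, Key=model_file_name)
        return head["ContentLength"]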
@@ -207,35 +215,30 @@
     structlogger.debug(
         "model_runner.bot.starting_command",
         deployment_id=deployment_id,
-
+        arguments=" ".join(arguments),
     )
 
-
-        full_command,
-        cwd=bot_base_path,
-        stdout=open(log_path, "w"),
-        stderr=subprocess.STDOUT,
-        env=os.environ.copy(),
-    )
+    warm_process = start_rasa_process(cwd=bot_base_path, arguments=arguments)
 
     internal_bot_url = f"http://localhost:{port}"
 
     structlogger.info(
         "model_runner.bot.starting",
         deployment_id=deployment_id,
-        log=
+        log=logs_path(warm_process.log_id),
         url=internal_bot_url,
         port=port,
-        pid=process.pid,
+        pid=warm_process.process.pid,
     )
 
     return BotSession(
         deployment_id=deployment_id,
         status=BotSessionStatus.QUEUED,
-        process=process,
+        process=warm_process.process,
         url=f"{base_url_path}?deployment_id={deployment_id}",
         internal_url=internal_bot_url,
         port=port,
+        log_id=warm_process.log_id,
    )
 
 
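start_rasa_process comes from the new rasa/model_manager/warm_rasa_process.py (+187 lines, not shown in this section). Its call sites here pin down the contract: it takes cwd and arguments and returns an object exposing .process (a subprocess.Popen) and .log_id. A hypothetical minimal stand-in consistent with those call sites; the real module additionally keeps pre-warmed interpreters around, per the initialize_warm_rasa_process/shutdown_warm_rasa_processes hooks above:

    import os
    import subprocess
    import uuid
    from dataclasses import dataclass
    from typing import List

    @dataclass
    class RasaProcess:
        process: subprocess.Popen
        log_id: str

    def start_rasa_process(cwd: str, arguments: List[str]) -> RasaProcess:
        # Hypothetical stand-in: spawn `python -m rasa.__main__ <arguments>`
        # with output tied to a fresh log id, mirroring the inline
        # subprocess.Popen calls this diff removes.
        log_id = uuid.uuid4().hex
        log_file = open(f"{log_id}.log", "w")
        process = subprocess.Popen(
            ["python", "-m", "rasa.__main__", *arguments],
            cwd=cwd,
            stdout=log_file,
            stderr=subprocess.STDOUT,
            env=os.environ.copy(),
        )
        return RasaProcess(process=process, log_id=log_id)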
@@ -246,10 +249,11 @@ def run_bot(
     encoded_configs: Dict[str, str],
 ) -> BotSession:
     """Deploy a bot based on a given training id."""
-
-
+    with structlog.contextvars.bound_contextvars(model_name=model_name):
+        bot_base_path = bot_path(deployment_id)
+        prepare_bot_directory(bot_base_path, model_name, encoded_configs)
 
-
+        return start_bot_process(deployment_id, bot_base_path, base_url_path)
 
 
 async def update_bot_status(bot: BotSession) -> None:
@@ -274,6 +278,7 @@ def terminate_bot(bot: BotSession) -> None:
             status=bot.process.returncode,
         )
         bot.status = BotSessionStatus.STOPPED
+        bot.returncode = bot.process.returncode
     except ProcessLookupError:
         structlogger.debug(
             "model_runner.stop_bot.process_not_found",
rasa/model_manager/trainer_service.py
CHANGED

@@ -13,6 +13,9 @@ from pydantic import BaseModel, ConfigDict
 from enum import Enum
 
 from rasa.model_manager import config
+from rasa.model_manager.warm_rasa_process import (
+    start_rasa_process,
+)
 from rasa.model_training import generate_random_model_name
 from rasa.model_manager.utils import ensure_base_directory_exists, logs_path
 
@@ -40,6 +43,7 @@ class TrainingSession(BaseModel):
     model_name: str
     status: TrainingSessionStatus
     process: subprocess.Popen
+    log_id: str
 
     def is_status_indicating_alive(self) -> bool:
         """Check if the training is running."""
@@ -244,19 +248,12 @@ def start_training_process(
     client_id: str,
     training_base_path: str,
 ) -> TrainingSession:
-    log_path = logs_path(training_id)
-
-    ensure_base_directory_exists(log_path)
-
     model_name = generate_random_model_name()
     # Start the training in a subprocess
     # set the working directory to the training directory
     # run the rasa train command as a subprocess, activating poetry before running
     # pipe the stdout and stderr to the same file
-
-        config.RASA_PYTHON_PATH,
-        "-m",
-        "rasa.__main__",
+    arguments = [
         "train",
         "--debug",
         "--data",
@@ -274,7 +271,7 @@
     ]
 
     if config.SERVER_MODEL_REMOTE_STORAGE:
-
+        arguments.extend(
             [
                 "--keep-local-model-copy",
                 "--remote-storage",
@@ -282,27 +279,20 @@
             ]
         )
 
-    structlogger.debug(
-
-    envs = os.environ.copy()
-    envs["RASA_TELEMETRY_ENABLED"] = "false"
-
-    process = subprocess.Popen(
-        full_command,
-        cwd=training_base_path,
-        stdout=open(log_path, "w"),
-        stderr=subprocess.STDOUT,
-        env=envs,
+    structlogger.debug(
+        "model_trainer.training_arguments", arguments=" ".join(arguments)
     )
 
+    warm_process = start_rasa_process(cwd=training_base_path, arguments=arguments)
+
     structlogger.info(
         "model_trainer.training_started",
         training_id=training_id,
         assistant_id=assistant_id,
         model_name=model_name,
         client_id=client_id,
-        log=
-        pid=process.pid,
+        log=logs_path(warm_process.log_id),
+        pid=warm_process.process.pid,
     )
 
     return TrainingSession(
@@ -312,7 +302,8 @@
         model_name=model_name,
         progress=0,
         status=TrainingSessionStatus.RUNNING,
-        process=process,  # Store the process handle
+        process=warm_process.process,  # Store the process handle
+        log_id=warm_process.log_id,
     )
 
 
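Both TrainingSession and BotSession now carry a log_id next to a live subprocess.Popen handle on a pydantic model. Popen is not a type pydantic can validate natively, which is presumably why the module imports ConfigDict (visible in the hunk header above). A minimal sketch of that pattern:

    import subprocess
    from typing import Optional
    from pydantic import BaseModel, ConfigDict

    class Session(BaseModel):
        # Popen is an arbitrary (non-pydantic) type, so the model must
        # opt in via arbitrary_types_allowed for this field to validate.
        model_config = ConfigDict(arbitrary_types_allowed=True)

        process: subprocess.Popen
        log_id: str
        returncode: Optional[int] = None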