rapidfireai 0.10.2rc5__py3-none-any.whl → 0.11.1rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of rapidfireai might be problematic.
- rapidfireai/automl/grid_search.py +4 -5
- rapidfireai/automl/model_config.py +41 -37
- rapidfireai/automl/random_search.py +21 -33
- rapidfireai/backend/controller.py +80 -161
- rapidfireai/backend/worker.py +26 -8
- rapidfireai/cli.py +171 -132
- rapidfireai/db/rf_db.py +1 -1
- rapidfireai/db/tables.sql +1 -1
- rapidfireai/dispatcher/dispatcher.py +3 -1
- rapidfireai/dispatcher/gunicorn.conf.py +1 -1
- rapidfireai/experiment.py +86 -7
- rapidfireai/frontend/build/asset-manifest.json +3 -3
- rapidfireai/frontend/build/index.html +1 -1
- rapidfireai/frontend/build/static/js/{main.1bf27639.js → main.58393d31.js} +3 -3
- rapidfireai/frontend/build/static/js/{main.1bf27639.js.map → main.58393d31.js.map} +1 -1
- rapidfireai/frontend/proxy_middleware.py +1 -1
- rapidfireai/ml/callbacks.py +85 -59
- rapidfireai/ml/trainer.py +42 -86
- rapidfireai/start.sh +117 -34
- rapidfireai/utils/constants.py +22 -1
- rapidfireai/utils/experiment_utils.py +87 -43
- rapidfireai/utils/interactive_controller.py +473 -0
- rapidfireai/utils/logging.py +1 -2
- rapidfireai/utils/metric_logger.py +346 -0
- rapidfireai/utils/mlflow_manager.py +0 -1
- rapidfireai/utils/ping.py +4 -2
- rapidfireai/utils/worker_manager.py +16 -6
- rapidfireai/version.py +2 -2
- {rapidfireai-0.10.2rc5.dist-info → rapidfireai-0.11.1rc1.dist-info}/METADATA +7 -4
- {rapidfireai-0.10.2rc5.dist-info → rapidfireai-0.11.1rc1.dist-info}/RECORD +36 -33
- tutorial_notebooks/rf-colab-tensorboard-tutorial.ipynb +314 -0
- /rapidfireai/frontend/build/static/js/{main.1bf27639.js.LICENSE.txt → main.58393d31.js.LICENSE.txt} +0 -0
- {rapidfireai-0.10.2rc5.dist-info → rapidfireai-0.11.1rc1.dist-info}/WHEEL +0 -0
- {rapidfireai-0.10.2rc5.dist-info → rapidfireai-0.11.1rc1.dist-info}/entry_points.txt +0 -0
- {rapidfireai-0.10.2rc5.dist-info → rapidfireai-0.11.1rc1.dist-info}/licenses/LICENSE +0 -0
- {rapidfireai-0.10.2rc5.dist-info → rapidfireai-0.11.1rc1.dist-info}/top_level.txt +0 -0
rapidfireai/cli.py
CHANGED
(Deleted lines ending in "…" are truncated in the source of this diff.)

@@ -3,15 +3,16 @@
 Command-line interface for RapidFire AI
 """
 
-import os
-import sys
-import subprocess
 import argparse
+import os
 import platform
-import shutil
 import re
+import shutil
 import site
+import subprocess
+import sys
 from pathlib import Path
+
 from .version import __version__
 
 
@@ -20,24 +21,24 @@ def get_script_path():
     # Get the directory where this package is installed
     package_dir = Path(__file__).parent
     script_path = package_dir / "start.sh"
-
+
     if not script_path.exists():
         # Fallback: try to find it relative to the current working directory
         script_path = Path.cwd() / "rapidfireai" / "start.sh"
         if not script_path.exists():
             raise FileNotFoundError(f"Could not find start.sh script at {script_path}")
-
+
     return script_path
 
 
 def run_script(args):
     """Run the start.sh script with the given arguments."""
     script_path = get_script_path()
-
+
     # Make sure the script is executable
     if not os.access(script_path, os.X_OK):
         os.chmod(script_path, 0o755)
-
+
     # Run the script with the provided arguments
     try:
         result = subprocess.run([str(script_path)] + args, check=True)
@@ -53,24 +54,27 @@ def run_script(args):
 def get_python_info():
     """Get comprehensive Python information."""
     info = {}
-
+
     # Python version and implementation
-    info[…
-    info[…
-    info[…
-
+    info["version"] = sys.version
+    info["implementation"] = platform.python_implementation()
+    info["executable"] = sys.executable
+
     # Environment information
-    info[…
-    info[…
-
+    info["conda_env"] = os.environ.get("CONDA_DEFAULT_ENV", "none")
+    info["venv"] = (
+        "yes"
+        if hasattr(sys, "real_prefix") or (hasattr(sys, "base_prefix") and sys.base_prefix != sys.prefix)
+        else "no"
+    )
+
     return info
 
 
 def get_pip_packages():
     """Get list of installed pip packages."""
     try:
-        result = subprocess.run([sys.executable, …
-                                capture_output=True, text=True, check=True)
+        result = subprocess.run([sys.executable, "-m", "pip", "list"], capture_output=True, text=True, check=True)
         return result.stdout
     except (subprocess.CalledProcessError, FileNotFoundError):
         return "Failed to get pip packages"
@@ -79,113 +83,107 @@ def get_pip_packages():
 def get_gpu_info():
     """Get comprehensive GPU and CUDA information."""
     info = {}
-
+
     # Check for nvidia-smi
-    nvidia_smi_path = shutil.which(…
-    info[…
-
+    nvidia_smi_path = shutil.which("nvidia-smi")
+    info["nvidia_smi"] = "found" if nvidia_smi_path else "not found"
+
     if nvidia_smi_path:
         try:
             # Get driver and CUDA runtime version from the full nvidia-smi output
-            result = subprocess.run([…
-                capture_output=True, text=True, check=True)
+            result = subprocess.run(["nvidia-smi"], capture_output=True, text=True, check=True)
             if result.stdout.strip():
-                lines = result.stdout.strip().split(…
+                lines = result.stdout.strip().split("\n")
                 # Look for the header line that contains CUDA version
                 for line in lines:
-                    if …
+                    if "CUDA Version:" in line:
                         # Extract CUDA version from line like "NVIDIA-SMI 535.183.06 Driver Version: 535.183.06 CUDA Version: 12.2"
-                        cuda_version = line.split(…
-                        info[…
+                        cuda_version = line.split("CUDA Version:")[1].split()[0]
+                        info["cuda_runtime"] = cuda_version
                         # Also extract driver version from the same line
-                        if …
-                            driver_version = line.split(…
-                            info[…
+                        if "Driver Version:" in line:
+                            driver_version = line.split("Driver Version:")[1].split("CUDA Version:")[0].strip()
+                            info["driver_version"] = driver_version
                         break
                 else:
-                    info[…
-                    info[…
+                    info["driver_version"] = "unknown"
+                    info["cuda_runtime"] = "unknown"
         except (subprocess.CalledProcessError, ValueError):
-            info[…
-            info[…
-
+            info["driver_version"] = "unknown"
+            info["cuda_runtime"] = "unknown"
+
         # Get GPU count, models, and VRAM
         try:
-            result = subprocess.run(…
-                …
+            result = subprocess.run(
+                ["nvidia-smi", "--query-gpu=count,name,memory.total", "--format=csv,noheader,nounits"],
+                capture_output=True,
+                text=True,
+                check=True,
+            )
             if result.stdout.strip():
-                lines = result.stdout.strip().split(…
+                lines = result.stdout.strip().split("\n")
                 if lines:
-                    count, name, memory = lines[0].split(…
-                    info[…
-                    info[…
+                    count, name, memory = lines[0].split(", ")
+                    info["gpu_count"] = int(count)
+                    info["gpu_model"] = name.strip()
                     # Convert memory from MiB to GB
                     memory_mib = int(memory.split()[0])
                     memory_gb = memory_mib / 1024
-                    info[…
-
+                    info["gpu_memory_gb"] = f"{memory_gb:.1f}"
+
                     # Get detailed info for multiple GPUs if present
-                    if info[…
-                        info[…
+                    if info["gpu_count"] > 1:
+                        info["gpu_details"] = []
                         for line in lines:
-                            count, name, memory = line.split(…
+                            count, name, memory = line.split(", ")
                             memory_mib = int(memory.split()[0])
                             memory_gb = memory_mib / 1024
-                            info[…
-                                'name': name.strip(),
-                                'memory_gb': f"{memory_gb:.1f}"
-                            })
+                            info["gpu_details"].append({"name": name.strip(), "memory_gb": f"{memory_gb:.1f}"})
         except (subprocess.CalledProcessError, ValueError):
-            info[…
-            info[…
-            info[…
+            info["gpu_count"] = 0
+            info["gpu_model"] = "unknown"
+            info["gpu_memory_gb"] = "unknown"
     else:
-        info[…
-        info[…
-        info[…
-        info[…
-        info[…
-
+        info["driver_version"] = "N/A"
+        info["cuda_runtime"] = "N/A"
+        info["gpu_count"] = 0
+        info["gpu_model"] = "N/A"
+        info["gpu_memory_gb"] = "N/A"
+
     # Check for nvcc (CUDA compiler)
-    nvcc_path = shutil.which(…
-    info[…
-
+    nvcc_path = shutil.which("nvcc")
+    info["nvcc"] = "found" if nvcc_path else "not found"
+
     if nvcc_path:
         try:
-            result = subprocess.run([…
-                capture_output=True, text=True, check=True)
+            result = subprocess.run(["nvcc", "--version"], capture_output=True, text=True, check=True)
             # Extract version from output like "Cuda compilation tools, release 11.8, V11.8.89"
-            version_line = result.stdout.split(…
-            if …
-                version = version_line.split(…
-                info[…
+            version_line = result.stdout.split("\n")[0]
+            if "release" in version_line:
+                version = version_line.split("release")[1].split(",")[0].strip()
+                info["nvcc_version"] = version
             else:
-                info[…
+                info["nvcc_version"] = "unknown"
         except subprocess.CalledProcessError:
-            info[…
+            info["nvcc_version"] = "unknown"
     else:
-        info[…
-
+        info["nvcc_version"] = "N/A"
+
     # Check CUDA installation paths
-    cuda_paths = […
-        …
-        '/opt/cuda',
-        '/usr/cuda',
-        os.path.expanduser('~/cuda')
-    ]
-
+    cuda_paths = ["/usr/local/cuda", "/opt/cuda", "/usr/cuda", os.path.expanduser("~/cuda")]
+
     cuda_installed = False
     for path in cuda_paths:
         if os.path.exists(path):
             cuda_installed = True
             break
-
-    info[…
-
+
+    info["cuda_installation"] = "present" if cuda_installed else "not present"
+
     # Check if CUDA is on PATH
-    cuda_on_path = any(…
-    info[…
-
+    cuda_on_path = any("cuda" in p.lower() for p in os.environ.get("PATH", "").split(os.pathsep))
+    info["cuda_on_path"] = "yes" if cuda_on_path else "no"
+
     return info
 
 
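The hunk above standardizes GPU discovery on nvidia-smi's CSV query interface. A minimal standalone sketch of that parsing pattern (illustrative only, not part of the package; it assumes nvidia-smi is on PATH):

    import subprocess

    def query_gpus():
        """Parse nvidia-smi CSV rows like '1, NVIDIA A100-SXM4-40GB, 40960'."""
        result = subprocess.run(
            ["nvidia-smi", "--query-gpu=count,name,memory.total", "--format=csv,noheader,nounits"],
            capture_output=True, text=True, check=True,
        )
        gpus = []
        for line in result.stdout.strip().split("\n"):
            count, name, memory = line.split(", ")
            # With nounits, memory.total is a bare MiB figure
            gpus.append({"name": name.strip(), "memory_gb": f"{int(memory) / 1024:.1f}"})
        return gpus
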
@@ -193,7 +191,7 @@ def run_doctor():
     """Run the doctor command to diagnose system issues."""
     print("🔍 RapidFire AI System Diagnostics")
     print("=" * 50)
-
+
     # Python Information
     print("\n🐍 Python Environment:")
     print("-" * 30)
@@ -203,94 +201,112 @@ def run_doctor():
     print(f"Executable: {python_info['executable']}")
     print(f"Conda Environment: {python_info['conda_env']}")
     print(f"Virtual Environment: {python_info['venv']}")
-
+
     # Pip Packages
     print("\n📦 Installed Packages:")
     print("-" * 30)
     pip_output = get_pip_packages()
     if pip_output != "Failed to get pip packages":
         # Show only relevant packages
-        relevant_packages = […
-        …
+        relevant_packages = [
+            "rapidfireai",
+            "mlflow",
+            "torch",
+            "transformers",
+            "flask",
+            "gunicorn",
+            "peft",
+            "trl",
+            "bitsandbytes",
+            "nltk",
+            "evaluate",
+            "rouge-score",
+            "sentencepiece",
+        ]
+        lines = pip_output.split("\n")
         for line in lines:
             if any(pkg.lower() in line.lower() for pkg in relevant_packages):
                 print(line)
         print("... (showing only relevant packages)")
     else:
         print(pip_output)
-
+
     # GPU Information
     print("\n🚀 GPU & CUDA Information:")
     print("-" * 30)
     gpu_info = get_gpu_info()
     print(f"nvidia-smi: {gpu_info['nvidia_smi']}")
-
-    if gpu_info[…
+
+    if gpu_info["nvidia_smi"] == "found":
         print(f"Driver Version: {gpu_info['driver_version']}")
         print(f"CUDA Runtime: {gpu_info['cuda_runtime']}")
         print(f"GPU Count: {gpu_info['gpu_count']}")
-
-        if gpu_info[…
-            if …
+
+        if gpu_info["gpu_count"] > 0:
+            if "gpu_details" in gpu_info:
                 print("GPU Details:")
-                for i, gpu in enumerate(gpu_info[…
+                for i, gpu in enumerate(gpu_info["gpu_details"]):
                     print(f"  GPU {i}: {gpu['name']} ({gpu['memory_gb']} GB)")
             else:
                 print(f"GPU Model: {gpu_info['gpu_model']}")
                 print(f"Total VRAM: {gpu_info['gpu_memory_gb']} GB")
-
+
         print(f"nvcc: {gpu_info['nvcc']}")
-        if gpu_info[…
+        if gpu_info["nvcc"] == "found":
             print(f"nvcc Version: {gpu_info['nvcc_version']}")
-
+
         print(f"CUDA Installation: {gpu_info['cuda_installation']}")
         print(f"CUDA on PATH: {gpu_info['cuda_on_path']}")
-
+
     # System Information
     print("\n💻 System Information:")
     print("-" * 30)
     print(f"Platform: {platform.platform()}")
     print(f"Architecture: {platform.machine()}")
     print(f"Processor: {platform.processor()}")
-
+
     # Environment Variables
     print("\n🔧 Environment Variables:")
     print("-" * 30)
-    relevant_vars = […
+    relevant_vars = ["CUDA_HOME", "CUDA_PATH", "LD_LIBRARY_PATH", "PATH"]
     for var in relevant_vars:
-        value = os.environ.get(var, …
-        if value != …
+        value = os.environ.get(var, "not set")
+        if value != "not set" and len(value) > 100:
             value = value[:100] + "..."
         print(f"{var}: {value}")
-
+
     print("\n✅ Diagnostics complete!")
     return 0
 
+
 def get_cuda_version():
     """Detect CUDA version from nvcc or nvidia-smi"""
     try:
-        result = subprocess.run([…
-        …
-        match = re.search(r'release (\d+)\.(\d+)', result.stdout)
+        result = subprocess.run(["nvcc", "--version"], capture_output=True, text=True, check=True)
+        match = re.search(r"release (\d+)\.(\d+)", result.stdout)
         if match:
             return int(match.group(1))
     except (subprocess.CalledProcessError, FileNotFoundError):
         try:
-            result = subprocess.run([…
-            …
-            match = re.search(r'CUDA Version: (\d+)\.(\d+)', result.stdout)
+            result = subprocess.run(["nvidia-smi"], capture_output=True, text=True, check=True)
+            match = re.search(r"CUDA Version: (\d+)\.(\d+)", result.stdout)
             if match:
                 return int(match.group(1))
         except (subprocess.CalledProcessError, FileNotFoundError):
             pass
     return None
 
+
 def get_compute_capability():
     """Get compute capability from nvidia-smi"""
     try:
-        result = subprocess.run(…
-        …
-        …
+        result = subprocess.run(
+            ["nvidia-smi", "--query-gpu=compute_cap", "--format=csv,noheader,nounits"],
+            capture_output=True,
+            text=True,
+            check=True,
+        )
+        match = re.search(r"(\d+)\.(\d+)", result.stdout)
         if match:
             major = int(match.group(1))
             minor = int(match.group(2))
@@ -298,6 +314,7 @@ def get_compute_capability():
     except (subprocess.CalledProcessError, FileNotFoundError):
         return None
 
+
 def install_packages():
     """Install packages for the RapidFire AI project."""
     packages = []
@@ -314,11 +331,11 @@ def install_packages():
     # packages.append({"package": "vllm==0.10.1.1", "extra_args": ["--torch-backend=cu118"]})
     # else:
     # print("\n⚠️ CUDA version not detected or unsupported.")
-
+
     ## TODO: re-enable once flash-attn has fix
     # if cuda_major is not None:
     # print(f"\n🎯 Detected CUDA {cuda_major}.x")
-
+
     # # Determine flash-attn version based on CUDA version
     # if cuda_major < 8:
     # # flash-attn 1.x for CUDA < 8.0
@@ -349,13 +366,14 @@ def install_packages():
             print(f" You may need to install {package} manually")
     return 0
 
+
 def copy_tutorial_notebooks():
     """Copy the tutorial notebooks to the project."""
     print("Getting tutorial notebooks...")
     try:
         tutorial_path = os.getenv("RF_TUTORIAL_PATH", os.path.join(".", "tutorial_notebooks"))
         site_packages_path = site.getsitepackages()[0]
-        source_path =os.path.join(site_packages_path, "tutorial_notebooks")
+        source_path = os.path.join(site_packages_path, "tutorial_notebooks")
         print(f"Copying tutorial notebooks from {source_path} to {tutorial_path}...")
         os.makedirs(tutorial_path, exist_ok=True)
         shutil.copytree(source_path, tutorial_path, dirs_exist_ok=True)
@@ -378,29 +396,50 @@ def run_init():
 
     return 0
 
+
 def main():
     """Main entry point for the rapidfireai command."""
-    parser = argparse.ArgumentParser(
-        …
-        prog="rapidfireai"
-    )
-
+    parser = argparse.ArgumentParser(description="RapidFire AI - Start/stop/manage services", prog="rapidfireai")
+
     parser.add_argument(
         "command",
         nargs="?",
         default="start",
         choices=["start", "stop", "status", "restart", "setup", "doctor", "init"],
-        help="Command to execute (default: start)"
+        help="Command to execute (default: start)",
+    )
+
+    parser.add_argument("--version", action="version", version=f"RapidFire AI {__version__}")
+
+    parser.add_argument(
+        "--tracking-backend",
+        choices=["mlflow", "tensorboard", "both"],
+        default=os.getenv("RF_TRACKING_BACKEND", "mlflow"),
+        help="Tracking backend to use for metrics (default: mlflow)",
+    )
+
+    parser.add_argument(
+        "--tensorboard-log-dir",
+        default=os.getenv("RF_TENSORBOARD_LOG_DIR", None),
+        help="Directory for TensorBoard logs (default: {experiment_path}/tensorboard_logs)",
     )
-
+
     parser.add_argument(
-        "--…
-        action="…
-        …
+        "--colab",
+        action="store_true",
+        help="Run in Colab mode (skips frontend, conditionally starts MLflow based on tracking backend)",
     )
-
+
     args = parser.parse_args()
-
+
+    # Set environment variables from CLI args
+    if args.tracking_backend:
+        os.environ["RF_TRACKING_BACKEND"] = args.tracking_backend
+    if args.tensorboard_log_dir:
+        os.environ["RF_TENSORBOARD_LOG_DIR"] = args.tensorboard_log_dir
+    if args.colab:
+        os.environ["RF_COLAB_MODE"] = "true"
+
     # Handle doctor command separately
     if args.command == "doctor":
         return run_doctor()
@@ -408,10 +447,10 @@ def main():
     # Handle init command separately
     if args.command == "init":
        return run_init()
-
+
     # Run the script with the specified command
     return run_script([args.command])
 
 
 if __name__ == "__main__":
-    sys.exit(main())
+    sys.exit(main())
rapidfireai/db/rf_db.py
CHANGED
rapidfireai/db/tables.sql
CHANGED
@@ -2,7 +2,7 @@
 CREATE TABLE IF NOT EXISTS experiments (
     experiment_id INTEGER PRIMARY KEY AUTOINCREMENT,
     experiment_name TEXT NOT NULL,
-    mlflow_experiment_id TEXT
+    mlflow_experiment_id TEXT,
     config_options TEXT NOT NULL,
     status TEXT NOT NULL,
     current_task TEXT NOT NULL,
rapidfireai/dispatcher/dispatcher.py
CHANGED

@@ -30,7 +30,9 @@ class Dispatcher:
         self.app: Flask = Flask(__name__)
 
         # Enable CORS for all routes
-        …
+        # Allow all origins for local development (dispatcher runs on localhost)
+        # This is safe since the API is not exposed to the internet
+        _ = CORS(self.app, resources={r"/*": {"origins": "*"}})
 
         # register routes
         self.register_routes()
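The dispatcher change makes the permissive CORS policy explicit. A minimal sketch of the flask-cors pattern it now uses (standalone and illustrative; the route name is an assumption):

    from flask import Flask
    from flask_cors import CORS

    app = Flask(__name__)
    # Allow any origin on every route; acceptable only because the
    # dispatcher binds to localhost and is not internet-facing.
    CORS(app, resources={r"/*": {"origins": "*"}})

    @app.route("/health")
    def health():
        return {"status": "ok"}
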
rapidfireai/dispatcher/gunicorn.conf.py
CHANGED

@@ -5,7 +5,7 @@ from rapidfireai.utils.constants import DispatcherConfig
 
 # Other Gunicorn settings...
 bind = f"{DispatcherConfig.HOST}:{DispatcherConfig.PORT}"
-workers = …
+workers = 1  # Single worker for Colab/single-user environments to save memory
 
 wsgi_app = "rapidfireai.dispatcher.dispatcher:serve_forever()"
 
|