rapidfireai 0.9.9__py3-none-any.whl → 0.9.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rapidfireai might be problematic. Click here for more details.
- rapidfireai/cli.py +25 -5
- rapidfireai/experiment.py +5 -1
- rapidfireai/start.sh +154 -141
- rapidfireai/utils/constants.py +1 -2
- rapidfireai/utils/ping.py +29 -0
- rapidfireai/utils/shm_manager.py +15 -11
- rapidfireai/version.py +2 -2
- {rapidfireai-0.9.9.dist-info → rapidfireai-0.9.11.dist-info}/METADATA +40 -11
- {rapidfireai-0.9.9.dist-info → rapidfireai-0.9.11.dist-info}/RECORD +19 -12
- rapidfireai-0.9.11.dist-info/entry_points.txt +2 -0
- tutorial_notebooks/rf-tutorial-dpo-alignment-lite.ipynb +412 -0
- tutorial_notebooks/rf-tutorial-dpo-alignment.ipynb +427 -0
- tutorial_notebooks/rf-tutorial-grpo-mathreasoning-lite.ipynb +358 -0
- tutorial_notebooks/rf-tutorial-grpo-mathreasoning.ipynb +371 -0
- tutorial_notebooks/rf-tutorial-sft-chatqa-lite.ipynb +329 -0
- tutorial_notebooks/rf-tutorial-sft-chatqa.ipynb +331 -0
- rapidfireai-0.9.9.dist-info/entry_points.txt +0 -2
- {rapidfireai-0.9.9.dist-info → rapidfireai-0.9.11.dist-info}/WHEEL +0 -0
- {rapidfireai-0.9.9.dist-info → rapidfireai-0.9.11.dist-info}/licenses/LICENSE +0 -0
- {rapidfireai-0.9.9.dist-info → rapidfireai-0.9.11.dist-info}/top_level.txt +0 -0
rapidfireai/cli.py
CHANGED
|
@@ -10,6 +10,7 @@ import argparse
|
|
|
10
10
|
import platform
|
|
11
11
|
import shutil
|
|
12
12
|
import re
|
|
13
|
+
import site
|
|
13
14
|
from pathlib import Path
|
|
14
15
|
from .version import __version__
|
|
15
16
|
|
|
@@ -301,8 +302,6 @@ def install_packages():
|
|
|
301
302
|
# Generate CUDA requirements file
|
|
302
303
|
cuda_major = get_cuda_version()
|
|
303
304
|
compute_capability = get_compute_capability()
|
|
304
|
-
print(f"CUDA major version: {cuda_major}")
|
|
305
|
-
print(f"Compute capability: {compute_capability}")
|
|
306
305
|
if cuda_major == 12:
|
|
307
306
|
print(f"\n🎯 Detected CUDA {cuda_major}.x")
|
|
308
307
|
packages.append({"package": "vllm==0.10.1.1", "extra_args": ["--torch-backend=cu126"]})
|
|
@@ -313,7 +312,7 @@ def install_packages():
|
|
|
313
312
|
print("\n⚠️ CUDA version not detected or unsupported.")
|
|
314
313
|
if compute_capability == 7:
|
|
315
314
|
print(f"\n🎯 Detected CUDA Compute Capability {compute_capability}.x")
|
|
316
|
-
|
|
315
|
+
print("Skipping flash-attn installation")
|
|
317
316
|
elif compute_capability == 8:
|
|
318
317
|
print(f"\n🎯 Detected CUDA Compute Capability {compute_capability}.x")
|
|
319
318
|
packages.append({"package": "flash-attn==2.8.3", "extra_args": ["--no-build-isolation"]})
|
|
@@ -333,19 +332,40 @@ def install_packages():
|
|
|
333
332
|
print(f" You may need to install {package} manually")
|
|
334
333
|
return 0
|
|
335
334
|
|
|
335
|
+
def copy_tutorial_notebooks():
|
|
336
|
+
"""Copy the tutorial notebooks to the project."""
|
|
337
|
+
print("Getting tutorial notebooks...")
|
|
338
|
+
try:
|
|
339
|
+
tutorial_path = os.getenv("RF_TUTORIAL_PATH", os.path.join(".", "tutorial_notebooks"))
|
|
340
|
+
site_packages_path = site.getsitepackages()[0]
|
|
341
|
+
source_path =os.path.join(site_packages_path, "tutorial_notebooks")
|
|
342
|
+
print(f"Copying tutorial notebooks from {source_path} to {tutorial_path}...")
|
|
343
|
+
os.makedirs(tutorial_path, exist_ok=True)
|
|
344
|
+
shutil.copytree(source_path, tutorial_path, dirs_exist_ok=True)
|
|
345
|
+
print(f"✅ Successfully copied notebooks to {tutorial_path}")
|
|
346
|
+
except Exception as e:
|
|
347
|
+
print(f"❌ Failed to copy notebooks to {tutorial_path}")
|
|
348
|
+
print(f" Error: {e}")
|
|
349
|
+
print(" You may need to copy notebooks manually")
|
|
350
|
+
return 1
|
|
351
|
+
return 0
|
|
352
|
+
|
|
353
|
+
|
|
336
354
|
def run_init():
|
|
337
355
|
"""Run the init command to initialize the project."""
|
|
338
356
|
print("🔧 Initializing RapidFire AI project...")
|
|
339
357
|
print("-" * 30)
|
|
340
358
|
print("Initializing project...")
|
|
341
359
|
install_packages()
|
|
360
|
+
copy_tutorial_notebooks()
|
|
361
|
+
|
|
342
362
|
return 0
|
|
343
363
|
|
|
344
364
|
def main():
|
|
345
|
-
"""Main entry point for the
|
|
365
|
+
"""Main entry point for the rapidfireai command."""
|
|
346
366
|
parser = argparse.ArgumentParser(
|
|
347
367
|
description="RapidFire AI - Start/stop/manage services",
|
|
348
|
-
prog="
|
|
368
|
+
prog="rapidfireai"
|
|
349
369
|
)
|
|
350
370
|
|
|
351
371
|
parser.add_argument(
|
rapidfireai/experiment.py
CHANGED
|
@@ -5,7 +5,8 @@ This module contains the Experiment class which manages the entire experiment li
|
|
|
5
5
|
import multiprocessing as mp
|
|
6
6
|
import os
|
|
7
7
|
import traceback
|
|
8
|
-
from
|
|
8
|
+
from collections.abc import Callable
|
|
9
|
+
from typing import Any
|
|
9
10
|
|
|
10
11
|
import pandas as pd
|
|
11
12
|
from torch.utils.data import Dataset
|
|
@@ -17,6 +18,7 @@ from rapidfireai.utils.exceptions import ExperimentException
|
|
|
17
18
|
from rapidfireai.utils.experiment_utils import ExperimentUtils
|
|
18
19
|
from rapidfireai.utils.logging import RFLogger
|
|
19
20
|
from rapidfireai.utils.mlflow_manager import MLflowManager
|
|
21
|
+
from rapidfireai.version import __version__
|
|
20
22
|
|
|
21
23
|
|
|
22
24
|
class Experiment:
|
|
@@ -61,6 +63,8 @@ class Experiment:
|
|
|
61
63
|
self.logger = RFLogger().create_logger("experiment")
|
|
62
64
|
for msg in log_messages:
|
|
63
65
|
self.logger.info(msg)
|
|
66
|
+
# Log the version of rapidfireai that is running
|
|
67
|
+
self.logger.info(f"Running RapidFire AI version {__version__}")
|
|
64
68
|
except Exception as e:
|
|
65
69
|
raise ExperimentException(f"Error creating logger: {e}, traceback: {traceback.format_exc()}") from e
|
|
66
70
|
|