rapidfireai 0.10.2rc5__py3-none-any.whl → 0.11.1rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rapidfireai might be problematic. Click here for more details.
- rapidfireai/automl/grid_search.py +4 -5
- rapidfireai/automl/model_config.py +41 -37
- rapidfireai/automl/random_search.py +21 -33
- rapidfireai/backend/controller.py +80 -161
- rapidfireai/backend/worker.py +26 -8
- rapidfireai/cli.py +171 -132
- rapidfireai/db/rf_db.py +1 -1
- rapidfireai/db/tables.sql +1 -1
- rapidfireai/dispatcher/dispatcher.py +3 -1
- rapidfireai/dispatcher/gunicorn.conf.py +1 -1
- rapidfireai/experiment.py +86 -7
- rapidfireai/frontend/build/asset-manifest.json +3 -3
- rapidfireai/frontend/build/index.html +1 -1
- rapidfireai/frontend/build/static/js/{main.1bf27639.js → main.58393d31.js} +3 -3
- rapidfireai/frontend/build/static/js/{main.1bf27639.js.map → main.58393d31.js.map} +1 -1
- rapidfireai/frontend/proxy_middleware.py +1 -1
- rapidfireai/ml/callbacks.py +85 -59
- rapidfireai/ml/trainer.py +42 -86
- rapidfireai/start.sh +117 -34
- rapidfireai/utils/constants.py +22 -1
- rapidfireai/utils/experiment_utils.py +87 -43
- rapidfireai/utils/interactive_controller.py +473 -0
- rapidfireai/utils/logging.py +1 -2
- rapidfireai/utils/metric_logger.py +346 -0
- rapidfireai/utils/mlflow_manager.py +0 -1
- rapidfireai/utils/ping.py +4 -2
- rapidfireai/utils/worker_manager.py +16 -6
- rapidfireai/version.py +2 -2
- {rapidfireai-0.10.2rc5.dist-info → rapidfireai-0.11.1rc1.dist-info}/METADATA +7 -4
- {rapidfireai-0.10.2rc5.dist-info → rapidfireai-0.11.1rc1.dist-info}/RECORD +36 -33
- tutorial_notebooks/rf-colab-tensorboard-tutorial.ipynb +314 -0
- /rapidfireai/frontend/build/static/js/{main.1bf27639.js.LICENSE.txt → main.58393d31.js.LICENSE.txt} +0 -0
- {rapidfireai-0.10.2rc5.dist-info → rapidfireai-0.11.1rc1.dist-info}/WHEEL +0 -0
- {rapidfireai-0.10.2rc5.dist-info → rapidfireai-0.11.1rc1.dist-info}/entry_points.txt +0 -0
- {rapidfireai-0.10.2rc5.dist-info → rapidfireai-0.11.1rc1.dist-info}/licenses/LICENSE +0 -0
- {rapidfireai-0.10.2rc5.dist-info → rapidfireai-0.11.1rc1.dist-info}/top_level.txt +0 -0
rapidfireai/experiment.py
CHANGED
|
@@ -17,9 +17,11 @@ from rapidfireai.utils.constants import MLFLOW_URL
|
|
|
17
17
|
from rapidfireai.utils.exceptions import ExperimentException
|
|
18
18
|
from rapidfireai.utils.experiment_utils import ExperimentUtils
|
|
19
19
|
from rapidfireai.utils.logging import RFLogger
|
|
20
|
-
from rapidfireai.utils.mlflow_manager import MLflowManager
|
|
21
20
|
from rapidfireai.version import __version__
|
|
22
21
|
|
|
22
|
+
# Note: MLflowManager is imported lazily in get_results() to avoid
|
|
23
|
+
# connection attempts when using tensorboard-only mode
|
|
24
|
+
|
|
23
25
|
|
|
24
26
|
class Experiment:
|
|
25
27
|
"""Class to manage the entire experiment lifecycle."""
|
|
@@ -39,6 +41,7 @@ class Experiment:
|
|
|
39
41
|
self.experiment_id: int | None = None
|
|
40
42
|
self.log_server_process: mp.Process | None = None
|
|
41
43
|
self.worker_processes: list[mp.Process] = []
|
|
44
|
+
self._training_thread: Any = None # Track background training thread (Colab only)
|
|
42
45
|
|
|
43
46
|
# create db tables
|
|
44
47
|
try:
|
|
@@ -88,13 +91,76 @@ class Experiment:
|
|
|
88
91
|
seed: int = 42,
|
|
89
92
|
) -> None:
|
|
90
93
|
"""Run the fit"""
|
|
94
|
+
|
|
95
|
+
# Check if training is already running
|
|
96
|
+
if self._training_thread is not None and self._training_thread.is_alive():
|
|
97
|
+
print("⚠️ Training is already running in background. Please wait for it to complete.")
|
|
98
|
+
return
|
|
99
|
+
|
|
100
|
+
# Detect if running in Google Colab
|
|
91
101
|
try:
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
102
|
+
import google.colab
|
|
103
|
+
|
|
104
|
+
in_colab = True
|
|
105
|
+
except ImportError:
|
|
106
|
+
in_colab = False
|
|
107
|
+
|
|
108
|
+
if in_colab:
|
|
109
|
+
# Run Controller in background thread to keep kernel responsive
|
|
110
|
+
import sys
|
|
111
|
+
import threading
|
|
112
|
+
from io import StringIO
|
|
113
|
+
|
|
114
|
+
from IPython.display import HTML, display
|
|
115
|
+
|
|
116
|
+
def _run_controller_background():
|
|
117
|
+
"""Run controller in background thread with output suppression"""
|
|
118
|
+
# Suppress stdout to avoid print statements appearing in wrong cells
|
|
119
|
+
old_stdout = sys.stdout
|
|
120
|
+
sys.stdout = StringIO()
|
|
121
|
+
|
|
122
|
+
try:
|
|
123
|
+
controller = Controller(self.experiment_id, self.experiment_name)
|
|
124
|
+
controller.run_fit(param_config, create_model_fn, train_dataset, eval_dataset, num_chunks, seed)
|
|
125
|
+
except Exception as e:
|
|
126
|
+
# Restore stdout for error logging
|
|
127
|
+
sys.stdout = old_stdout
|
|
128
|
+
if hasattr(self, "logger"):
|
|
129
|
+
self.logger.opt(exception=True).error(f"Error in background training: {e}")
|
|
130
|
+
display(HTML(f'<p style="color: red; font-weight: bold;">❌ Error in background training: {e}</p>'))
|
|
131
|
+
finally:
|
|
132
|
+
# Restore stdout
|
|
133
|
+
sys.stdout = old_stdout
|
|
134
|
+
# Display completion message
|
|
135
|
+
display(
|
|
136
|
+
HTML(
|
|
137
|
+
'<p style="color: blue; font-weight: bold;">🎉 Training completed! Check InteractiveController for final results.</p>'
|
|
138
|
+
)
|
|
139
|
+
)
|
|
140
|
+
self._training_thread = None
|
|
141
|
+
|
|
142
|
+
self._training_thread = threading.Thread(target=_run_controller_background, daemon=True)
|
|
143
|
+
self._training_thread.start()
|
|
144
|
+
|
|
145
|
+
# Use IPython display for reliable output in Colab
|
|
146
|
+
display(
|
|
147
|
+
HTML(
|
|
148
|
+
'<div style="padding: 10px; background-color: #d4edda; border: 1px solid #28a745; border-radius: 5px; color: #155724;">'
|
|
149
|
+
"<b>✓ Training started in background</b><br>"
|
|
150
|
+
"Use InteractiveController to monitor progress. The notebook kernel will remain responsive while training runs.<br>"
|
|
151
|
+
"<small>Tip: Interact with InteractiveController periodically to keep Colab active.</small>"
|
|
152
|
+
"</div>"
|
|
153
|
+
)
|
|
154
|
+
)
|
|
155
|
+
else:
|
|
156
|
+
# Original blocking behavior for non-Colab environments
|
|
157
|
+
try:
|
|
158
|
+
controller = Controller(self.experiment_id, self.experiment_name)
|
|
159
|
+
controller.run_fit(param_config, create_model_fn, train_dataset, eval_dataset, num_chunks, seed)
|
|
160
|
+
except Exception as e:
|
|
161
|
+
if hasattr(self, "logger"):
|
|
162
|
+
self.logger.opt(exception=True).error(f"Error running fit: {e}")
|
|
163
|
+
raise ExperimentException(f"Error running fit: {e}, traceback: {traceback.format_exc()}") from e
|
|
98
164
|
|
|
99
165
|
def get_results(self) -> pd.DataFrame:
|
|
100
166
|
"""
|
|
@@ -102,6 +168,19 @@ class Experiment:
|
|
|
102
168
|
"""
|
|
103
169
|
try:
|
|
104
170
|
runs_info_df = self.experiment_utils.get_runs_info()
|
|
171
|
+
|
|
172
|
+
# Check if there are any mlflow_run_ids before importing MLflow
|
|
173
|
+
has_mlflow_runs = (
|
|
174
|
+
runs_info_df.get("mlflow_run_id") is not None and runs_info_df["mlflow_run_id"].notna().any()
|
|
175
|
+
)
|
|
176
|
+
|
|
177
|
+
if not has_mlflow_runs:
|
|
178
|
+
# No MLflow runs to fetch, return empty DataFrame
|
|
179
|
+
return pd.DataFrame(columns=["run_id", "step"])
|
|
180
|
+
|
|
181
|
+
# Lazy import - only import when we actually have MLflow runs to fetch
|
|
182
|
+
from rapidfireai.utils.mlflow_manager import MLflowManager
|
|
183
|
+
|
|
105
184
|
mlflow_manager = MLflowManager(MLFLOW_URL)
|
|
106
185
|
|
|
107
186
|
metrics_data = []
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"files": {
|
|
3
3
|
"main.css": "/static-files/static/css/main.702595df.css",
|
|
4
|
-
"main.js": "/static-files/static/js/main.1bf27639.js",
|
|
4
|
+
"main.js": "/static-files/static/js/main.58393d31.js",
|
|
5
5
|
"ml-model-trace-renderer.js": "/static-files/lib/notebook-trace-renderer/js/ml-model-trace-renderer.5490ebc325fe0f300ad9.js",
|
|
6
6
|
"static/js/6019.9025341e.chunk.js": "/static-files/static/js/6019.9025341e.chunk.js",
|
|
7
7
|
"static/js/6336.8153bc1c.chunk.js": "/static-files/static/js/6336.8153bc1c.chunk.js",
|
|
@@ -120,7 +120,7 @@
|
|
|
120
120
|
"static/media/chart-line.svg": "/static-files/static/media/chart-line.0adaa2036bb4eb5956db6d0c7e925a3d.svg",
|
|
121
121
|
"lib/notebook-trace-renderer/index.html": "/static-files/lib/notebook-trace-renderer/index.html",
|
|
122
122
|
"main.702595df.css.map": "/static-files/static/css/main.702595df.css.map",
|
|
123
|
-
"main.1bf27639.js.map": "/static-files/static/js/main.1bf27639.js.map",
|
|
123
|
+
"main.58393d31.js.map": "/static-files/static/js/main.58393d31.js.map",
|
|
124
124
|
"ml-model-trace-renderer.js.map": "/static-files/lib/notebook-trace-renderer/js/ml-model-trace-renderer.5490ebc325fe0f300ad9.js.map",
|
|
125
125
|
"6336.8153bc1c.chunk.js.map": "/static-files/static/js/6336.8153bc1c.chunk.js.map",
|
|
126
126
|
"9478.cbf55ef3.chunk.js.map": "/static-files/static/js/9478.cbf55ef3.chunk.js.map",
|
|
@@ -216,6 +216,6 @@
|
|
|
216
216
|
},
|
|
217
217
|
"entrypoints": [
|
|
218
218
|
"static/css/main.702595df.css",
|
|
219
|
-
"static/js/main.1bf27639.js"
|
|
219
|
+
"static/js/main.58393d31.js"
|
|
220
220
|
]
|
|
221
221
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
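<!doctype html><html lang="en"><head><meta charset="utf-8"/><meta name="viewport" content="width=device-width,initial-scale=1,shrink-to-fit=no"/><link rel="shortcut icon" href="./static-files/favicon.ico"/><meta name="theme-color" content="#000000"/><link rel="manifest" href="./static-files/manifest.json" crossorigin="use-credentials"/><title>RapidFire AI</title><script defer="defer" src="static-files/static/js/main.1bf27639.js"></script><link href="static-files/static/css/main.702595df.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root" class="mlflow-ui-container"></div><div id="modal" class="mlflow-ui-container"></div></body></html>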
|
|
1
|
+
<!doctype html><html lang="en"><head><meta charset="utf-8"/><meta name="viewport" content="width=device-width,initial-scale=1,shrink-to-fit=no"/><link rel="shortcut icon" href="./static-files/favicon.ico"/><meta name="theme-color" content="#000000"/><link rel="manifest" href="./static-files/manifest.json" crossorigin="use-credentials"/><title>RapidFire AI</title><script defer="defer" src="static-files/static/js/main.58393d31.js"></script><link href="static-files/static/css/main.702595df.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root" class="mlflow-ui-container"></div><div id="modal" class="mlflow-ui-container"></div></body></html>
|