rapidfireai 0.10.2rc5__py3-none-any.whl → 0.11.1rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rapidfireai might be problematic. Click here for more details.

Files changed (36)
  1. rapidfireai/automl/grid_search.py +4 -5
  2. rapidfireai/automl/model_config.py +41 -37
  3. rapidfireai/automl/random_search.py +21 -33
  4. rapidfireai/backend/controller.py +80 -161
  5. rapidfireai/backend/worker.py +26 -8
  6. rapidfireai/cli.py +171 -132
  7. rapidfireai/db/rf_db.py +1 -1
  8. rapidfireai/db/tables.sql +1 -1
  9. rapidfireai/dispatcher/dispatcher.py +3 -1
  10. rapidfireai/dispatcher/gunicorn.conf.py +1 -1
  11. rapidfireai/experiment.py +86 -7
  12. rapidfireai/frontend/build/asset-manifest.json +3 -3
  13. rapidfireai/frontend/build/index.html +1 -1
  14. rapidfireai/frontend/build/static/js/{main.1bf27639.js → main.58393d31.js} +3 -3
  15. rapidfireai/frontend/build/static/js/{main.1bf27639.js.map → main.58393d31.js.map} +1 -1
  16. rapidfireai/frontend/proxy_middleware.py +1 -1
  17. rapidfireai/ml/callbacks.py +85 -59
  18. rapidfireai/ml/trainer.py +42 -86
  19. rapidfireai/start.sh +117 -34
  20. rapidfireai/utils/constants.py +22 -1
  21. rapidfireai/utils/experiment_utils.py +87 -43
  22. rapidfireai/utils/interactive_controller.py +473 -0
  23. rapidfireai/utils/logging.py +1 -2
  24. rapidfireai/utils/metric_logger.py +346 -0
  25. rapidfireai/utils/mlflow_manager.py +0 -1
  26. rapidfireai/utils/ping.py +4 -2
  27. rapidfireai/utils/worker_manager.py +16 -6
  28. rapidfireai/version.py +2 -2
  29. {rapidfireai-0.10.2rc5.dist-info → rapidfireai-0.11.1rc1.dist-info}/METADATA +7 -4
  30. {rapidfireai-0.10.2rc5.dist-info → rapidfireai-0.11.1rc1.dist-info}/RECORD +36 -33
  31. tutorial_notebooks/rf-colab-tensorboard-tutorial.ipynb +314 -0
  32. /rapidfireai/frontend/build/static/js/{main.1bf27639.js.LICENSE.txt → main.58393d31.js.LICENSE.txt} +0 -0
  33. {rapidfireai-0.10.2rc5.dist-info → rapidfireai-0.11.1rc1.dist-info}/WHEEL +0 -0
  34. {rapidfireai-0.10.2rc5.dist-info → rapidfireai-0.11.1rc1.dist-info}/entry_points.txt +0 -0
  35. {rapidfireai-0.10.2rc5.dist-info → rapidfireai-0.11.1rc1.dist-info}/licenses/LICENSE +0 -0
  36. {rapidfireai-0.10.2rc5.dist-info → rapidfireai-0.11.1rc1.dist-info}/top_level.txt +0 -0
rapidfireai/experiment.py CHANGED
@@ -17,9 +17,11 @@ from rapidfireai.utils.constants import MLFLOW_URL
17
17
  from rapidfireai.utils.exceptions import ExperimentException
18
18
  from rapidfireai.utils.experiment_utils import ExperimentUtils
19
19
  from rapidfireai.utils.logging import RFLogger
20
- from rapidfireai.utils.mlflow_manager import MLflowManager
21
20
  from rapidfireai.version import __version__
22
21
 
22
+ # Note: MLflowManager is imported lazily in get_results() to avoid
23
+ # connection attempts when using tensorboard-only mode
24
+
23
25
 
24
26
  class Experiment:
25
27
  """Class to manage the entire experiment lifecycle."""
@@ -39,6 +41,7 @@ class Experiment:
39
41
  self.experiment_id: int | None = None
40
42
  self.log_server_process: mp.Process | None = None
41
43
  self.worker_processes: list[mp.Process] = []
44
+ self._training_thread: Any = None # Track background training thread (Colab only)
42
45
 
43
46
  # create db tables
44
47
  try:
@@ -88,13 +91,76 @@ class Experiment:
88
91
  seed: int = 42,
89
92
  ) -> None:
90
93
  """Run the fit"""
94
+
95
+ # Check if training is already running
96
+ if self._training_thread is not None and self._training_thread.is_alive():
97
+ print("⚠️ Training is already running in background. Please wait for it to complete.")
98
+ return
99
+
100
+ # Detect if running in Google Colab
91
101
  try:
92
- controller = Controller(self.experiment_id, self.experiment_name)
93
- controller.run_fit(param_config, create_model_fn, train_dataset, eval_dataset, num_chunks, seed)
94
- except Exception as e:
95
- if hasattr(self, "logger"):
96
- self.logger.opt(exception=True).error(f"Error running fit: {e}")
97
- raise ExperimentException(f"Error running fit: {e}, traceback: {traceback.format_exc()}") from e
102
+ import google.colab
103
+
104
+ in_colab = True
105
+ except ImportError:
106
+ in_colab = False
107
+
108
+ if in_colab:
109
+ # Run Controller in background thread to keep kernel responsive
110
+ import sys
111
+ import threading
112
+ from io import StringIO
113
+
114
+ from IPython.display import HTML, display
115
+
116
+ def _run_controller_background():
117
+ """Run controller in background thread with output suppression"""
118
+ # Suppress stdout to avoid print statements appearing in wrong cells
119
+ old_stdout = sys.stdout
120
+ sys.stdout = StringIO()
121
+
122
+ try:
123
+ controller = Controller(self.experiment_id, self.experiment_name)
124
+ controller.run_fit(param_config, create_model_fn, train_dataset, eval_dataset, num_chunks, seed)
125
+ except Exception as e:
126
+ # Restore stdout for error logging
127
+ sys.stdout = old_stdout
128
+ if hasattr(self, "logger"):
129
+ self.logger.opt(exception=True).error(f"Error in background training: {e}")
130
+ display(HTML(f'<p style="color: red; font-weight: bold;">❌ Error in background training: {e}</p>'))
131
+ finally:
132
+ # Restore stdout
133
+ sys.stdout = old_stdout
134
+ # Display completion message
135
+ display(
136
+ HTML(
137
+ '<p style="color: blue; font-weight: bold;">🎉 Training completed! Check InteractiveController for final results.</p>'
138
+ )
139
+ )
140
+ self._training_thread = None
141
+
142
+ self._training_thread = threading.Thread(target=_run_controller_background, daemon=True)
143
+ self._training_thread.start()
144
+
145
+ # Use IPython display for reliable output in Colab
146
+ display(
147
+ HTML(
148
+ '<div style="padding: 10px; background-color: #d4edda; border: 1px solid #28a745; border-radius: 5px; color: #155724;">'
149
+ "<b>✓ Training started in background</b><br>"
150
+ "Use InteractiveController to monitor progress. The notebook kernel will remain responsive while training runs.<br>"
151
+ "<small>Tip: Interact with InteractiveController periodically to keep Colab active.</small>"
152
+ "</div>"
153
+ )
154
+ )
155
+ else:
156
+ # Original blocking behavior for non-Colab environments
157
+ try:
158
+ controller = Controller(self.experiment_id, self.experiment_name)
159
+ controller.run_fit(param_config, create_model_fn, train_dataset, eval_dataset, num_chunks, seed)
160
+ except Exception as e:
161
+ if hasattr(self, "logger"):
162
+ self.logger.opt(exception=True).error(f"Error running fit: {e}")
163
+ raise ExperimentException(f"Error running fit: {e}, traceback: {traceback.format_exc()}") from e
98
164
 
99
165
  def get_results(self) -> pd.DataFrame:
100
166
  """
@@ -102,6 +168,19 @@ class Experiment:
102
168
  """
103
169
  try:
104
170
  runs_info_df = self.experiment_utils.get_runs_info()
171
+
172
+ # Check if there are any mlflow_run_ids before importing MLflow
173
+ has_mlflow_runs = (
174
+ runs_info_df.get("mlflow_run_id") is not None and runs_info_df["mlflow_run_id"].notna().any()
175
+ )
176
+
177
+ if not has_mlflow_runs:
178
+ # No MLflow runs to fetch, return empty DataFrame
179
+ return pd.DataFrame(columns=["run_id", "step"])
180
+
181
+ # Lazy import - only import when we actually have MLflow runs to fetch
182
+ from rapidfireai.utils.mlflow_manager import MLflowManager
183
+
105
184
  mlflow_manager = MLflowManager(MLFLOW_URL)
106
185
 
107
186
  metrics_data = []
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "files": {
3
3
  "main.css": "/static-files/static/css/main.702595df.css",
4
- "main.js": "/static-files/static/js/main.1bf27639.js",
4
+ "main.js": "/static-files/static/js/main.58393d31.js",
5
5
  "ml-model-trace-renderer.js": "/static-files/lib/notebook-trace-renderer/js/ml-model-trace-renderer.5490ebc325fe0f300ad9.js",
6
6
  "static/js/6019.9025341e.chunk.js": "/static-files/static/js/6019.9025341e.chunk.js",
7
7
  "static/js/6336.8153bc1c.chunk.js": "/static-files/static/js/6336.8153bc1c.chunk.js",
@@ -120,7 +120,7 @@
120
120
  "static/media/chart-line.svg": "/static-files/static/media/chart-line.0adaa2036bb4eb5956db6d0c7e925a3d.svg",
121
121
  "lib/notebook-trace-renderer/index.html": "/static-files/lib/notebook-trace-renderer/index.html",
122
122
  "main.702595df.css.map": "/static-files/static/css/main.702595df.css.map",
123
- "main.1bf27639.js.map": "/static-files/static/js/main.1bf27639.js.map",
123
+ "main.58393d31.js.map": "/static-files/static/js/main.58393d31.js.map",
124
124
  "ml-model-trace-renderer.js.map": "/static-files/lib/notebook-trace-renderer/js/ml-model-trace-renderer.5490ebc325fe0f300ad9.js.map",
125
125
  "6336.8153bc1c.chunk.js.map": "/static-files/static/js/6336.8153bc1c.chunk.js.map",
126
126
  "9478.cbf55ef3.chunk.js.map": "/static-files/static/js/9478.cbf55ef3.chunk.js.map",
@@ -216,6 +216,6 @@
216
216
  },
217
217
  "entrypoints": [
218
218
  "static/css/main.702595df.css",
219
- "static/js/main.1bf27639.js"
219
+ "static/js/main.58393d31.js"
220
220
  ]
221
221
  }
@@ -1 +1 @@
1
- <!doctype html><html lang="en"><head><meta charset="utf-8"/><meta name="viewport" content="width=device-width,initial-scale=1,shrink-to-fit=no"/><link rel="shortcut icon" href="./static-files/favicon.ico"/><meta name="theme-color" content="#000000"/><link rel="manifest" href="./static-files/manifest.json" crossorigin="use-credentials"/><title>RapidFire AI</title><script defer="defer" src="static-files/static/js/main.1bf27639.js"></script><link href="static-files/static/css/main.702595df.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root" class="mlflow-ui-container"></div><div id="modal" class="mlflow-ui-container"></div></body></html>
1
+ <!doctype html><html lang="en"><head><meta charset="utf-8"/><meta name="viewport" content="width=device-width,initial-scale=1,shrink-to-fit=no"/><link rel="shortcut icon" href="./static-files/favicon.ico"/><meta name="theme-color" content="#000000"/><link rel="manifest" href="./static-files/manifest.json" crossorigin="use-credentials"/><title>RapidFire AI</title><script defer="defer" src="static-files/static/js/main.58393d31.js"></script><link href="static-files/static/css/main.702595df.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root" class="mlflow-ui-container"></div><div id="modal" class="mlflow-ui-container"></div></body></html>