rapidfireai 0.10.2rc5__py3-none-any.whl → 0.11.1rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rapidfireai might be problematic. Click here for more details.
- rapidfireai/automl/grid_search.py +4 -5
- rapidfireai/automl/model_config.py +41 -37
- rapidfireai/automl/random_search.py +21 -33
- rapidfireai/backend/controller.py +80 -161
- rapidfireai/backend/worker.py +26 -8
- rapidfireai/cli.py +171 -132
- rapidfireai/db/rf_db.py +1 -1
- rapidfireai/db/tables.sql +1 -1
- rapidfireai/dispatcher/dispatcher.py +3 -1
- rapidfireai/dispatcher/gunicorn.conf.py +1 -1
- rapidfireai/experiment.py +86 -7
- rapidfireai/frontend/build/asset-manifest.json +3 -3
- rapidfireai/frontend/build/index.html +1 -1
- rapidfireai/frontend/build/static/js/{main.1bf27639.js → main.58393d31.js} +3 -3
- rapidfireai/frontend/build/static/js/{main.1bf27639.js.map → main.58393d31.js.map} +1 -1
- rapidfireai/frontend/proxy_middleware.py +1 -1
- rapidfireai/ml/callbacks.py +85 -59
- rapidfireai/ml/trainer.py +42 -86
- rapidfireai/start.sh +117 -34
- rapidfireai/utils/constants.py +22 -1
- rapidfireai/utils/experiment_utils.py +87 -43
- rapidfireai/utils/interactive_controller.py +473 -0
- rapidfireai/utils/logging.py +1 -2
- rapidfireai/utils/metric_logger.py +346 -0
- rapidfireai/utils/mlflow_manager.py +0 -1
- rapidfireai/utils/ping.py +4 -2
- rapidfireai/utils/worker_manager.py +16 -6
- rapidfireai/version.py +2 -2
- {rapidfireai-0.10.2rc5.dist-info → rapidfireai-0.11.1rc1.dist-info}/METADATA +7 -4
- {rapidfireai-0.10.2rc5.dist-info → rapidfireai-0.11.1rc1.dist-info}/RECORD +36 -33
- tutorial_notebooks/rf-colab-tensorboard-tutorial.ipynb +314 -0
- /rapidfireai/frontend/build/static/js/{main.1bf27639.js.LICENSE.txt → main.58393d31.js.LICENSE.txt} +0 -0
- {rapidfireai-0.10.2rc5.dist-info → rapidfireai-0.11.1rc1.dist-info}/WHEEL +0 -0
- {rapidfireai-0.10.2rc5.dist-info → rapidfireai-0.11.1rc1.dist-info}/entry_points.txt +0 -0
- {rapidfireai-0.10.2rc5.dist-info → rapidfireai-0.11.1rc1.dist-info}/licenses/LICENSE +0 -0
- {rapidfireai-0.10.2rc5.dist-info → rapidfireai-0.11.1rc1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,346 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Metric Logger abstraction layer for RapidFire AI.
|
|
3
|
+
|
|
4
|
+
This module provides a unified interface for logging metrics to different backends
|
|
5
|
+
(MLflow, TensorBoard, or both). This abstraction allows minimal changes to core ML code
|
|
6
|
+
while supporting multiple tracking systems.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from abc import ABC, abstractmethod
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Optional
|
|
12
|
+
|
|
13
|
+
# Note: MLflowManager is imported lazily in MLflowMetricLogger to avoid
|
|
14
|
+
# connection attempts when using tensorboard-only mode
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class MetricLogger(ABC):
    """
    Common contract for every metric-tracking backend.

    Concrete loggers (MLflow, TensorBoard, or a combination of them) implement
    the abstract methods below so that callers can create runs and record
    parameters and metrics through one interface, whichever backend is active.
    """

    @abstractmethod
    def create_run(self, run_name: str) -> str:
        """
        Start a new run.

        Args:
            run_name: Name for the run

        Returns:
            Identifier of the newly created run
        """

    @abstractmethod
    def log_param(self, run_id: str, key: str, value: str) -> None:
        """
        Record a parameter on a run.

        Args:
            run_id: Run identifier
            key: Parameter name
            value: Parameter value
        """

    @abstractmethod
    def log_metric(self, run_id: str, key: str, value: float, step: Optional[int] = None) -> None:
        """
        Record a metric value on a run.

        Args:
            run_id: Run identifier
            key: Metric name
            value: Metric value
            step: Optional step number for the metric
        """

    @abstractmethod
    def end_run(self, run_id: str) -> None:
        """
        Finish a run.

        Args:
            run_id: Run identifier
        """

    @abstractmethod
    def get_run_metrics(self, run_id: str) -> dict:
        """
        Fetch every metric recorded for a run.

        Args:
            run_id: Run identifier

        Returns:
            Dictionary of metrics
        """

    def delete_run(self, run_id: str) -> None:
        """
        Delete a run.

        Optional hook: the default implementation does nothing, because not
        all backends support deletion.

        Args:
            run_id: Run identifier
        """
        return None

    def clear_context(self) -> None:
        """
        Clear the tracking context.

        Optional hook: the default implementation does nothing, because not
        all backends keep a context.
        """
        return None
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class MLflowMetricLogger(MetricLogger):
    """
    MetricLogger backed by MLflow.

    Every operation is forwarded to an MLflowManager instance held in
    ``self.mlflow_manager``.
    """

    def __init__(self, tracking_uri: str):
        """
        Build the underlying MLflowManager.

        Args:
            tracking_uri: MLflow tracking server URI
        """
        # Imported here rather than at module level so that tensorboard-only
        # mode never triggers an MLflow connection attempt.
        from rapidfireai.utils.mlflow_manager import MLflowManager

        manager = MLflowManager(tracking_uri)
        self.mlflow_manager = manager

    def get_experiment(self, experiment_name: str) -> str:
        """
        Look up an existing experiment by name.

        Args:
            experiment_name: Name of the experiment

        Returns:
            Experiment ID
        """
        experiment_id = self.mlflow_manager.get_experiment(experiment_name)
        return experiment_id

    def create_run(self, run_name: str) -> str:
        """Create a new MLflow run and return its run ID."""
        run_id = self.mlflow_manager.create_run(run_name)
        return run_id

    def log_param(self, run_id: str, key: str, value: str) -> None:
        """Forward a parameter to MLflow."""
        manager = self.mlflow_manager
        manager.log_param(run_id, key, value)

    def log_metric(self, run_id: str, key: str, value: float, step: Optional[int] = None) -> None:
        """Forward a metric (and its optional step) to MLflow."""
        manager = self.mlflow_manager
        manager.log_metric(run_id, key, value, step=step)

    def end_run(self, run_id: str) -> None:
        """Finish the given MLflow run."""
        manager = self.mlflow_manager
        manager.end_run(run_id)

    def get_run_metrics(self, run_id: str) -> dict:
        """Return the metrics MLflow holds for the given run."""
        metrics = self.mlflow_manager.get_run_metrics(run_id)
        return metrics

    def delete_run(self, run_id: str) -> None:
        """Delete the given MLflow run."""
        manager = self.mlflow_manager
        manager.delete_run(run_id)

    def clear_context(self) -> None:
        """Clear the MLflow context held by the manager."""
        manager = self.mlflow_manager
        manager.clear_context()
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
class TensorBoardMetricLogger(MetricLogger):
    """
    TensorBoard implementation of MetricLogger.

    Uses torch.utils.tensorboard.SummaryWriter to log metrics to TensorBoard.
    One writer is kept per run; each run writes into its own subdirectory of
    ``log_dir``.
    """

    def __init__(self, log_dir: str):
        """
        Initialize TensorBoard metric logger.

        Args:
            log_dir: Directory for TensorBoard logs (created if missing)
        """
        # Assigned before anything that can raise, so __del__ always finds it
        # even when construction fails part-way through.
        self.writers: dict = {}  # Map run_id -> SummaryWriter

        # Imported only to fail fast at construction time when TensorBoard
        # support is not importable; the class itself is used in create_run.
        from torch.utils.tensorboard import SummaryWriter  # noqa: F401

        self.log_dir = Path(log_dir)
        self.log_dir.mkdir(parents=True, exist_ok=True)

    def create_run(self, run_name: str) -> str:
        """
        Create a new TensorBoard run.

        For TensorBoard, we use run_name as the run_id and create a subdirectory
        in the log directory. Calling this again for a run that already has an
        open writer reuses that writer instead of replacing it, so the earlier
        writer is never dropped without being closed.

        Args:
            run_name: Name for the run; also used as the subdirectory name

        Returns:
            The run ID, which is ``run_name`` itself
        """
        if run_name in self.writers:
            return run_name

        from torch.utils.tensorboard import SummaryWriter

        run_log_dir = self.log_dir / run_name
        run_log_dir.mkdir(parents=True, exist_ok=True)

        # Create SummaryWriter for this run
        self.writers[run_name] = SummaryWriter(log_dir=str(run_log_dir))

        return run_name

    def _get_writer(self, run_id: str):
        """Return the writer for run_id, creating the run first if it is unknown."""
        if run_id not in self.writers:
            self.create_run(run_id)
        return self.writers[run_id]

    def log_param(self, run_id: str, key: str, value: str) -> None:
        """
        Log a parameter to TensorBoard.

        TensorBoard doesn't have native parameter logging, so we log as text
        under the ``params/`` tag prefix at step 0.

        Args:
            run_id: Run identifier (the run is created if it does not exist)
            key: Parameter name
            value: Parameter value; converted with ``str()`` before logging
        """
        writer = self._get_writer(run_id)
        writer.add_text(f"params/{key}", str(value), global_step=0)
        writer.flush()

    def log_metric(self, run_id: str, key: str, value: float, step: Optional[int] = None) -> None:
        """
        Log a metric to TensorBoard.

        Args:
            run_id: Run identifier (the run is created if it does not exist)
            key: Metric name
            value: Metric value
            step: Step number (required for TensorBoard time series)
        """
        writer = self._get_writer(run_id)
        # Use step=0 if not provided (fallback)
        writer.add_scalar(key, value, global_step=step if step is not None else 0)
        # Flush immediately to ensure real-time updates
        writer.flush()

    def end_run(self, run_id: str) -> None:
        """
        End a TensorBoard run by closing the writer.

        Unknown run IDs are ignored.

        Args:
            run_id: Run identifier
        """
        # Remove the entry first so a failing close() cannot leave a stale
        # writer registered for this run.
        writer = self.writers.pop(run_id, None)
        if writer is not None:
            writer.close()

    def get_run_metrics(self, run_id: str) -> dict:
        """
        Get metrics from TensorBoard.

        Note: TensorBoard doesn't provide easy API access to logged metrics.
        This returns an empty dict. For viewing metrics, use TensorBoard UI.
        """
        return {}

    def __del__(self):
        """Clean up all writers on deletion."""
        # getattr guard: __del__ also runs for objects whose __init__ raised
        # before the attribute existed.
        writers = getattr(self, "writers", None)
        if not writers:
            return
        for writer in list(writers.values()):
            writer.close()
        writers.clear()
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
class DualMetricLogger(MetricLogger):
    """
    Dual implementation that logs to both MLflow and TensorBoard.

    This allows users to benefit from both tracking systems simultaneously:
    - MLflow for experiment comparison and model registry
    - TensorBoard for real-time training visualization (especially useful in Colab)

    The MLflow run_id is the canonical run identifier for both backends.
    """

    def __init__(self, mlflow_tracking_uri: str, tensorboard_log_dir: str):
        """
        Initialize dual metric logger.

        Args:
            mlflow_tracking_uri: MLflow tracking server URI
            tensorboard_log_dir: Directory for TensorBoard logs
        """
        self.mlflow_logger = MLflowMetricLogger(mlflow_tracking_uri)
        self.tensorboard_logger = TensorBoardMetricLogger(tensorboard_log_dir)

    def get_experiment(self, experiment_name: str) -> str:
        """Get experiment from MLflow (TensorBoard doesn't have experiments)."""
        return self.mlflow_logger.get_experiment(experiment_name)

    def create_run(self, run_name: str) -> str:
        """
        Create run in both MLflow and TensorBoard.

        Args:
            run_name: Name for the run (passed to MLflow)

        Returns:
            The MLflow run_id, used as the canonical ID for both backends
        """
        mlflow_run_id = self.mlflow_logger.create_run(run_name)
        # Register the TensorBoard run under the MLflow run_id, not run_name.
        # Every later call (log_param/log_metric/end_run) is made with the
        # MLflow run_id, and the TensorBoard logger creates a writer on demand
        # for any id it has not seen. Registering under run_name would leave
        # that first writer empty and never closed by end_run, while the data
        # lands in a second directory named after the MLflow run_id.
        self.tensorboard_logger.create_run(mlflow_run_id)
        # Return MLflow run_id as the canonical ID
        return mlflow_run_id

    def log_param(self, run_id: str, key: str, value: str) -> None:
        """Log parameter to both backends."""
        self.mlflow_logger.log_param(run_id, key, value)
        self.tensorboard_logger.log_param(run_id, key, value)

    def log_metric(self, run_id: str, key: str, value: float, step: Optional[int] = None) -> None:
        """Log metric to both backends."""
        self.mlflow_logger.log_metric(run_id, key, value, step=step)
        self.tensorboard_logger.log_metric(run_id, key, value, step=step)

    def end_run(self, run_id: str) -> None:
        """
        End run in both backends.

        The TensorBoard writer is closed even if ending the MLflow run raises;
        the MLflow exception still propagates to the caller.
        """
        try:
            self.mlflow_logger.end_run(run_id)
        finally:
            self.tensorboard_logger.end_run(run_id)

    def get_run_metrics(self, run_id: str) -> dict:
        """Get metrics from MLflow (primary source)."""
        return self.mlflow_logger.get_run_metrics(run_id)

    def delete_run(self, run_id: str) -> None:
        """Delete run from MLflow (TensorBoard logs remain on disk)."""
        self.mlflow_logger.delete_run(run_id)

    def clear_context(self) -> None:
        """Clear the MLflow context (the TensorBoard logger is not touched here)."""
        self.mlflow_logger.clear_context()
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def create_metric_logger(
    backend: str,
    mlflow_tracking_uri: Optional[str] = None,
    tensorboard_log_dir: Optional[str] = None,
) -> MetricLogger:
    """
    Factory function to create the appropriate metric logger.

    Args:
        backend: Tracking backend to use ('mlflow', 'tensorboard', or 'both');
            matched case-insensitively
        mlflow_tracking_uri: MLflow tracking server URI (required if backend includes MLflow)
        tensorboard_log_dir: TensorBoard log directory (required if backend includes TensorBoard)

    Returns:
        MetricLogger instance

    Raises:
        ValueError: If backend is invalid (including not being a string) or
            required parameters are missing
    """
    # A non-string backend (e.g. None from an unset config value) would
    # otherwise fail in .lower() with AttributeError instead of the
    # documented ValueError.
    if not isinstance(backend, str):
        raise ValueError(f"Invalid backend: {backend!r}. Must be 'mlflow', 'tensorboard', or 'both'")

    backend = backend.lower()

    if backend == "mlflow":
        if not mlflow_tracking_uri:
            raise ValueError("mlflow_tracking_uri required for MLflow backend")
        return MLflowMetricLogger(mlflow_tracking_uri)

    if backend == "tensorboard":
        if not tensorboard_log_dir:
            raise ValueError("tensorboard_log_dir required for TensorBoard backend")
        return TensorBoardMetricLogger(tensorboard_log_dir)

    if backend == "both":
        if not mlflow_tracking_uri or not tensorboard_log_dir:
            raise ValueError("Both mlflow_tracking_uri and tensorboard_log_dir required for dual backend")
        return DualMetricLogger(mlflow_tracking_uri, tensorboard_log_dir)

    raise ValueError(f"Invalid backend: {backend}. Must be 'mlflow', 'tensorboard', or 'both'")
|
rapidfireai/utils/ping.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
#!/usr/bin/env python
|
|
2
|
-
import socket
|
|
3
2
|
import argparse
|
|
3
|
+
import socket
|
|
4
|
+
|
|
4
5
|
|
|
5
6
|
def ping_server(server: str, port: int, timeout=3):
|
|
6
|
-
"""ping server:port
|
|
7
|
+
"""ping server:port"""
|
|
7
8
|
try:
|
|
8
9
|
socket.setdefaulttimeout(timeout)
|
|
9
10
|
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
|
@@ -14,6 +15,7 @@ def ping_server(server: str, port: int, timeout=3):
|
|
|
14
15
|
s.close()
|
|
15
16
|
return True
|
|
16
17
|
|
|
18
|
+
|
|
17
19
|
if __name__ == "__main__":
|
|
18
20
|
parser = argparse.ArgumentParser(description="Ping a server port")
|
|
19
21
|
parser.add_argument("server", type=str, help="Server to ping")
|
|
@@ -84,7 +84,14 @@ class WorkerManager:
|
|
|
84
84
|
while not self.shutdown_event.is_set():
|
|
85
85
|
try:
|
|
86
86
|
# Check if parent process is still alive
|
|
87
|
-
|
|
87
|
+
try:
|
|
88
|
+
os.getpgid(self.parent_pid)
|
|
89
|
+
except PermissionError:
|
|
90
|
+
# Fallback for restricted environments (e.g., Colab)
|
|
91
|
+
# Use psutil to check if parent process exists
|
|
92
|
+
import psutil
|
|
93
|
+
if not psutil.pid_exists(self.parent_pid):
|
|
94
|
+
raise ProcessLookupError("Parent process no longer exists")
|
|
88
95
|
time.sleep(self.parent_check_interval)
|
|
89
96
|
except ProcessLookupError:
|
|
90
97
|
self.logger.debug(f"Parent process {self.parent_pid} died, shutting down workers...")
|
|
@@ -101,11 +108,14 @@ class WorkerManager:
|
|
|
101
108
|
"""
|
|
102
109
|
self.logger.debug(f"Creating {self.num_workers} worker processes...")
|
|
103
110
|
|
|
104
|
-
# Create new process group
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
111
|
+
# Create new process group (may not be permitted in restricted environments like Colab)
|
|
112
|
+
try:
|
|
113
|
+
os.setpgrp()
|
|
114
|
+
self.process_group_id = os.getpgrp()
|
|
115
|
+
self.logger.debug(f"Starting worker processes in process group {self.process_group_id}")
|
|
116
|
+
except PermissionError:
|
|
117
|
+
self.logger.debug("Cannot create process group (restricted environment) - will use individual process termination")
|
|
118
|
+
self.process_group_id = None
|
|
109
119
|
|
|
110
120
|
worker_ids = []
|
|
111
121
|
|
rapidfireai/version.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rapidfireai
|
|
3
|
-
Version: 0.10.2rc5
|
|
3
|
+
Version: 0.11.1rc1
|
|
4
4
|
Summary: RapidFire AI: Rapid Experimentation Engine for Customizing LLMs
|
|
5
5
|
Author-email: "RapidFire AI Inc." <support@rapidfire.ai>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -22,6 +22,7 @@ Requires-Python: >=3.12
|
|
|
22
22
|
Description-Content-Type: text/markdown
|
|
23
23
|
License-File: LICENSE
|
|
24
24
|
Requires-Dist: pandas>=2.3.1
|
|
25
|
+
Requires-Dist: psutil>=5.9.0
|
|
25
26
|
Requires-Dist: torch>=2.8.0
|
|
26
27
|
Requires-Dist: transformers>=4.55.2
|
|
27
28
|
Requires-Dist: peft>=0.17.0
|
|
@@ -40,11 +41,13 @@ Requires-Dist: requests>=2.32.5
|
|
|
40
41
|
Requires-Dist: loguru>=0.7.3
|
|
41
42
|
Requires-Dist: ipython>=8.37.0
|
|
42
43
|
Requires-Dist: jupyter>=1.1.1
|
|
43
|
-
Requires-Dist: ipywidgets
|
|
44
|
+
Requires-Dist: ipywidgets<9.0.0,>=7.3.4
|
|
44
45
|
Requires-Dist: uv>=0.8.14
|
|
46
|
+
Requires-Dist: tensorboard>=2.11.0
|
|
45
47
|
Provides-Extra: dev
|
|
46
48
|
Requires-Dist: pytest>=6.0; extra == "dev"
|
|
47
49
|
Requires-Dist: pytest-cov>=2.0; extra == "dev"
|
|
50
|
+
Requires-Dist: pytest-mock>=3.10.0; extra == "dev"
|
|
48
51
|
Requires-Dist: black>=21.0; extra == "dev"
|
|
49
52
|
Requires-Dist: flake8>=3.8; extra == "dev"
|
|
50
53
|
Requires-Dist: mypy>=0.800; extra == "dev"
|
|
@@ -147,7 +150,7 @@ If you encounter port conflicts, you can kill existing processes:
|
|
|
147
150
|
|
|
148
151
|
```bash
|
|
149
152
|
lsof -t -i:5002 | xargs kill -9 # mlflow
|
|
150
|
-
lsof -t -i:
|
|
153
|
+
lsof -t -i:8081 | xargs kill -9 # dispatcher
|
|
151
154
|
lsof -t -i:3000 | xargs kill -9 # frontend server
|
|
152
155
|
```
|
|
153
156
|
|
|
@@ -292,7 +295,7 @@ chmod +x ./rapidfireai/start_dev.sh
|
|
|
292
295
|
# VSCode can port-forward localhost:3000 where the rf-frontend server will be running
|
|
293
296
|
|
|
294
297
|
# for port clash issues -
|
|
295
|
-
lsof -t -i:
|
|
298
|
+
lsof -t -i:8081 | xargs kill -9 # dispatcher
|
|
296
299
|
lsof -t -i:5002 | xargs kill -9 # mlflow
|
|
297
300
|
lsof -t -i:3000 | xargs kill -9 # frontend
|
|
298
301
|
```
|
|
@@ -1,31 +1,31 @@
|
|
|
1
1
|
rapidfireai/__init__.py,sha256=mSV8CiaJ9LwjCpMdHSBd9bM-JBijDx-lc8hGny1KEsQ,368
|
|
2
|
-
rapidfireai/cli.py,sha256=
|
|
3
|
-
rapidfireai/experiment.py,sha256=
|
|
4
|
-
rapidfireai/start.sh,sha256=
|
|
5
|
-
rapidfireai/version.py,sha256=
|
|
2
|
+
rapidfireai/cli.py,sha256=EzIqiIVTvXHZA0MP51G3SNbm8fq4hg--lsOg3vlb71U,16766
|
|
3
|
+
rapidfireai/experiment.py,sha256=HTZCDD3VfH-DP7nFmD6xGHy3SpttZ6Wd3-tAZGr0HUU,10612
|
|
4
|
+
rapidfireai/start.sh,sha256=HGl-J-CpkZuwFl5iVMzPMR_Zf0OGJqPfJFmPKigu_80,24384
|
|
5
|
+
rapidfireai/version.py,sha256=r80_4K2SERzBy0aLKlQ2IBxufsANR-PhibCNUprhmqM,107
|
|
6
6
|
rapidfireai/automl/__init__.py,sha256=QnzWa33i9aMp1NatoQYJFPrGZchtTUAPkgSOyyDXbSU,501
|
|
7
7
|
rapidfireai/automl/base.py,sha256=pF6NQMr8DeEFm4PBbmbUbNAtP0S-yDfeUnKMqz2D9Zk,1947
|
|
8
8
|
rapidfireai/automl/datatypes.py,sha256=rbocXidGekpeukKQuMSZLFK6h6h4PIo1Fvre2FWmhqU,1470
|
|
9
|
-
rapidfireai/automl/grid_search.py,sha256=
|
|
10
|
-
rapidfireai/automl/model_config.py,sha256=
|
|
11
|
-
rapidfireai/automl/random_search.py,sha256=
|
|
9
|
+
rapidfireai/automl/grid_search.py,sha256=S_lxFuSt0MZcztSwzUMg_lDGmneybXZ8VqoUzdGIlfw,6127
|
|
10
|
+
rapidfireai/automl/model_config.py,sha256=eqRbUxcKj-O_Cxk3A1sW4pFMJQgjs_75m81EtJKArQM,3665
|
|
11
|
+
rapidfireai/automl/random_search.py,sha256=VzDHFM-hJ3KBnLuSz6ExOofeEOMJbHkTgMYvXi2dNHg,5324
|
|
12
12
|
rapidfireai/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
13
|
rapidfireai/backend/chunks.py,sha256=83n_jz08lxEHi0FZ_xqV-ZQxgzzp3XQdCyW1zvW8Pb0,6981
|
|
14
|
-
rapidfireai/backend/controller.py,sha256=
|
|
14
|
+
rapidfireai/backend/controller.py,sha256=jha417YwPBbzVuSSCDW_s5Wh6NawUCpPxi4k8Q8N7P8,32817
|
|
15
15
|
rapidfireai/backend/scheduler.py,sha256=D_C6XXZ6D7UT8-N5XskeagG2e7Il8DnN3s788y9vc1Y,6026
|
|
16
|
-
rapidfireai/backend/worker.py,sha256=
|
|
16
|
+
rapidfireai/backend/worker.py,sha256=sCWKB0NrarfIFJVNaNaIbkxrf5_1it9HxbBRAWmQq_0,13106
|
|
17
17
|
rapidfireai/db/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
18
|
rapidfireai/db/db_interface.py,sha256=891uxZP00hVnqd8jVf_jL84Ui85BO06JFdYVLzqxaPU,5237
|
|
19
|
-
rapidfireai/db/rf_db.py,sha256=
|
|
20
|
-
rapidfireai/db/tables.sql,sha256=
|
|
21
|
-
rapidfireai/dispatcher/dispatcher.py,sha256=
|
|
22
|
-
rapidfireai/dispatcher/gunicorn.conf.py,sha256=
|
|
19
|
+
rapidfireai/db/rf_db.py,sha256=0GptQv0PlS13dUEaxcBfV78xo1XGr8cyxPCSFnFcVTE,26499
|
|
20
|
+
rapidfireai/db/tables.sql,sha256=QRZs9iu4fkEfM9J--Xrdu-ar1zZOUdWmPWrUYL1o9GY,1930
|
|
21
|
+
rapidfireai/dispatcher/dispatcher.py,sha256=kAUZw9eK_c9sfp9LHjbCUVDWO6QWBWjyP887PklqC04,16158
|
|
22
|
+
rapidfireai/dispatcher/gunicorn.conf.py,sha256=CfS1lPfDK5ggl63c5qYtydfNC3TNJ0_irEI0qCL37d0,837
|
|
23
23
|
rapidfireai/frontend/package.json,sha256=sVJmBQ0-1b7RW_h_jD-kKWL3QdhlTmdx12f80nxNd8w,8778
|
|
24
|
-
rapidfireai/frontend/proxy_middleware.py,sha256=
|
|
24
|
+
rapidfireai/frontend/proxy_middleware.py,sha256=FLOmJbzzEq19HzuJvBmneCI4MKNUWGtNEYZAfCpSCBs,8537
|
|
25
25
|
rapidfireai/frontend/server.py,sha256=uTCzigwy2v14X1g-PHs9PsJ_LlJFmd2HLTi6A0MFfL0,984
|
|
26
|
-
rapidfireai/frontend/build/asset-manifest.json,sha256=
|
|
26
|
+
rapidfireai/frontend/build/asset-manifest.json,sha256=hUsuVYGmwulDfi8HQx019EokmC87CL_DF3Rm2SgNG2k,19906
|
|
27
27
|
rapidfireai/frontend/build/favicon.ico,sha256=t1wT9kqUZ_H6n5WgN9FTZBoN1Fb_gBBBBb1GJm0j1tg,15406
|
|
28
|
-
rapidfireai/frontend/build/index.html,sha256=
|
|
28
|
+
rapidfireai/frontend/build/index.html,sha256=u5CofDvF2BkqqnjSJ4VADwpwpve-X9XRs8-e1bQa4WE,707
|
|
29
29
|
rapidfireai/frontend/build/manifest.json,sha256=ShxPBqSqfpMC7XKD10XThODp-eVtCalJ2SuJbLxN6wQ,317
|
|
30
30
|
rapidfireai/frontend/build/lib/notebook-trace-renderer/index.html,sha256=TExHVdUiSmR8VgCyjlxkW4OHk2ZW6XjOMPqSP2ABEcM,205
|
|
31
31
|
rapidfireai/frontend/build/lib/notebook-trace-renderer/js/ml-model-trace-renderer.5490ebc325fe0f300ad9.js,sha256=WmABEAnu39aZ3UPTdoYYkgscNfhidEBbEd6-Q1TF_xY,5096
|
|
@@ -230,9 +230,9 @@ rapidfireai/frontend/build/static/js/9831.43bfa1a6.chunk.js,sha256=FgAwC9cUR31aY
|
|
|
230
230
|
rapidfireai/frontend/build/static/js/9831.43bfa1a6.chunk.js.map,sha256=zqQ06JHJuOM7xYXWVBbvT5vnPNhY5X-h5B7zHysx3lw,11180
|
|
231
231
|
rapidfireai/frontend/build/static/js/experimentPage.230db0b3.chunk.js,sha256=QsBAHGwXMz1fDE0nwtviNM2l6gaiL2ZL3K6kNYDzqos,9322
|
|
232
232
|
rapidfireai/frontend/build/static/js/experimentPage.230db0b3.chunk.js.map,sha256=ZMZJzwksHJ5epKDudSY7UesbhRmg_-QVVzRykc4vt30,40878
|
|
233
|
-
rapidfireai/frontend/build/static/js/main.
|
|
234
|
-
rapidfireai/frontend/build/static/js/main.
|
|
235
|
-
rapidfireai/frontend/build/static/js/main.
|
|
233
|
+
rapidfireai/frontend/build/static/js/main.58393d31.js,sha256=hk0_IVrF9Qj0q3Vr5UzRPjo7216vbD3AaF4GhH6DoHM,2605534
|
|
234
|
+
rapidfireai/frontend/build/static/js/main.58393d31.js.LICENSE.txt,sha256=0aWPNna3cImqZ2xi_GQ1LK0CTZvzZ2zFrD_mwHYMmko,6269
|
|
235
|
+
rapidfireai/frontend/build/static/js/main.58393d31.js.map,sha256=_oau_iXX3pIJsL0LDLgePZs2jD1atbIV3Tg7YYC_nlM,11862186
|
|
236
236
|
rapidfireai/frontend/build/static/media/404-overflow.fad9a31861b0afba6f921ebb8e769688.svg,sha256=brmh2WLNo6Kp2QZKU5MezMUUB9sScckdhTTkee8FxcU,4637
|
|
237
237
|
rapidfireai/frontend/build/static/media/RapidFire_Square_Bug.27ceb48296314a4bc0d4.png,sha256=2Lyp_ZlHCfOnzC2KoOvV9ahZPLad_XzWb96F1BvWn5I,20682
|
|
238
238
|
rapidfireai/frontend/build/static/media/chart-bar.0fd4a63680fba840a7b69fbf07969f79.svg,sha256=TyChv8IKe0v04YBeS7qz-IFOGjn0YJhjdflz3FxdwRc,498
|
|
@@ -257,31 +257,34 @@ rapidfireai/frontend/build/static/media/registered-model-grey-ok.8274b58d39504c8
|
|
|
257
257
|
rapidfireai/frontend/build/static/media/versions-empty.3e2d8bb8c5b8ddd0a19a34890b36ad41.svg,sha256=qknDzhv6z7j1AONJhHgV5Xsi84J3iNw2gLeE2v1oghA,23148
|
|
258
258
|
rapidfireai/frontend/build/static/media/warning.290a3b14118933547965e91ea61c5a61.svg,sha256=aR7OrJItpEixvhf_sl1vuTsEPZLeizUm2xR12dKxfrI,646
|
|
259
259
|
rapidfireai/ml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
260
|
-
rapidfireai/ml/callbacks.py,sha256=
|
|
260
|
+
rapidfireai/ml/callbacks.py,sha256=RKm_XGOqHw0ShhPIJKstfseaFeK_QRJEWHxITnfkpUg,11236
|
|
261
261
|
rapidfireai/ml/checkpoint_utils.py,sha256=T6BzkvQmL77tvOiif-06oPyM-ikEDsj8ezbNELSzjM0,23422
|
|
262
|
-
rapidfireai/ml/trainer.py,sha256=
|
|
262
|
+
rapidfireai/ml/trainer.py,sha256=PYb4SPgDs5LuYO93upBRSCllELQZSMjOWgrBlqmzYiQ,14985
|
|
263
263
|
rapidfireai/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
264
264
|
rapidfireai/utils/automl_utils.py,sha256=4IeGZyYRxSdoKk1dBcTI5_JRms70TyiWL9F6Gta31BI,2004
|
|
265
|
-
rapidfireai/utils/constants.py,sha256
|
|
265
|
+
rapidfireai/utils/constants.py,sha256=lMQraeikDhlQBjsAp3T_00moxe9wCOZG5m_wqp6sisE,4088
|
|
266
266
|
rapidfireai/utils/datapaths.py,sha256=PKgZu_qWx2z6QBIfmzmjY0lWG79GaU6W3577_34yX10,2554
|
|
267
267
|
rapidfireai/utils/exceptions.py,sha256=RA6kMSV3nCz3oE-yhuNLDEneDqTUrZC6N0AkSRBdAlg,2002
|
|
268
|
-
rapidfireai/utils/experiment_utils.py,sha256=
|
|
269
|
-
rapidfireai/utils/
|
|
270
|
-
rapidfireai/utils/
|
|
271
|
-
rapidfireai/utils/
|
|
268
|
+
rapidfireai/utils/experiment_utils.py,sha256=ZbKb2argSlCpFpqVoFO5_KwMK9ql1lVme3QZamDPePM,17440
|
|
269
|
+
rapidfireai/utils/interactive_controller.py,sha256=lCpjkDWZ9ixkf8TEu1FTsjIa4T1fRUDomfM7wJgaSnk,17862
|
|
270
|
+
rapidfireai/utils/logging.py,sha256=jq87rfMd0aQMnfh6BvXKTn5WtoZorizS_83uk16lYBo,3026
|
|
271
|
+
rapidfireai/utils/metric_logger.py,sha256=J8PxEWwaRSG5x07e8tz-mqYYRQchNQSTW9lm7UTOYFU,11078
|
|
272
|
+
rapidfireai/utils/mlflow_manager.py,sha256=ocUCLMkA3LDaHc0809DqUmJK8hAmoPFo3qmRQyvmyQk,4988
|
|
273
|
+
rapidfireai/utils/ping.py,sha256=9HJmiIBOc5HtgxSrEWEkbLj08nXHa14Smf-91BmYNNQ,977
|
|
272
274
|
rapidfireai/utils/serialize.py,sha256=_A9egs2uhlYNGT3Ntv2fzH7rwp6I-GGVoS4ViY3sufU,401
|
|
273
275
|
rapidfireai/utils/shm_manager.py,sha256=MlK-lKR-GjQ7G161U65922fh19YqfZmDabfi5P_kCpw,23323
|
|
274
276
|
rapidfireai/utils/trainer_config.py,sha256=xrEcx-jW3NnSb6qCTTQT7OhLO9R9hWvNIHd2AsymQ-g,629
|
|
275
|
-
rapidfireai/utils/worker_manager.py,sha256=
|
|
276
|
-
rapidfireai-0.
|
|
277
|
+
rapidfireai/utils/worker_manager.py,sha256=VomHVgn7GgXghX1pVaRdI9HVEnJjbnhpyJT0-IKBUt8,8643
|
|
278
|
+
rapidfireai-0.11.1rc1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
279
|
+
tutorial_notebooks/rf-colab-tensorboard-tutorial.ipynb,sha256=u82tZDxuJ__ij8lMquIwRSckibTDd1p-3dWS8eWAYwQ,15893
|
|
277
280
|
tutorial_notebooks/rf-tutorial-dpo-alignment-lite.ipynb,sha256=ZEoP6hmBuwWPE5BgJXUAwBgfaAs3-s0Jj3hbcCl6buo,15082
|
|
278
281
|
tutorial_notebooks/rf-tutorial-dpo-alignment.ipynb,sha256=EmHZkmsVUqgonlcNgJfYri4-MK0K_Ad7lkkLQDTGRjs,15698
|
|
279
282
|
tutorial_notebooks/rf-tutorial-grpo-mathreasoning-lite.ipynb,sha256=BXBQTzAdHmugUA0a5_eLK5AWitQaM_fOdfH6V-DnrOY,13111
|
|
280
283
|
tutorial_notebooks/rf-tutorial-grpo-mathreasoning.ipynb,sha256=vL8gdGXqDYl1iGAi2wbPnqU7WcS5pzQG9KEiP4dAWm4,14255
|
|
281
284
|
tutorial_notebooks/rf-tutorial-sft-chatqa-lite.ipynb,sha256=A5nLbJyKDa1cPIBZbZsqHH2MS15Eb9m57zV22486av8,12235
|
|
282
285
|
tutorial_notebooks/rf-tutorial-sft-chatqa.ipynb,sha256=qGMwOZM9VYJVLSz02hl5pfUcwdMNhLLcypBogJ7G3Bo,12140
|
|
283
|
-
rapidfireai-0.
|
|
284
|
-
rapidfireai-0.
|
|
285
|
-
rapidfireai-0.
|
|
286
|
-
rapidfireai-0.
|
|
287
|
-
rapidfireai-0.
|
|
286
|
+
rapidfireai-0.11.1rc1.dist-info/METADATA,sha256=635WC_wVxFUH7seXaYpQQ4Lpu0S_uRapOAan8iq61H0,12611
|
|
287
|
+
rapidfireai-0.11.1rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
288
|
+
rapidfireai-0.11.1rc1.dist-info/entry_points.txt,sha256=tuZF1oC4KyQ9H767o83S8Y-ZiGvw_PVADPL1vRykY3g,53
|
|
289
|
+
rapidfireai-0.11.1rc1.dist-info/top_level.txt,sha256=A28FddyVhe1LHCbvbigLRtmEWKHGVgOVKH1_FfbUQ2U,12
|
|
290
|
+
rapidfireai-0.11.1rc1.dist-info/RECORD,,
|