rapidfireai 0.9.9__py3-none-any.whl → 0.9.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rapidfireai might be problematic. Click here for more details.
- rapidfireai/cli.py +25 -5
- rapidfireai/experiment.py +5 -1
- rapidfireai/start.sh +154 -141
- rapidfireai/utils/constants.py +1 -2
- rapidfireai/utils/ping.py +29 -0
- rapidfireai/utils/shm_manager.py +15 -11
- rapidfireai/version.py +2 -2
- {rapidfireai-0.9.9.dist-info → rapidfireai-0.9.11.dist-info}/METADATA +40 -11
- {rapidfireai-0.9.9.dist-info → rapidfireai-0.9.11.dist-info}/RECORD +19 -12
- rapidfireai-0.9.11.dist-info/entry_points.txt +2 -0
- tutorial_notebooks/rf-tutorial-dpo-alignment-lite.ipynb +412 -0
- tutorial_notebooks/rf-tutorial-dpo-alignment.ipynb +427 -0
- tutorial_notebooks/rf-tutorial-grpo-mathreasoning-lite.ipynb +358 -0
- tutorial_notebooks/rf-tutorial-grpo-mathreasoning.ipynb +371 -0
- tutorial_notebooks/rf-tutorial-sft-chatqa-lite.ipynb +329 -0
- tutorial_notebooks/rf-tutorial-sft-chatqa.ipynb +331 -0
- rapidfireai-0.9.9.dist-info/entry_points.txt +0 -2
- {rapidfireai-0.9.9.dist-info → rapidfireai-0.9.11.dist-info}/WHEEL +0 -0
- {rapidfireai-0.9.9.dist-info → rapidfireai-0.9.11.dist-info}/licenses/LICENSE +0 -0
- {rapidfireai-0.9.9.dist-info → rapidfireai-0.9.11.dist-info}/top_level.txt +0 -0
rapidfireai/start.sh
CHANGED
|
@@ -7,13 +7,15 @@
|
|
|
7
7
|
set -e # Exit on any error
|
|
8
8
|
|
|
9
9
|
# Configuration
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
10
|
+
RF_MLFLOW_PORT=${RF_MLFLOW_PORT:=5002}
|
|
11
|
+
RF_MLFLOW_HOST=${RF_MLFLOW_HOST:=127.0.0.1}
|
|
12
|
+
RF_FRONTEND_PORT=${RF_FRONTEND_PORT:=3000}
|
|
13
|
+
RF_FRONTEND_HOST=${RF_FRONTEND_HOST:=0.0.0.0}
|
|
14
14
|
# API server configuration - these should match DispatcherConfig in constants.py
|
|
15
|
-
|
|
16
|
-
|
|
15
|
+
RF_API_PORT=${RF_API_PORT:=8080}
|
|
16
|
+
RF_API_HOST=${RF_API_HOST:=127.0.0.1}
|
|
17
|
+
|
|
18
|
+
RF_DB_PATH="${RF_DB_PATH:=$HOME/db}"
|
|
17
19
|
|
|
18
20
|
# Colors for output
|
|
19
21
|
RED='\033[0;31m'
|
|
@@ -23,13 +25,25 @@ BLUE='\033[0;34m'
|
|
|
23
25
|
NC='\033[0m' # No Color
|
|
24
26
|
|
|
25
27
|
# PID file to track processes
|
|
26
|
-
|
|
28
|
+
RF_PID_FILE="${RF_PID_FILE:=rapidfire_pids.txt}"
|
|
27
29
|
|
|
28
30
|
# Directory paths for pip-installed package
|
|
29
31
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
30
32
|
DISPATCHER_DIR="$SCRIPT_DIR/dispatcher"
|
|
31
33
|
FRONTEND_DIR="$SCRIPT_DIR/frontend"
|
|
32
34
|
|
|
35
|
+
RF_PYTHON_EXECUTABLE=${RF_PYTHON_EXECUTABLE:-python3}
|
|
36
|
+
RF_PIP_EXECUTABLE=${RF_PIP_EXECUTABLE:-pip3}
|
|
37
|
+
|
|
38
|
+
if ! command -v $RF_PYTHON_EXECUTABLE &> /dev/null; then
|
|
39
|
+
RF_PYTHON_EXECUTABLE=python
|
|
40
|
+
fi
|
|
41
|
+
|
|
42
|
+
if ! command -v $RF_PIP_EXECUTABLE &> /dev/null; then
|
|
43
|
+
RF_PIP_EXECUTABLE=pip
|
|
44
|
+
fi
|
|
45
|
+
|
|
46
|
+
|
|
33
47
|
# Function to print colored output
|
|
34
48
|
print_status() {
|
|
35
49
|
echo -e "${BLUE}[$(date '+%Y-%m-%d %H:%M:%S')]${NC} $1"
|
|
@@ -50,44 +64,36 @@ print_warning() {
|
|
|
50
64
|
# Function to setup Python environment
|
|
51
65
|
setup_python_env() {
|
|
52
66
|
print_status "Setting up Python environment..."
|
|
53
|
-
|
|
67
|
+
|
|
54
68
|
# Check if the package can be imported
|
|
55
69
|
print_status "Verifying rapidfireai package availability..."
|
|
56
|
-
|
|
57
|
-
if
|
|
58
|
-
print_success "rapidfireai package is available with
|
|
70
|
+
|
|
71
|
+
if ${RF_PYTHON_EXECUTABLE} -c "import rapidfireai; print('Package imported successfully with ${RF_PYTHON_EXECUTABLE}')" 2>/dev/null; then
|
|
72
|
+
print_success "rapidfireai package is available with ${RF_PYTHON_EXECUTABLE}"
|
|
59
73
|
else
|
|
60
|
-
print_error "rapidfireai package is not available with
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
print_status "Trying to import with python..."
|
|
64
|
-
if python -c "import rapidfireai; print('Package imported successfully with python')" 2>/dev/null; then
|
|
65
|
-
print_success "rapidfireai package is available with python"
|
|
66
|
-
else
|
|
67
|
-
print_error "rapidfireai package is not available with python"
|
|
68
|
-
print_warning "Try reinstalling the package: pip install rapidfireai"
|
|
69
|
-
return 1
|
|
70
|
-
fi
|
|
74
|
+
print_error "rapidfireai package is not available with ${RF_PYTHON_EXECUTABLE}"
|
|
75
|
+
print_warning "Try reinstalling the package: ${RF_PIP_EXECUTABLE} install rapidfireai"
|
|
76
|
+
return 1
|
|
71
77
|
fi
|
|
72
|
-
|
|
78
|
+
|
|
73
79
|
# Install any missing dependencies
|
|
74
80
|
print_status "Checking for required dependencies..."
|
|
75
|
-
if
|
|
81
|
+
if ${RF_PYTHON_EXECUTABLE} -c "import mlflow, gunicorn, flask" 2>/dev/null; then
|
|
76
82
|
print_success "All required dependencies are available"
|
|
77
83
|
else
|
|
78
84
|
print_warning "Some dependencies may be missing. Installing requirements..."
|
|
79
|
-
|
|
85
|
+
${RF_PIP_EXECUTABLE} install mlflow gunicorn flask flask-cors
|
|
80
86
|
fi
|
|
81
|
-
|
|
87
|
+
|
|
82
88
|
return 0
|
|
83
89
|
}
|
|
84
90
|
|
|
85
91
|
# Function to cleanup processes on exit
|
|
86
92
|
cleanup() {
|
|
87
93
|
print_warning "Shutting down services..."
|
|
88
|
-
|
|
94
|
+
|
|
89
95
|
# Kill processes by port (more reliable for MLflow)
|
|
90
|
-
for port in $
|
|
96
|
+
for port in $RF_MLFLOW_PORT $RF_FRONTEND_PORT $RF_API_PORT; do
|
|
91
97
|
local pids=$(lsof -ti :$port 2>/dev/null || true)
|
|
92
98
|
if [[ -n "$pids" ]]; then
|
|
93
99
|
print_status "Killing processes on port $port"
|
|
@@ -100,9 +106,9 @@ cleanup() {
|
|
|
100
106
|
fi
|
|
101
107
|
fi
|
|
102
108
|
done
|
|
103
|
-
|
|
109
|
+
|
|
104
110
|
# Clean up tracked PIDs
|
|
105
|
-
if [[ -f "$
|
|
111
|
+
if [[ -f "$RF_PID_FILE" ]]; then
|
|
106
112
|
while read -r pid service; do
|
|
107
113
|
if kill -0 "$pid" 2>/dev/null; then
|
|
108
114
|
print_status "Stopping $service (PID: $pid)"
|
|
@@ -114,25 +120,25 @@ cleanup() {
|
|
|
114
120
|
kill -9 -$pid 2>/dev/null || kill -9 $pid 2>/dev/null || true
|
|
115
121
|
fi
|
|
116
122
|
fi
|
|
117
|
-
done < "$
|
|
118
|
-
rm -f "$
|
|
123
|
+
done < "$RF_PID_FILE"
|
|
124
|
+
rm -f "$RF_PID_FILE"
|
|
119
125
|
fi
|
|
120
|
-
|
|
126
|
+
|
|
121
127
|
# Final cleanup - kill any remaining MLflow, gunicorn, or Flask processes
|
|
122
128
|
pkill -f "mlflow server" 2>/dev/null || true
|
|
123
129
|
pkill -f "gunicorn.*rapidfireai" 2>/dev/null || true
|
|
124
130
|
pkill -f "python3.*server.py" 2>/dev/null || true
|
|
125
131
|
pkill -f "python.*server.py" 2>/dev/null || true
|
|
126
|
-
|
|
132
|
+
|
|
127
133
|
# Additional cleanup for any remaining processes on our ports
|
|
128
|
-
for port in $
|
|
134
|
+
for port in $RF_MLFLOW_PORT $RF_FRONTEND_PORT $RF_API_PORT; do
|
|
129
135
|
local remaining_pids=$(lsof -ti :$port 2>/dev/null || true)
|
|
130
136
|
if [[ -n "$remaining_pids" ]]; then
|
|
131
137
|
print_status "Force killing remaining processes on port $port"
|
|
132
138
|
echo "$remaining_pids" | xargs kill -9 2>/dev/null || true
|
|
133
139
|
fi
|
|
134
140
|
done
|
|
135
|
-
|
|
141
|
+
|
|
136
142
|
print_success "All services stopped"
|
|
137
143
|
exit 0
|
|
138
144
|
}
|
|
@@ -141,7 +147,7 @@ cleanup() {
|
|
|
141
147
|
check_port() {
|
|
142
148
|
local port=$1
|
|
143
149
|
local service=$2
|
|
144
|
-
|
|
150
|
+
|
|
145
151
|
if lsof -Pi :$port -sTCP:LISTEN -t >/dev/null 2>&1; then
|
|
146
152
|
print_error "Port $port is already in use. Cannot start $service."
|
|
147
153
|
print_status "Processes using port $port:"
|
|
@@ -154,40 +160,40 @@ check_port() {
|
|
|
154
160
|
# Function to check for common startup issues
|
|
155
161
|
check_startup_issues() {
|
|
156
162
|
print_status "Checking for common startup issues..."
|
|
157
|
-
|
|
163
|
+
|
|
158
164
|
# Check Python version and packages
|
|
159
|
-
if command -v
|
|
160
|
-
local python_version=$(
|
|
165
|
+
if command -v ${RF_PYTHON_EXECUTABLE} &> /dev/null; then
|
|
166
|
+
local python_version=$(${RF_PYTHON_EXECUTABLE} --version 2>&1)
|
|
161
167
|
print_status "Python version: $python_version"
|
|
162
|
-
|
|
168
|
+
|
|
163
169
|
# Check for required packages
|
|
164
170
|
local missing_packages=()
|
|
165
171
|
for package in mlflow gunicorn flask; do
|
|
166
|
-
if !
|
|
172
|
+
if ! ${RF_PYTHON_EXECUTABLE} -c "import $package" 2>/dev/null; then
|
|
167
173
|
missing_packages+=("$package")
|
|
168
174
|
fi
|
|
169
175
|
done
|
|
170
|
-
|
|
176
|
+
|
|
171
177
|
if [[ ${#missing_packages[@]} -gt 0 ]]; then
|
|
172
178
|
print_warning "Missing packages: ${missing_packages[*]}"
|
|
173
179
|
print_status "Installing missing packages..."
|
|
174
|
-
|
|
180
|
+
${RF_PIP_EXECUTABLE} install "${missing_packages[@]}" || print_error "Failed to install packages"
|
|
175
181
|
fi
|
|
176
182
|
fi
|
|
177
|
-
|
|
183
|
+
|
|
178
184
|
# Check disk space
|
|
179
185
|
local available_space=$(df . | awk 'NR==2 {print $4}')
|
|
180
186
|
if [[ $available_space -lt 1000000 ]]; then
|
|
181
187
|
print_warning "Low disk space: ${available_space}KB available"
|
|
182
188
|
fi
|
|
183
|
-
|
|
189
|
+
|
|
184
190
|
# Check if we can write to current directory
|
|
185
191
|
if ! touch "$SCRIPT_DIR/test_write.tmp" 2>/dev/null; then
|
|
186
192
|
print_error "Cannot write to script directory: $SCRIPT_DIR"
|
|
187
193
|
return 1
|
|
188
194
|
fi
|
|
189
195
|
rm -f "$SCRIPT_DIR/test_write.tmp"
|
|
190
|
-
|
|
196
|
+
|
|
191
197
|
return 0
|
|
192
198
|
}
|
|
193
199
|
|
|
@@ -198,18 +204,23 @@ wait_for_service() {
|
|
|
198
204
|
local service=$3
|
|
199
205
|
local max_attempts=${4:-30} # Allow custom timeout, default 30 seconds
|
|
200
206
|
local attempt=1
|
|
201
|
-
|
|
202
|
-
print_status "Waiting for $service to be ready on $host:$port (timeout: ${max_attempts}
|
|
203
|
-
|
|
207
|
+
|
|
208
|
+
print_status "Waiting for $service to be ready on $host:$port (timeout: ${max_attempts} attempts)..."
|
|
209
|
+
|
|
210
|
+
if command -v nc &> /dev/null; then
|
|
211
|
+
ping_command="$(command -v nc) -z $host $port"
|
|
212
|
+
else
|
|
213
|
+
ping_command="$RF_PYTHON_EXECUTABLE -c 'from rapidfireai.utils.ping import ping_server; checker=ping_server(\"${host}\", ${port}); exit(1) if not checker else exit(0)'"
|
|
214
|
+
fi
|
|
204
215
|
while [ $attempt -le $max_attempts ]; do
|
|
205
|
-
if
|
|
216
|
+
if eval ${ping_command} &>/dev/null; then
|
|
206
217
|
print_success "$service is ready!"
|
|
207
218
|
return 0
|
|
208
219
|
fi
|
|
209
220
|
sleep 1
|
|
210
221
|
((attempt++))
|
|
211
222
|
done
|
|
212
|
-
|
|
223
|
+
|
|
213
224
|
print_error "$service failed to start within expected time (${max_attempts}s)"
|
|
214
225
|
return 1
|
|
215
226
|
}
|
|
@@ -217,50 +228,52 @@ wait_for_service() {
|
|
|
217
228
|
# Function to start MLflow server
|
|
218
229
|
start_mlflow() {
|
|
219
230
|
print_status "Starting MLflow server..."
|
|
220
|
-
|
|
221
|
-
|
|
231
|
+
print_status "Making Database directory $RF_DB_PATH..."
|
|
232
|
+
mkdir -p "$RF_DB_PATH"
|
|
233
|
+
|
|
234
|
+
if ! check_port $RF_MLFLOW_PORT "MLflow server"; then
|
|
222
235
|
return 1
|
|
223
236
|
fi
|
|
224
|
-
|
|
237
|
+
|
|
225
238
|
# Start MLflow server in background with logging
|
|
226
239
|
print_status "MLflow logs will be written to: $SCRIPT_DIR/mlflow.log"
|
|
227
|
-
|
|
240
|
+
|
|
228
241
|
# Use setsid on Linux, nohup on macOS
|
|
229
242
|
if command -v setsid &> /dev/null; then
|
|
230
243
|
setsid mlflow server \
|
|
231
|
-
--host $
|
|
232
|
-
--port $
|
|
233
|
-
--backend-store-uri sqlite
|
|
244
|
+
--host $RF_MLFLOW_HOST \
|
|
245
|
+
--port $RF_MLFLOW_PORT \
|
|
246
|
+
--backend-store-uri sqlite:///${RF_DB_PATH}/mlflow.db > "$SCRIPT_DIR/mlflow.log" 2>&1 &
|
|
234
247
|
else
|
|
235
248
|
nohup mlflow server \
|
|
236
|
-
--host $
|
|
237
|
-
--port $
|
|
238
|
-
--backend-store-uri sqlite
|
|
249
|
+
--host $RF_MLFLOW_HOST \
|
|
250
|
+
--port $RF_MLFLOW_PORT \
|
|
251
|
+
--backend-store-uri sqlite:///${RF_DB_PATH}/mlflow.db > "$SCRIPT_DIR/mlflow.log" 2>&1 &
|
|
239
252
|
fi
|
|
240
|
-
|
|
253
|
+
|
|
241
254
|
local mlflow_pid=$!
|
|
242
|
-
echo "$mlflow_pid MLflow" >> "$
|
|
243
|
-
|
|
255
|
+
echo "$mlflow_pid MLflow" >> "$RF_PID_FILE"
|
|
256
|
+
|
|
244
257
|
# Wait for MLflow to be ready
|
|
245
|
-
if wait_for_service $
|
|
258
|
+
if wait_for_service $RF_MLFLOW_HOST $RF_MLFLOW_PORT "MLflow server"; then
|
|
246
259
|
print_success "MLflow server started (PID: $mlflow_pid)"
|
|
247
260
|
return 0
|
|
248
261
|
else
|
|
249
262
|
print_error "MLflow server failed to start. Checking for errors..."
|
|
250
|
-
|
|
263
|
+
|
|
251
264
|
# Check if process is still running
|
|
252
265
|
if ! kill -0 "$mlflow_pid" 2>/dev/null; then
|
|
253
266
|
print_error "MLflow process has died. Checking logs for startup errors:"
|
|
254
267
|
else
|
|
255
268
|
print_error "MLflow process is running but not responding. Checking logs:"
|
|
256
269
|
fi
|
|
257
|
-
|
|
270
|
+
|
|
258
271
|
if [[ -f "$SCRIPT_DIR/mlflow.log" ]]; then
|
|
259
272
|
echo "=== Last 30 lines of mlflow.log ==="
|
|
260
273
|
tail -30 "$SCRIPT_DIR/mlflow.log"
|
|
261
274
|
echo "=== End of logs ==="
|
|
262
275
|
echo ""
|
|
263
|
-
|
|
276
|
+
|
|
264
277
|
# Look for specific error patterns
|
|
265
278
|
if grep -q "Error\|Exception\|Traceback\|Failed\|ImportError\|ModuleNotFoundError" "$SCRIPT_DIR/mlflow.log"; then
|
|
266
279
|
print_error "Found error messages in logs:"
|
|
@@ -269,13 +282,13 @@ start_mlflow() {
|
|
|
269
282
|
else
|
|
270
283
|
print_error "No mlflow.log file found"
|
|
271
284
|
fi
|
|
272
|
-
|
|
285
|
+
|
|
273
286
|
# Check if there are any Python errors in the process
|
|
274
287
|
if kill -0 "$mlflow_pid" 2>/dev/null; then
|
|
275
288
|
print_status "MLflow process details:"
|
|
276
289
|
ps -p "$mlflow_pid" -o pid,ppid,cmd,etime 2>/dev/null || true
|
|
277
290
|
fi
|
|
278
|
-
|
|
291
|
+
|
|
279
292
|
return 1
|
|
280
293
|
fi
|
|
281
294
|
}
|
|
@@ -283,57 +296,57 @@ start_mlflow() {
|
|
|
283
296
|
# Function to start API server
|
|
284
297
|
start_api_server() {
|
|
285
298
|
print_status "Starting API server with Gunicorn..."
|
|
286
|
-
|
|
299
|
+
|
|
287
300
|
# Check if dispatcher directory exists
|
|
288
301
|
if [[ ! -d "$DISPATCHER_DIR" ]]; then
|
|
289
302
|
print_error "Dispatcher directory not found at $DISPATCHER_DIR"
|
|
290
303
|
return 1
|
|
291
304
|
fi
|
|
292
|
-
|
|
305
|
+
|
|
293
306
|
# Check if gunicorn config file exists
|
|
294
307
|
if [[ ! -f "$DISPATCHER_DIR/gunicorn.conf.py" ]]; then
|
|
295
308
|
print_error "gunicorn.conf.py not found in dispatcher directory"
|
|
296
309
|
return 1
|
|
297
310
|
fi
|
|
298
|
-
|
|
311
|
+
|
|
299
312
|
# Create database directory
|
|
300
313
|
print_status "Creating database directory..."
|
|
301
314
|
mkdir -p ~/db
|
|
302
315
|
# Ensure proper permissions
|
|
303
316
|
chmod 755 ~/db
|
|
304
|
-
|
|
317
|
+
|
|
305
318
|
# Change to dispatcher directory and start Gunicorn server
|
|
306
319
|
cd "$DISPATCHER_DIR"
|
|
307
|
-
|
|
320
|
+
|
|
308
321
|
# Start Gunicorn server in background with logging
|
|
309
322
|
print_status "API server logs will be written to: $SCRIPT_DIR/api.log"
|
|
310
323
|
gunicorn -c gunicorn.conf.py > "$SCRIPT_DIR/api.log" 2>&1 &
|
|
311
|
-
|
|
324
|
+
|
|
312
325
|
local api_pid=$!
|
|
313
326
|
cd "$SCRIPT_DIR" # Return to original directory
|
|
314
|
-
echo "$api_pid API_Server" >> "$
|
|
315
|
-
|
|
327
|
+
echo "$api_pid API_Server" >> "$RF_PID_FILE"
|
|
328
|
+
|
|
316
329
|
# Wait for API server to be ready - use longer timeout for API server
|
|
317
|
-
if wait_for_service $
|
|
330
|
+
if wait_for_service $RF_API_HOST $RF_API_PORT "API server" 60; then
|
|
318
331
|
print_success "API server started (PID: $api_pid)"
|
|
319
|
-
print_status "API server available at: http://$
|
|
332
|
+
print_status "API server available at: http://$RF_API_HOST:$RF_API_PORT"
|
|
320
333
|
return 0
|
|
321
334
|
else
|
|
322
335
|
print_error "API server failed to start. Checking for errors..."
|
|
323
|
-
|
|
336
|
+
|
|
324
337
|
# Check if process is still running
|
|
325
338
|
if ! kill -0 "$api_pid" 2>/dev/null; then
|
|
326
339
|
print_error "API process has died. Checking logs for startup errors:"
|
|
327
340
|
else
|
|
328
341
|
print_error "API process is running but not responding. Checking logs:"
|
|
329
342
|
fi
|
|
330
|
-
|
|
343
|
+
|
|
331
344
|
if [[ -f "$SCRIPT_DIR/api.log" ]]; then
|
|
332
345
|
echo "=== Last 30 lines of api.log ==="
|
|
333
346
|
tail -30 "$SCRIPT_DIR/api.log"
|
|
334
347
|
echo "=== End of logs ==="
|
|
335
348
|
echo ""
|
|
336
|
-
|
|
349
|
+
|
|
337
350
|
# Look for specific error patterns
|
|
338
351
|
if grep -q "Error\|Exception\|Traceback\|Failed\|ImportError\|ModuleNotFoundError" "$SCRIPT_DIR/api.log"; then
|
|
339
352
|
print_error "Found error messages in logs:"
|
|
@@ -342,13 +355,13 @@ start_api_server() {
|
|
|
342
355
|
else
|
|
343
356
|
print_error "No api.log file found"
|
|
344
357
|
fi
|
|
345
|
-
|
|
358
|
+
|
|
346
359
|
# Check if there are any Python errors in the process
|
|
347
360
|
if kill -0 "$api_pid" 2>/dev/null; then
|
|
348
361
|
print_status "API process details:"
|
|
349
362
|
ps -p "$api_pid" -o pid,ppid,cmd,etime 2>/dev/null || true
|
|
350
363
|
fi
|
|
351
|
-
|
|
364
|
+
|
|
352
365
|
return 1
|
|
353
366
|
fi
|
|
354
367
|
}
|
|
@@ -356,97 +369,97 @@ start_api_server() {
|
|
|
356
369
|
# Function to start frontend server
|
|
357
370
|
start_frontend() {
|
|
358
371
|
print_status "Starting frontend tracking server..."
|
|
359
|
-
|
|
360
|
-
if ! check_port $
|
|
372
|
+
|
|
373
|
+
if ! check_port $RF_FRONTEND_PORT "Frontend server"; then
|
|
361
374
|
return 1
|
|
362
375
|
fi
|
|
363
|
-
|
|
376
|
+
|
|
364
377
|
# Check if frontend directory exists
|
|
365
378
|
if [[ ! -d "$FRONTEND_DIR" ]]; then
|
|
366
379
|
print_error "Frontend directory not found at $FRONTEND_DIR"
|
|
367
380
|
return 1
|
|
368
381
|
fi
|
|
369
|
-
|
|
382
|
+
|
|
370
383
|
# Change to frontend directory
|
|
371
384
|
cd "$FRONTEND_DIR"
|
|
372
|
-
|
|
385
|
+
|
|
373
386
|
# Check if build directory exists
|
|
374
387
|
if [[ ! -d "build" ]]; then
|
|
375
388
|
print_error "Build directory not found. Please run 'npm run build' in the frontend directory first."
|
|
376
389
|
cd "$SCRIPT_DIR"
|
|
377
390
|
return 1
|
|
378
391
|
fi
|
|
379
|
-
|
|
392
|
+
|
|
380
393
|
# Check if Flask server exists
|
|
381
394
|
if [[ ! -f "server.py" ]]; then
|
|
382
395
|
print_error "Flask server (server.py) not found in frontend directory"
|
|
383
396
|
cd "$SCRIPT_DIR"
|
|
384
397
|
return 1
|
|
385
398
|
fi
|
|
386
|
-
|
|
399
|
+
|
|
387
400
|
# Test if the server can be imported without errors
|
|
388
401
|
print_status "Testing frontend server imports..."
|
|
389
|
-
if !
|
|
402
|
+
if ! ${RF_PYTHON_EXECUTABLE} -c "import server" 2>/dev/null; then
|
|
390
403
|
print_error "Frontend server has import errors. Testing with verbose output:"
|
|
391
|
-
|
|
404
|
+
${RF_PYTHON_EXECUTABLE} -c "import server" 2>&1 | head -20
|
|
392
405
|
cd "$SCRIPT_DIR"
|
|
393
406
|
return 1
|
|
394
407
|
fi
|
|
395
408
|
print_success "Frontend server imports successfully"
|
|
396
|
-
|
|
409
|
+
|
|
397
410
|
print_status "Starting production frontend server with Flask..."
|
|
398
|
-
|
|
411
|
+
|
|
399
412
|
# Start Flask server in background with process group
|
|
400
413
|
print_status "Frontend logs will be written to: $SCRIPT_DIR/frontend.log"
|
|
401
414
|
cd "$FRONTEND_DIR"
|
|
402
|
-
|
|
415
|
+
|
|
403
416
|
# Use setsid on Linux, nohup on macOS for better process management
|
|
404
417
|
if command -v setsid &> /dev/null; then
|
|
405
|
-
PORT=$
|
|
418
|
+
PORT=$RF_FRONTEND_PORT setsid ${RF_PYTHON_EXECUTABLE} server.py > "$SCRIPT_DIR/frontend.log" 2>&1 &
|
|
406
419
|
else
|
|
407
|
-
PORT=$
|
|
420
|
+
PORT=$RF_FRONTEND_PORT nohup ${RF_PYTHON_EXECUTABLE} server.py > "$SCRIPT_DIR/frontend.log" 2>&1 &
|
|
408
421
|
fi
|
|
409
|
-
|
|
422
|
+
|
|
410
423
|
local frontend_pid=$!
|
|
411
424
|
cd "$SCRIPT_DIR" # Return to original directory
|
|
412
|
-
|
|
425
|
+
|
|
413
426
|
# Store both PID and process group ID for better cleanup
|
|
414
427
|
if command -v setsid &> /dev/null; then
|
|
415
428
|
# On Linux, we can get the process group ID
|
|
416
|
-
echo "$frontend_pid Frontend_Flask" >> "$
|
|
429
|
+
echo "$frontend_pid Frontend_Flask" >> "$RF_PID_FILE"
|
|
417
430
|
else
|
|
418
431
|
# On macOS, just store the PID
|
|
419
|
-
echo "$frontend_pid Frontend_Flask" >> "$
|
|
432
|
+
echo "$frontend_pid Frontend_Flask" >> "$RF_PID_FILE"
|
|
420
433
|
fi
|
|
421
|
-
|
|
434
|
+
|
|
422
435
|
# Wait for frontend to be ready - check both localhost and 127.0.0.1
|
|
423
436
|
local frontend_ready=false
|
|
424
437
|
local check_hosts=("localhost" "127.0.0.1")
|
|
425
|
-
|
|
438
|
+
|
|
426
439
|
for host in "${check_hosts[@]}"; do
|
|
427
|
-
if wait_for_service $host $
|
|
428
|
-
print_success "Frontend Flask server started (PID: $frontend_pid) on $host:$
|
|
440
|
+
if wait_for_service $host $RF_FRONTEND_PORT "Frontend server" 15; then
|
|
441
|
+
print_success "Frontend Flask server started (PID: $frontend_pid) on $host:$RF_FRONTEND_PORT"
|
|
429
442
|
frontend_ready=true
|
|
430
443
|
break
|
|
431
444
|
fi
|
|
432
445
|
done
|
|
433
|
-
|
|
446
|
+
|
|
434
447
|
if [[ "$frontend_ready" == false ]]; then
|
|
435
448
|
print_error "Frontend Flask server failed to start. Checking for errors..."
|
|
436
|
-
|
|
449
|
+
|
|
437
450
|
# Check if process is still running
|
|
438
451
|
if ! kill -0 "$frontend_pid" 2>/dev/null; then
|
|
439
452
|
print_error "Frontend process has died. Checking logs for startup errors:"
|
|
440
453
|
else
|
|
441
454
|
print_error "Frontend process is running but not responding. Checking logs:"
|
|
442
455
|
fi
|
|
443
|
-
|
|
456
|
+
|
|
444
457
|
if [[ -f "$SCRIPT_DIR/frontend.log" ]]; then
|
|
445
458
|
echo "=== Last 30 lines of frontend.log ==="
|
|
446
459
|
tail -30 "$SCRIPT_DIR/frontend.log"
|
|
447
460
|
echo "=== End of logs ==="
|
|
448
461
|
echo ""
|
|
449
|
-
|
|
462
|
+
|
|
450
463
|
# Look for specific error patterns
|
|
451
464
|
if grep -q "Error\|Exception\|Traceback\|Failed" "$SCRIPT_DIR/frontend.log"; then
|
|
452
465
|
print_error "Found error messages in logs:"
|
|
@@ -455,16 +468,16 @@ start_frontend() {
|
|
|
455
468
|
else
|
|
456
469
|
print_error "No frontend.log file found"
|
|
457
470
|
fi
|
|
458
|
-
|
|
471
|
+
|
|
459
472
|
# Check if there are any Python errors in the process
|
|
460
473
|
if kill -0 "$frontend_pid" 2>/dev/null; then
|
|
461
474
|
print_status "Frontend process details:"
|
|
462
475
|
ps -p "$frontend_pid" -o pid,ppid,cmd,etime 2>/dev/null || true
|
|
463
476
|
fi
|
|
464
|
-
|
|
477
|
+
|
|
465
478
|
return 1
|
|
466
479
|
fi
|
|
467
|
-
|
|
480
|
+
|
|
468
481
|
return 0
|
|
469
482
|
}
|
|
470
483
|
|
|
@@ -472,24 +485,24 @@ start_frontend() {
|
|
|
472
485
|
show_status() {
|
|
473
486
|
print_status "RapidFire AI Services Status:"
|
|
474
487
|
echo "=================================="
|
|
475
|
-
|
|
476
|
-
if [[ -f "$
|
|
488
|
+
|
|
489
|
+
if [[ -f "$RF_PID_FILE" ]]; then
|
|
477
490
|
while read -r pid service; do
|
|
478
491
|
if kill -0 "$pid" 2>/dev/null; then
|
|
479
492
|
print_success "$service is running (PID: $pid)"
|
|
480
493
|
else
|
|
481
494
|
print_error "$service is not running (PID: $pid)"
|
|
482
495
|
fi
|
|
483
|
-
done < "$
|
|
496
|
+
done < "$RF_PID_FILE"
|
|
484
497
|
else
|
|
485
498
|
print_warning "No services are currently tracked"
|
|
486
499
|
fi
|
|
487
|
-
|
|
500
|
+
|
|
488
501
|
echo ""
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
502
|
+
print_success "🚀 RapidFire Frontend is ready!"
|
|
503
|
+
print_status "👉 Open your browser and navigate to: http://$RF_FRONTEND_HOST:$RF_FRONTEND_PORT"
|
|
504
|
+
print_status " (Click the link above or copy/paste the URL into your browser)"
|
|
505
|
+
|
|
493
506
|
# Show log file status
|
|
494
507
|
echo ""
|
|
495
508
|
print_status "Log files:"
|
|
@@ -507,41 +520,41 @@ show_status() {
|
|
|
507
520
|
start_services() {
|
|
508
521
|
local services_started=0
|
|
509
522
|
local total_services=3
|
|
510
|
-
|
|
523
|
+
|
|
511
524
|
# Start MLflow server
|
|
512
525
|
if start_mlflow; then
|
|
513
526
|
((services_started++))
|
|
514
527
|
else
|
|
515
528
|
print_error "Failed to start MLflow server"
|
|
516
529
|
fi
|
|
517
|
-
|
|
530
|
+
|
|
518
531
|
# Start API server
|
|
519
532
|
if start_api_server; then
|
|
520
533
|
((services_started++))
|
|
521
534
|
else
|
|
522
535
|
print_error "Failed to start API server"
|
|
523
536
|
fi
|
|
524
|
-
|
|
537
|
+
|
|
525
538
|
# Start frontend server
|
|
526
539
|
if start_frontend; then
|
|
527
540
|
((services_started++))
|
|
528
541
|
else
|
|
529
542
|
print_error "Failed to start frontend server"
|
|
530
543
|
fi
|
|
531
|
-
|
|
544
|
+
|
|
532
545
|
return $((total_services - services_started))
|
|
533
546
|
}
|
|
534
547
|
|
|
535
548
|
# Main execution
|
|
536
549
|
main() {
|
|
537
550
|
print_status "Starting RapidFire AI services..."
|
|
538
|
-
|
|
551
|
+
|
|
539
552
|
# Remove old PID file
|
|
540
|
-
rm -f "$
|
|
541
|
-
|
|
553
|
+
rm -f "$RF_PID_FILE"
|
|
554
|
+
|
|
542
555
|
# Set up signal handlers for cleanup
|
|
543
556
|
trap cleanup SIGINT SIGTERM EXIT
|
|
544
|
-
|
|
557
|
+
|
|
545
558
|
# Check for required commands
|
|
546
559
|
for cmd in mlflow gunicorn; do
|
|
547
560
|
if ! command -v $cmd &> /dev/null; then
|
|
@@ -549,41 +562,41 @@ main() {
|
|
|
549
562
|
exit 1
|
|
550
563
|
fi
|
|
551
564
|
done
|
|
552
|
-
|
|
565
|
+
|
|
553
566
|
# Setup Python environment
|
|
554
567
|
if ! setup_python_env; then
|
|
555
568
|
print_error "Failed to setup Python environment"
|
|
556
569
|
exit 1
|
|
557
570
|
fi
|
|
558
|
-
|
|
571
|
+
|
|
559
572
|
# Check for common startup issues
|
|
560
573
|
if ! check_startup_issues; then
|
|
561
574
|
print_error "Startup checks failed"
|
|
562
575
|
exit 1
|
|
563
576
|
fi
|
|
564
|
-
|
|
577
|
+
|
|
565
578
|
# Start services
|
|
566
579
|
if start_services; then
|
|
567
580
|
print_success "All services started successfully!"
|
|
568
581
|
show_status
|
|
569
|
-
|
|
582
|
+
|
|
570
583
|
print_status "Press Ctrl+C to stop all services"
|
|
571
|
-
|
|
584
|
+
|
|
572
585
|
# Keep script running and monitor processes
|
|
573
586
|
while true; do
|
|
574
587
|
sleep 5
|
|
575
588
|
# Check if any process died
|
|
576
|
-
if [[ -f "$
|
|
589
|
+
if [[ -f "$RF_PID_FILE" ]]; then
|
|
577
590
|
while read -r pid service; do
|
|
578
591
|
if ! kill -0 "$pid" 2>/dev/null; then
|
|
579
592
|
print_error "$service (PID: $pid) has stopped unexpectedly"
|
|
580
593
|
fi
|
|
581
|
-
done < "$
|
|
594
|
+
done < "$RF_PID_FILE"
|
|
582
595
|
fi
|
|
583
596
|
done
|
|
584
597
|
else
|
|
585
598
|
print_error "Failed to start one or more services"
|
|
586
|
-
|
|
599
|
+
|
|
587
600
|
# Show summary of all log files for debugging
|
|
588
601
|
print_status "=== Startup Failure Summary ==="
|
|
589
602
|
for log_file in "mlflow.log" "api.log" "frontend.log"; do
|
|
@@ -597,7 +610,7 @@ main() {
|
|
|
597
610
|
fi
|
|
598
611
|
fi
|
|
599
612
|
done
|
|
600
|
-
|
|
613
|
+
|
|
601
614
|
cleanup
|
|
602
615
|
exit 1
|
|
603
616
|
fi
|
|
@@ -631,4 +644,4 @@ case "${1:-start}" in
|
|
|
631
644
|
echo " setup - Setup Python environment only"
|
|
632
645
|
exit 1
|
|
633
646
|
;;
|
|
634
|
-
esac
|
|
647
|
+
esac
|