featrixsphere 0.2.1462__py3-none-any.whl → 0.2.1830__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- featrixsphere/__init__.py +1 -1
- featrixsphere/client.py +300 -11
- {featrixsphere-0.2.1462.dist-info → featrixsphere-0.2.1830.dist-info}/METADATA +1 -1
- featrixsphere-0.2.1830.dist-info/RECORD +8 -0
- featrixsphere/cli.py +0 -338
- featrixsphere-0.2.1462.dist-info/RECORD +0 -9
- {featrixsphere-0.2.1462.dist-info → featrixsphere-0.2.1830.dist-info}/WHEEL +0 -0
- {featrixsphere-0.2.1462.dist-info → featrixsphere-0.2.1830.dist-info}/entry_points.txt +0 -0
- {featrixsphere-0.2.1462.dist-info → featrixsphere-0.2.1830.dist-info}/top_level.txt +0 -0
featrixsphere/__init__.py
CHANGED
featrixsphere/client.py
CHANGED
@@ -281,9 +281,20 @@ class FeatrixSphereClient:
         max_retries = self.default_max_retries

         # Special handling for session endpoints - longer retry window for 504 errors
-
-
-
+        # Session endpoints include /session/ and /upload_with_new_session/ (creates session)
+        is_session_endpoint = '/session/' in endpoint or '/upload_with_new_session' in endpoint
+
+        # For upload endpoints, use a much longer timeout (10 minutes for large files)
+        # This MUST happen before setting max_retry_time to ensure uploads get proper timeout
+        if '/upload_with_new_session' in endpoint:
+            # Override timeout if not explicitly set in kwargs
+            if 'timeout' not in kwargs:
+                kwargs['timeout'] = 600  # 10 minutes for file uploads
+            # Also set a longer max_retry_time for upload endpoints (10 minutes)
+            if max_retry_time is None:
+                max_retry_time = 600.0  # 10 minutes for upload endpoints
+        elif max_retry_time is None and is_session_endpoint:
+            max_retry_time = 120.0  # 120 seconds for other session endpoints

         # Auto-add /compute prefix for session endpoints
         if endpoint.startswith('/session/') and not endpoint.startswith('/compute/session/'):
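To see the new policy in isolation: a minimal standalone sketch of the timeout selection this hunk adds (the `select_timeouts` helper is hypothetical, not part of the client). Uploads get a 10-minute request timeout and 504 retry window; other session endpoints get a 120-second retry window.

```python
from typing import Optional, Tuple

def select_timeouts(endpoint: str, kwargs: dict,
                    max_retry_time: Optional[float]) -> Tuple[dict, Optional[float]]:
    """Mirror of the endpoint-based timeout policy in the hunk above (sketch)."""
    is_session_endpoint = '/session/' in endpoint or '/upload_with_new_session' in endpoint
    if '/upload_with_new_session' in endpoint:
        kwargs.setdefault('timeout', 600)   # 10-minute request timeout for uploads
        if max_retry_time is None:
            max_retry_time = 600.0          # 10-minute 504 retry window
    elif max_retry_time is None and is_session_endpoint:
        max_retry_time = 120.0              # 2-minute window for other session endpoints
    return kwargs, max_retry_time

kwargs, window = select_timeouts('/upload_with_new_session/', {}, None)
assert kwargs['timeout'] == 600 and window == 600.0
```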
@@ -323,7 +334,8 @@ class FeatrixSphereClient:
                 else:
                     # Out of retry time
                     print(f"API request failed: {method} {url}")
-
+                    max_retry_time_str = f"{max_retry_time}s" if max_retry_time else "None"
+                    print(f"504 Gateway Timeout - exceeded max retry time ({max_retry_time_str})")
                     raise

             # Check for server restart patterns in 500 errors
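The intermediate `max_retry_time_str` exists because `max_retry_time` can be `None`; both branches, for illustration:

```python
for max_retry_time in (120.0, None):
    max_retry_time_str = f"{max_retry_time}s" if max_retry_time else "None"
    print(f"504 Gateway Timeout - exceeded max retry time ({max_retry_time_str})")
# -> ... (120.0s)
# -> ... (None)
```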
@@ -1082,19 +1094,29 @@ class FeatrixSphereClient:
         """

         if session_info.jobs:
+            # Human-readable job type descriptions
+            JOB_TYPE_NAMES = {
+                'create_structured_data': 'Creating Structured Data',
+                'train_es': 'Training Embedding Space',
+                'train_knn': 'Training KNN Index',
+                'run_clustering': 'Running Clustering',
+                'train_single_predictor': 'Training Predictor',
+            }
+
             html_content += "<h4>Jobs:</h4><ul>"
             for job_id, job in session_info.jobs.items():
                 job_status = job.get('status', 'unknown')
                 progress = job.get('progress')
                 job_type = job.get('type', job_id.split('_')[0])
+                job_display_name = JOB_TYPE_NAMES.get(job_type, job_type)

                 if progress is not None:
                     progress_pct = progress * 100
                     progress_bar = "▓" * int(progress_pct / 5) + "░" * (20 - int(progress_pct / 5))
-                    html_content += f"<li><strong>{
+                    html_content += f"<li><strong>{job_display_name}:</strong> {job_status} [{progress_bar}] {progress_pct:.1f}%</li>"
                 else:
                     status_emoji = "✅" if job_status == "done" else "🔄" if job_status == "running" else "❌"
-                    html_content += f"<li>{status_emoji} <strong>{
+                    html_content += f"<li>{status_emoji} <strong>{job_display_name}:</strong> {job_status}</li>"
             html_content += "</ul>"

         display(HTML(html_content))
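Both rendering paths now share the same lookup-with-fallback pattern; a small self-contained sketch (sample job types only, values illustrative):

```python
# Map internal job types to human-readable names, falling back to the raw type,
# and render the same 20-cell text progress bar used in the hunk above.
JOB_TYPE_NAMES = {
    'train_es': 'Training Embedding Space',
    'train_knn': 'Training KNN Index',
}

def render_job(job_type: str, progress: float) -> str:
    name = JOB_TYPE_NAMES.get(job_type, job_type)   # unknown types pass through
    pct = progress * 100
    bar = "▓" * int(pct / 5) + "░" * (20 - int(pct / 5))
    return f"{name}: [{bar}] {pct:.1f}%"

print(render_job('train_es', 0.42))    # Training Embedding Space: [▓▓▓▓▓▓▓▓░░░░░░░░░░░░] 42.0%
print(render_job('custom_job', 1.0))   # custom_job: [▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓] 100.0%
```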
@@ -1149,6 +1171,15 @@ class FeatrixSphereClient:
         from rich.text import Text
         import time

+        # Human-readable job type descriptions
+        JOB_TYPE_NAMES = {
+            'create_structured_data': 'Creating Structured Data',
+            'train_es': 'Training Embedding Space',
+            'train_knn': 'Training KNN Index',
+            'run_clustering': 'Running Clustering',
+            'train_single_predictor': 'Training Predictor',
+        }
+
         start_time = time.time()
         job_tasks = {}  # Track progress tasks for each job

@@ -1180,15 +1211,18 @@ class FeatrixSphereClient:
                 for job_id, job in session_info.jobs.items():
                     if job_id not in job_tasks:
                         job_type = job.get('type', job_id.split('_')[0])
-
+                        job_display_name = JOB_TYPE_NAMES.get(job_type, job_type)
+                        job_tasks[job_id] = progress.add_task(f"[cyan]{job_display_name}", total=100)

                     # Update job progress
                     job_status = job.get('status', 'unknown')
                     raw_progress = job.get('progress', 0)
                     job_progress = 100 if job_status == 'done' else (raw_progress * 100 if raw_progress else 0)

+                    job_type = job.get('type', job_id.split('_')[0])
+                    job_display_name = JOB_TYPE_NAMES.get(job_type, job_type)
                     progress.update(job_tasks[job_id], completed=job_progress,
-                                    description=f"[cyan]{
+                                    description=f"[cyan]{job_display_name} ({job_status})")

                 # Check completion
                 if session_info.status in ['done', 'failed', 'cancelled']:
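For the Rich-based display, the same mapping feeds `Progress.add_task`/`Progress.update`: one task per job, created on first sight and updated in place on each poll. A runnable sketch of that pattern with made-up job data:

```python
from rich.progress import Progress

jobs = {'job_1': {'type': 'train_es', 'status': 'running', 'progress': 0.4}}
job_tasks = {}

with Progress() as progress:
    for job_id, job in jobs.items():
        if job_id not in job_tasks:
            # Create the task once; later polls only update it
            job_tasks[job_id] = progress.add_task(f"[cyan]{job['type']}", total=100)
        completed = 100 if job['status'] == 'done' else (job.get('progress') or 0) * 100
        progress.update(job_tasks[job_id], completed=completed,
                        description=f"[cyan]{job['type']} ({job['status']})")
```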
@@ -1232,8 +1266,20 @@ class FeatrixSphereClient:
                 status_callback(session_info, elapsed)

             # Check if completed
+            # CRITICAL: Only return done if we actually have jobs that completed
+            # If no jobs exist, session shouldn't be marked as done
             if session_info.status in ['completed', 'done', 'DONE']:
-
+                # Verify that jobs actually exist and completed
+                if session_info.jobs:
+                    # Check if all jobs are in terminal states
+                    terminal_states = {'done', 'failed', 'cancelled'}
+                    all_terminal = all(job.get('status') in terminal_states for job in session_info.jobs.values())
+                    if all_terminal:
+                        return session_info
+                else:
+                    # No jobs but status is "done" - this is a bug, keep waiting
+                    logger.warning(f"⚠️ Session {session_id} status is 'done' but no jobs exist - this shouldn't happen, continuing to wait...")
+                    # Don't return - keep waiting for jobs to appear

             time.sleep(check_interval)

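The guard above is easy to test in isolation; a sketch (the `session_really_done` helper is hypothetical): a session that reports "done" only counts as finished when it has jobs and every job is in a terminal state.

```python
TERMINAL_STATES = {'done', 'failed', 'cancelled'}

def session_really_done(status: str, jobs: dict) -> bool:
    if status not in ('completed', 'done', 'DONE'):
        return False
    # "done" with no jobs is treated as a server-side inconsistency: keep waiting
    return bool(jobs) and all(j.get('status') in TERMINAL_STATES for j in jobs.values())

assert session_really_done('done', {'a': {'status': 'done'}})
assert not session_really_done('done', {})                          # no jobs yet
assert not session_really_done('done', {'a': {'status': 'running'}})
```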
@@ -2145,8 +2191,38 @@ class FeatrixSphereClient:
             data['quick_run'] = 'true'
             data['epochs'] = str(epochs)
             print(f"Training epochs: {epochs} (quick_run mode enabled)")
-
-
+
+        # Check file size - warn if very large
+        file_size_mb = len(file_content) / (1024 * 1024)
+        CHUNK_SIZE_MB = 512  # 512 MB chunk size
+        CHUNK_SIZE_BYTES = CHUNK_SIZE_MB * 1024 * 1024
+
+        if file_size_mb > CHUNK_SIZE_MB:
+            print(f"⚠️ Warning: File size ({file_size_mb:.1f} MB) exceeds {CHUNK_SIZE_MB} MB threshold")
+            print(f"   Large uploads may timeout. Consider splitting the data or using smaller batches.")
+
+        # Try upload with retry on 504
+        import time
+        upload_trace_id = f"UPLOAD-{int(time.time()*1000)}"
+        print(f"🔵 [CLIENT] {upload_trace_id} Starting upload request")
+        print(f"   Endpoint: /compute/upload_with_new_session/")
+        print(f"   File: {filename}, Size: {file_size_mb:.2f} MB")
+        print(f"   Form data keys: {list(data.keys())}")
+        try:
+            response = self._make_request("POST", "/compute/upload_with_new_session/", files=files, data=data)
+            print(f"🔵 [CLIENT] {upload_trace_id} Received response: HTTP {response.status_code}")
+        except requests.exceptions.HTTPError as e:
+            # If we get a 504 and file is large, suggest chunking
+            if e.response and e.response.status_code == 504 and file_size_mb > CHUNK_SIZE_MB:
+                print(f"\n❌ 504 Gateway Timeout on large file upload ({file_size_mb:.1f} MB)")
+                print(f"   File exceeds {CHUNK_SIZE_MB} MB - chunking not yet implemented for regular uploads")
+                print(f"   Consider:")
+                print(f"   1. Splitting your data into smaller files (< {CHUNK_SIZE_MB} MB each)")
+                print(f"   2. Using the chunked upload endpoint (if available)")
+                print(f"   3. Retrying the upload (server may have been temporarily busy)")
+                raise
+            else:
+                raise

         response_data = response.json()
         session_id = response_data.get('session_id')
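The 512 MB threshold is computed in binary megabytes (1 MB = 1024 * 1024 bytes); a quick check of the arithmetic with a hypothetical payload size:

```python
CHUNK_SIZE_MB = 512
size_bytes = 600 * 1024 * 1024                 # hypothetical 600 MB payload
file_size_mb = size_bytes / (1024 * 1024)
print(f"{file_size_mb:.1f} MB -> warn: {file_size_mb > CHUNK_SIZE_MB}")  # 600.0 MB -> warn: True
```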
@@ -5296,6 +5372,219 @@ class FeatrixSphereClient:
             if verbose:
                 print(f"❌ Error starting predictor continuation: {e}")
             raise
+
+    def train_on_partial_foundation(
+        self,
+        foundation_session_id: str,
+        local_data_file: str,
+        target_column: str,
+        target_column_type: str,
+        checkpoint_epoch: int = None,
+        epochs: int = 0,
+        rare_label_value: str = None,
+        use_class_weights: bool = True,
+        class_imbalance: dict = None,
+        cost_false_positive: float = None,
+        cost_false_negative: float = None,
+        name: str = None,
+        session_name_prefix: str = None,
+        webhooks: Dict[str, str] = None,
+        poll_interval: int = 30,
+        max_poll_time: int = 3600,
+        verbose: bool = True
+    ) -> Dict[str, Any]:
+        """
+        Train a predictor on a partial/in-progress foundation model.
+
+        Designed for use while the foundation model is STILL TRAINING. Test predictor
+        performance on intermediate checkpoints without waiting for full ES training
+        to complete. Also works with completed foundation training.
+
+        Creates a NEW session using:
+        - Checkpoint from foundation's ES training (specific epoch or best/latest)
+        - Your uploaded data file for predictor training
+        - Metadata about foundation training progress at checkpoint time
+
+        Perfect for:
+        - Testing predictor performance while ES is still training
+        - Training on partially-trained foundation models
+        - Running parallel experiments on different checkpoint epochs
+
+        Args:
+            foundation_session_id: Session ID with ES training (in-progress or completed)
+            local_data_file: Path to CSV file on YOUR MACHINE (will be uploaded)
+            target_column: Column to predict
+            target_column_type: "set" for classification, "scalar" for regression
+            checkpoint_epoch: Which epoch checkpoint to use (None = best/latest available)
+            epochs: Predictor training epochs (0 = auto-calculate based on data size)
+            rare_label_value: Minority class label for binary classification
+            use_class_weights: Enable class weighting for imbalanced data
+            class_imbalance: Expected class distribution from real world
+            cost_false_positive: Cost of false positive (classification only)
+            cost_false_negative: Cost of false negative (classification only)
+            name: Optional name for the new session
+            session_name_prefix: Optional prefix for session ID
+            webhooks: Webhook configuration dict
+            poll_interval: Seconds between status checks (default: 30)
+            max_poll_time: Maximum polling time in seconds (default: 3600 = 1 hour)
+            verbose: Print status updates
+
+        Returns:
+            dict with:
+            - new_session_id: Created session ID
+            - foundation_session_id: Original foundation session
+            - checkpoint_epoch: Epoch used for checkpoint
+            - foundation_training_metadata: Progress info when checkpoint was created
+            - target_column: Column being predicted
+            - predictor_epochs: Training epochs
+
+        Example:
+            ```python
+            # Train on epoch 50 checkpoint while ES is still training
+            result = client.train_on_partial_foundation(
+                foundation_session_id="abc-def-ghi",
+                local_data_file="my_data.csv",
+                target_column="price",
+                target_column_type="scalar",
+                checkpoint_epoch=50,
+                epochs=100
+            )
+
+            print(f"New session: {result['new_session_id']}")
+            print(f"Foundation was {result['foundation_training_metadata']['training_progress_percent']}% trained")
+            ```
+        """
+        import os
+        from pathlib import Path
+
+        # Validate inputs
+        if not os.path.exists(local_data_file):
+            raise FileNotFoundError(f"Local data file not found: {local_data_file}")
+
+        if target_column_type not in ["set", "scalar"]:
+            raise ValueError("target_column_type must be 'set' or 'scalar'")
+
+        # Prepare form data
+        form_data = {
+            'target_column': target_column,
+            'target_column_type': target_column_type,
+            'epochs': str(epochs),
+            'use_class_weights': str(use_class_weights).lower(),
+        }
+
+        if checkpoint_epoch is not None:
+            form_data['checkpoint_epoch'] = str(checkpoint_epoch)
+        if rare_label_value:
+            form_data['rare_label_value'] = rare_label_value
+        if class_imbalance:
+            import json
+            form_data['class_imbalance'] = json.dumps(class_imbalance)
+        if cost_false_positive is not None:
+            form_data['cost_false_positive'] = str(cost_false_positive)
+        if cost_false_negative is not None:
+            form_data['cost_false_negative'] = str(cost_false_negative)
+        if name:
+            form_data['name'] = name
+        if session_name_prefix:
+            form_data['session_name_prefix'] = session_name_prefix
+        if webhooks:
+            import json
+            form_data['webhooks'] = json.dumps(webhooks)
+
+        # Upload file
+        try:
+            with open(local_data_file, 'rb') as f:
+                files = {'file': (Path(local_data_file).name, f, 'text/csv')}
+                response_data = self._post_multipart(
+                    f"/compute/session/{foundation_session_id}/train_on_partial_foundation",
+                    data=form_data,
+                    files=files
+                )
+
+            new_session_id = response_data.get('new_session_id')
+            checkpoint_epoch_used = response_data.get('checkpoint_epoch')
+            training_metadata = response_data.get('foundation_training_metadata', {})
+
+            if verbose:
+                print(f"✅ {response_data.get('message')}")
+                print(f"   New session ID: {new_session_id}")
+                print(f"   Checkpoint epoch: {checkpoint_epoch_used}")
+                if training_metadata.get('training_progress_percent'):
+                    print(f"   Foundation training progress: {training_metadata['training_progress_percent']}%")
+                if training_metadata.get('validation_loss_at_checkpoint'):
+                    print(f"   Val loss at checkpoint: {training_metadata['validation_loss_at_checkpoint']:.4f}")
+
+            # Poll for completion if requested
+            if poll_interval > 0 and max_poll_time > 0 and new_session_id:
+                import time
+                start_time = time.time()
+                last_message = ""
+
+                while time.time() - start_time < max_poll_time:
+                    try:
+                        session_info = self.get_session_status(new_session_id)
+                        jobs = session_info.jobs if hasattr(session_info, 'jobs') else {}
+
+                        # Find predictor training jobs
+                        sp_jobs = {j_id: j for j_id, j in jobs.items()
+                                   if 'train_single_predictor' in j.get('type', '')}
+
+                        if not sp_jobs:
+                            if verbose:
+                                print("⏳ Waiting for predictor job to start...")
+                            time.sleep(poll_interval)
+                            continue
+
+                        # Check statuses
+                        running = [j_id for j_id, j in sp_jobs.items() if j.get('status') == 'running']
+                        done = [j_id for j_id, j in sp_jobs.items() if j.get('status') == 'done']
+                        failed = [j_id for j_id, j in sp_jobs.items() if j.get('status') == 'failed']
+
+                        status_msg = f"Running: {len(running)}, Done: {len(done)}, Failed: {len(failed)}"
+                        if status_msg != last_message and verbose:
+                            print(f"📊 {status_msg}")
+                            last_message = status_msg
+
+                        if not running and (done or failed):
+                            if done:
+                                if verbose:
+                                    print(f"✅ Predictor training completed!")
+                                return {
+                                    **response_data,
+                                    "status": "completed",
+                                    "poll_time": int(time.time() - start_time)
+                                }
+                            elif failed:
+                                if verbose:
+                                    print(f"❌ Predictor training failed")
+                                return {
+                                    **response_data,
+                                    "status": "failed",
+                                    "failed_jobs": failed,
+                                    "poll_time": int(time.time() - start_time)
+                                }
+
+                        time.sleep(poll_interval)
+                    except Exception as poll_error:
+                        if verbose:
+                            print(f"⚠️ Polling error: {poll_error}")
+                        time.sleep(poll_interval)
+
+                if verbose:
+                    print(f"⏰ Polling timeout ({max_poll_time}s). Training may still be in progress.")
+
+                return {
+                    **response_data,
+                    "status": "timeout",
+                    "poll_time": max_poll_time
+                }
+
+            return response_data
+
+        except Exception as e:
+            if verbose:
+                print(f"❌ Error training on partial foundation: {e}")
+            raise

     def foundation_model_train_more(self, session_id: str, es_id: str = None, data_passes: int = None,
                                     epochs: int = None, poll_interval: int = 30, max_poll_time: int = 3600,
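Beyond the regression example in the docstring, a hypothetical classification call exercising the imbalance-related parameters (session ID, file, and column names are placeholders; `client` is an existing `FeatrixSphereClient`):

```python
# Placeholder IDs/paths; "set" selects classification per the docstring above
result = client.train_on_partial_foundation(
    foundation_session_id="abc-def-ghi",
    local_data_file="churn.csv",
    target_column="churned",
    target_column_type="set",
    rare_label_value="yes",                     # minority class
    class_imbalance={"yes": 0.05, "no": 0.95},  # expected real-world distribution
    cost_false_negative=10.0,                   # missed churners are expensive
    poll_interval=30,
    max_poll_time=1800,
)
print(result.get("status"), result.get("new_session_id"))
```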
featrixsphere-0.2.1830.dist-info/RECORD
ADDED
@@ -0,0 +1,8 @@
+featrixsphere/__init__.py,sha256=I27lMJL_tBPzKyo_79loiIS83AAC-vuoz1kA3ZY2fhc,1888
+featrixsphere/client.py,sha256=L97tRb-6pCvP7lKYOsK4iYHfsFt3V0URu4RO1mQzFoQ,401468
+featrixsphere/test_client.py,sha256=4SiRbib0ms3poK0UpnUv4G0HFQSzidF3Iswo_J2cjLk,11981
+featrixsphere-0.2.1830.dist-info/METADATA,sha256=IFITUpYkfYT2s7WXDX0-5Xl-iiUBt6bb69-Mr9_w6O8,16232
+featrixsphere-0.2.1830.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+featrixsphere-0.2.1830.dist-info/entry_points.txt,sha256=QreJeYfD_VWvbEqPmMXZ3pqqlFlJ1qZb-NtqnyhEldc,51
+featrixsphere-0.2.1830.dist-info/top_level.txt,sha256=AyN4wjfzlD0hWnDieuEHX0KckphIk_aC73XCG4df5uU,14
+featrixsphere-0.2.1830.dist-info/RECORD,,
featrixsphere/cli.py
DELETED
@@ -1,338 +0,0 @@
-#!/usr/bin/env python3
-"""
-Featrix Sphere CLI
-
-Command-line interface for the Featrix Sphere API client.
-"""
-
-import argparse
-import sys
-from pathlib import Path
-from .client import FeatrixSphereClient
-
-
-def main():
-    """Main CLI entry point."""
-    parser = argparse.ArgumentParser(
-        description="Featrix Sphere API Client - Transform CSV to ML models",
-        formatter_class=argparse.RawDescriptionHelpFormatter,
-        epilog="""
-Examples:
-  # Upload data and create session
-  featrix upload data.csv --server YOUR_SERVER_URL
-
-  # Test predictions on CSV
-  featrix test SESSION_ID test.csv target_column --server YOUR_SERVER_URL
-
-  # Make single prediction from JSON
-  featrix predict SESSION_ID '{"feature": "value"}' --server YOUR_SERVER_URL
-
-  # Make predictions from CSV file
-  featrix predict SESSION_ID test.csv --server YOUR_SERVER_URL --sample-size 50
-"""
-    )
-
-    parser.add_argument("--server", default="https://sphere-api.featrix.com",
-                        help="Featrix Sphere server URL")
-    parser.add_argument("--compute-cluster", type=str, default=None,
-                        help="Compute cluster port for X-Sphere-Compute header")
-    parser.add_argument("--version", action="version",
-                        version=f"featrixsphere {__import__('featrixsphere').__version__}")
-
-    subparsers = parser.add_subparsers(dest="command", help="Available commands")
-
-    # Upload command
-    upload_parser = subparsers.add_parser("upload", help="Upload CSV and create session")
-    upload_parser.add_argument("csv_file", help="CSV file to upload")
-    upload_parser.add_argument("--wait", action="store_true",
-                               help="Wait for training to complete")
-
-    # Test command
-    test_parser = subparsers.add_parser("test", help="Test predictions on CSV")
-    test_parser.add_argument("session_id", help="Session ID")
-    test_parser.add_argument("csv_file", help="CSV file to test")
-    test_parser.add_argument("target_column", help="Target column name")
-    test_parser.add_argument("--sample-size", type=int, default=100,
-                             help="Number of records to test (default: 100)")
-
-    # Predict command
-    predict_parser = subparsers.add_parser("predict", help="Make single prediction")
-    predict_parser.add_argument("session_id", help="Session ID")
-    predict_parser.add_argument("input", help="JSON record or CSV file to predict")
-    predict_parser.add_argument("--sample-size", type=int, default=1000,
-                                help="Number of records to predict from CSV (default: 1000)")
-    predict_parser.add_argument("-v", "--verbose", action="store_true",
-                                help="Print full results structure")
-
-    # Status command
-    status_parser = subparsers.add_parser("status", help="Check session status")
-    status_parser.add_argument("session_id", help="Session ID")
-
-    args = parser.parse_args()
-
-    if not args.command:
-        parser.print_help()
-        return 1
-
-    try:
-        client = FeatrixSphereClient(args.server, compute_cluster=args.compute_cluster)
-
-        if args.command == "upload":
-            return cmd_upload(client, args)
-        elif args.command == "test":
-            return cmd_test(client, args)
-        elif args.command == "predict":
-            return cmd_predict(client, args)
-        elif args.command == "status":
-            return cmd_status(client, args)
-        else:
-            print(f"Unknown command: {args.command}")
-            return 1
-
-    except Exception as e:
-        print(f"Error: {e}")
-        return 1
-
-
-def cmd_upload(client, args):
-    """Handle upload command."""
-    csv_file = Path(args.csv_file)
-    if not csv_file.exists():
-        print(f"File not found: {csv_file}")
-        return 1
-
-    print(f"Uploading {csv_file} to {client.base_url}...")
-    session = client.upload_file_and_create_session(csv_file)
-
-    print(f"✅ Session created: {session.session_id}")
-    print(f"Status: {session.status}")
-
-    if args.wait:
-        print("Waiting for training to complete...")
-        final_session = client.wait_for_session_completion(session.session_id)
-        print(f"✅ Training completed with status: {final_session.status}")
-
-    return 0
-
-
-def cmd_test(client, args):
-    """Handle test command."""
-    csv_file = Path(args.csv_file)
-    if not csv_file.exists():
-        print(f"File not found: {csv_file}")
-        return 1
-
-    print(f"Testing predictions for session {args.session_id}...")
-
-    results = client.test_csv_predictions(
-        session_id=args.session_id,
-        csv_file=str(csv_file),
-        target_column=args.target_column,
-        sample_size=args.sample_size
-    )
-
-    if results.get('accuracy_metrics'):
-        metrics = results['accuracy_metrics']
-        print(f"\n🎯 Results:")
-        print(f"Accuracy: {metrics['accuracy']*100:.2f}%")
-        print(f"Confidence: {metrics['average_confidence']*100:.2f}%")
-        print(f"Correct: {metrics['correct_predictions']}/{metrics['total_predictions']}")
-    else:
-        print(f"✅ Predictions completed: {results['successful_predictions']} successful")
-
-    return 0
-
-
-def cmd_predict(client, args):
-    """Handle predict command."""
-    import json
-    import pandas as pd
-    from pathlib import Path
-
-    input_path = Path(args.input)
-
-    # Check if input is a CSV file
-    if input_path.exists() and input_path.suffix.lower() == '.csv':
-        print(f"📊 Loading CSV file: {input_path}")
-        try:
-            df = pd.read_csv(input_path)
-            print(f"   Loaded {len(df)} records")
-
-            # Sample records if requested (default is 1000, so only sample if explicitly set lower)
-            if args.sample_size < len(df):
-                df = df.sample(n=args.sample_size, random_state=42)
-                print(f"   Sampling {len(df)} records for prediction")
-
-            # Remove target column to avoid warnings
-            target_column = 'Cancellation Non Renewal'
-            if target_column in df.columns:
-                df = df.drop(columns=[target_column])
-                print(f"   Removed target column '{target_column}' from prediction data")
-
-            print(f"\n🎯 Making predictions for {len(df)} records...")
-
-            # Make individual predictions (simpler and more reliable)
-            results = []
-            for i, (_, row) in enumerate(df.iterrows()):
-                record = row.to_dict()
-                print(f"   Predicting record {i+1}/{len(df)}...", end=" ")
-
-                try:
-                    result = client.predict(args.session_id, record)
-
-                    # Extract prediction from response
-                    if 'results' in result:
-                        prediction = result['results']
-                        # Get top prediction
-                        predicted_class = max(prediction, key=prediction.get)
-                        confidence = prediction[predicted_class]
-                    elif 'metadata' in result:
-                        # Use metadata if available (server already computed highest probability)
-                        predicted_class = result['metadata'].get('predicted_class', 'Unknown')
-                        confidence = result['metadata'].get('confidence', 0.0)
-                        prediction = {predicted_class: confidence}
-                    else:
-                        raise ValueError("No prediction results found in response")
-
-                    results.append({
-                        'record': i+1,
-                        'prediction': predicted_class,
-                        'confidence': confidence*100
-                    })
-
-                    if args.verbose:
-                        print(f"→ {predicted_class} ({confidence*100:.6f}%)")
-                        print(f"     Full results: {prediction}")
-                    else:
-                        print(f"→ {predicted_class} ({confidence*100:.6f}%)")
-
-                except Exception as e:
-                    print(f"❌ Error: {e}")
-                    results.append({
-                        'record': i+1,
-                        'prediction': 'ERROR',
-                        'confidence': 0
-                    })
-
-            # Summary
-            print(f"\n📈 Prediction Summary:")
-            successful = [r for r in results if r['prediction'] != 'ERROR']
-            if successful:
-                avg_confidence = sum(r['confidence'] for r in successful) / len(successful)
-                print(f"   ✅ {len(successful)}/{len(results)} successful predictions")
-                print(f"   📊 Average confidence: {avg_confidence:.6f}%")
-
-                # Show prediction distribution
-                from collections import Counter
-                pred_counts = Counter(r['prediction'] for r in successful)
-                print(f"   📋 Prediction distribution:")
-                for pred, count in pred_counts.most_common():
-                    pct = (count / len(successful)) * 100
-                    print(f"      {pred}: {count} ({pct:.1f}%)")
-
-            if args.verbose:
-                print(f"\n🔍 Full Results Structure:")
-                for result in successful:
-                    print(f"   Record {result['record']}: {result}")
-
-            return 0
-
-        except Exception as e:
-            print(f"❌ Error loading CSV: {e}")
-            return 1
-
-    # Handle JSON record
-    try:
-        record = json.loads(args.input)
-    except json.JSONDecodeError as e:
-        print(f"❌ Invalid JSON record: {e}")
-        print("💡 Tip: Use quotes around JSON: '{\"feature\": \"value\"}'")
-        return 1
-
-    print(f"🎯 Making prediction for session {args.session_id}...")
-
-    try:
-        result = client.predict(args.session_id, record)
-
-        # Extract prediction from response
-        if 'results' in result:
-            prediction = result['results']
-
-            print(f"\n🎯 Prediction:")
-            for class_name, confidence in prediction.items():
-                print(f"   {class_name}: {confidence*100:.2f}%")
-
-            # Show top prediction
-            predicted_class = max(prediction, key=prediction.get)
-            confidence = prediction[predicted_class]
-            print(f"\n→ Predicted: {predicted_class} ({confidence*100:.1f}% confidence)")
-
-        elif 'metadata' in result:
-            # Use metadata if available (server already computed highest probability)
-            predicted_class = result['metadata'].get('predicted_class', 'Unknown')
-            confidence = result['metadata'].get('confidence', 0.0)
-
-            print(f"\n🎯 Prediction:")
-            print(f"   {predicted_class}: {confidence*100:.2f}%")
-            print(f"\n→ Predicted: {predicted_class} ({confidence*100:.1f}% confidence)")
-
-        else:
-            raise ValueError("No prediction results found in response")
-
-        return 0
-
-    except Exception as e:
-        print(f"❌ Prediction failed: {e}")
-        return 1
-
-
-def cmd_status(client, args):
-    """Handle status command."""
-    print(f"Checking status for session {args.session_id}...")
-
-    session_info = client.get_session_status(args.session_id)
-
-    print(f"\n📊 Session Status:")
-    print(f"ID: {session_info.session_id}")
-    print(f"Type: {session_info.session_type}")
-    print(f"Status: {session_info.status}")
-
-    if session_info.jobs:
-        print(f"\n🔧 Jobs:")
-        for job_id, job in session_info.jobs.items():
-            status = job.get('status', 'unknown')
-            progress = job.get('progress')
-            job_type = job.get('type', job_id.split('_')[0])
-
-            # Build status line with progress and loss info
-            status_line = f"   {job_type}: {status}"
-
-            if progress is not None:
-                # Fix percentage issue: show 100% when job is done
-                progress_pct = 100.0 if status == 'done' else (progress * 100)
-                status_line += f" ({progress_pct:.1f}%)"
-
-            # Add training metrics for ES and Single Predictor jobs
-            if job_type in ['train_es', 'train_single_predictor'] and status == 'running':
-                metrics = []
-                current_epoch = job.get('current_epoch')
-                current_loss = job.get('current_loss')
-                validation_loss = job.get('validation_loss')
-
-                if current_epoch is not None:
-                    metrics.append(f"Epoch {current_epoch}")
-                if current_loss is not None:
-                    metrics.append(f"Loss: {current_loss:.4f}")
-                if validation_loss is not None:
-                    metrics.append(f"Val Loss: {validation_loss:.4f}")
-
-                if metrics:
-                    status_line += f" - {', '.join(metrics)}"
-
-            print(status_line)
-
-    return 0
-
-
-if __name__ == "__main__":
-    sys.exit(main())
featrixsphere-0.2.1462.dist-info/RECORD
DELETED
@@ -1,9 +0,0 @@
-featrixsphere/__init__.py,sha256=oJsLRgA3q8pSL3rPso0kfVmgPoKdivl2gt4Gj3OZW_M,1888
-featrixsphere/cli.py,sha256=AW9O3vCvCNJ2UxVGN66eRmeN7XLSiHJlvK6JLZ9UJXc,13358
-featrixsphere/client.py,sha256=HWbVkvvZ3MZae_hfa0ChurzrSHMnxTWDW59tOcLERag,386715
-featrixsphere/test_client.py,sha256=4SiRbib0ms3poK0UpnUv4G0HFQSzidF3Iswo_J2cjLk,11981
-featrixsphere-0.2.1462.dist-info/METADATA,sha256=odh92kvhANd28hRmWToVCsW8hv-jb2YLpBdFegOHNkM,16232
-featrixsphere-0.2.1462.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-featrixsphere-0.2.1462.dist-info/entry_points.txt,sha256=QreJeYfD_VWvbEqPmMXZ3pqqlFlJ1qZb-NtqnyhEldc,51
-featrixsphere-0.2.1462.dist-info/top_level.txt,sha256=AyN4wjfzlD0hWnDieuEHX0KckphIk_aC73XCG4df5uU,14
-featrixsphere-0.2.1462.dist-info/RECORD,,
{featrixsphere-0.2.1462.dist-info → featrixsphere-0.2.1830.dist-info}/WHEEL
File without changes

{featrixsphere-0.2.1462.dist-info → featrixsphere-0.2.1830.dist-info}/entry_points.txt
File without changes

{featrixsphere-0.2.1462.dist-info → featrixsphere-0.2.1830.dist-info}/top_level.txt
File without changes