featrixsphere 0.2.1461__tar.gz → 0.2.1637__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/PKG-INFO +1 -1
- featrixsphere-0.2.1637/VERSION +1 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/featrixsphere/__init__.py +1 -1
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/featrixsphere/client.py +111 -14
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/featrixsphere.egg-info/PKG-INFO +1 -1
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/featrixsphere.egg-info/SOURCES.txt +13 -3
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/api.py +3304 -3953
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/auto_upgrade_monitor.py +95 -25
- featrixsphere-0.2.1637/src/celery_app.py +2496 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/example_train_predictor.py +7 -6
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/featrix_watchdog.py +29 -1
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/gc_cleanup.py +208 -12
- featrixsphere-0.2.1637/src/lib/api_event_retry.py +442 -0
- featrixsphere-0.2.1637/src/lib/celery_job_recovery.py +560 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/es_projections.py +1 -1
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/es_training.py +233 -110
- featrixsphere-0.2.1637/src/lib/es_training_wrapper.py +51 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/dataloader_utils.py +73 -10
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/embedded_space.py +680 -261
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/encoders.py +21 -7
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/es_projection.py +6 -4
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/input_data_file.py +24 -4
- featrixsphere-0.2.1637/src/lib/featrix/neural/io_utils.py +822 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/logging_config.py +17 -2
- featrixsphere-0.2.1637/src/lib/featrix/neural/qa/test_checkpoint_dict_reconstruction.py +212 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/qa/test_extend_embedding_space.py +1 -1
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/single_predictor.py +239 -124
- featrixsphere-0.2.1637/src/lib/featrix/neural/stopwatch.py +80 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/string_cache.py +7 -3
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/string_codec.py +55 -69
- featrixsphere-0.2.1637/src/lib/job_manager.py +602 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/knn_training.py +1 -1
- featrixsphere-0.2.1637/src/lib/queue_manager.py +114 -0
- featrixsphere-0.2.1637/src/lib/session_chains.py +621 -0
- featrixsphere-0.2.1637/src/lib/session_manager.py +1384 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/single_predictor_cv.py +1 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/single_predictor_training.py +164 -375
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/structureddata.py +162 -29
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/training_monitor.py +101 -11
- featrixsphere-0.2.1637/src/lib/utils.py +125 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/vector_db.py +26 -6
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/webhook_helpers.py +21 -0
- featrixsphere-0.2.1637/src/migrate_string_cache_naming.py +197 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/node-install.sh +694 -294
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/prediction_drift_monitor.py +5 -5
- featrixsphere-0.2.1637/src/redis_job_progress.py +391 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/run_api_server.sh +6 -1
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/send_email.py +1 -1
- featrixsphere-0.2.1637/src/start_celery_cpu_worker.sh +21 -0
- featrixsphere-0.2.1637/src/start_celery_gpu_worker.sh +39 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/start_celery_worker.sh +1 -1
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/start_churro_server.sh +12 -18
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/test_api_client.py +81 -0
- featrixsphere-0.2.1637/src/utils.py +65 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/system_monitor.py +472 -84
- featrixsphere-0.2.1461/VERSION +0 -1
- featrixsphere-0.2.1461/featrixsphere/cli.py +0 -338
- featrixsphere-0.2.1461/src/celery_app.py +0 -987
- featrixsphere-0.2.1461/src/cli.py +0 -5853
- featrixsphere-0.2.1461/src/featrix_queue.py +0 -7206
- featrixsphere-0.2.1461/src/lib/featrix/neural/stopwatch.py +0 -202
- featrixsphere-0.2.1461/src/lib/utils.py +0 -157
- featrixsphere-0.2.1461/src/utils.py +0 -14
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/MANIFEST.in +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/README.md +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/featrix-update.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/featrixsphere/test_client.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/featrixsphere.egg-info/dependency_links.txt +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/featrixsphere.egg-info/entry_points.txt +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/featrixsphere.egg-info/not-zip-safe +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/featrixsphere.egg-info/requires.txt +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/featrixsphere.egg-info/top_level.txt +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/nv-install.sh +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/requirements.txt +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/setup.cfg +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/setup.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/build_version.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/config.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/demo_existing_model.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/demo_label_updates.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/deploy.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/deploy_cache_debug.sh +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/ensure_watchdog_running.sh +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/error_tracker.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/event_log.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/example_api_usage.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/example_prediction_feedback.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/convergence_monitor.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/epoch_projections.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/__init__.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/MetaDataCache.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/__init__.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/calibration_utils.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/classification_metrics.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/config.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/data_frame_data_set.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/detect.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/device.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/domain_codec.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/dropout_scheduler.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/embedding_lr_scheduler.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/embedding_space_utils.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/embedding_utils.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/enrich.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/exceptions.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/featrix_csv.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/featrix_json.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/featrix_module_dict.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/featrix_token.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/guardrails.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/hubspot_free_domains_list_may_2025.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/input_data_set.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/integrity.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/json_cache.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/json_codec.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/mask_tracker.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/model_config.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/model_hash.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/movie_frame_task.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/network_viz.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/prng_control.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/qa/demo_advisor_decisions.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/qa/example_complete_workflow.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/qa/generate_focal_report.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/qa/model_advisor.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/qa/show_results.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/qa/test_adaptive_training.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/qa/test_confusion_matrix_metadata.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/qa/test_embedding_quality.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/qa/test_embedding_space.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/qa/test_focal_comparison.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/qa/test_focal_comparison_enhanced.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/qa/test_focal_loss_single_predictor.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/qa/test_label_smoothing.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/qa/test_monitor_integration.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/qa/test_piecewise_epochs.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/qa/test_predict_during_training.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/qa/test_timeline_quick.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/qa/test_training_monitor.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/qa/test_warning_tracking.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/qa/visualize_training_timeline.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/scalar_codec.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/set_codec.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/setlist_codec.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/simple_mlp.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/single_predictor_mlp.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/sphere_config.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/sqlite_utils.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/string_analysis.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/string_list_codec.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/timestamp_codec.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/training_context_manager.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/training_event.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/training_exceptions.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/training_history_db.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/transformer_encoder.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/url_codec.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/url_parser.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/utils.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/vector_codec.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix/neural/world_data.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/featrix_debug.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/json_encoder_cache.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/sphere_config.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/lib/weightwatcher_tracking.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/llm_client.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/manage_churro.sh +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/neural.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/prediction_persistence_worker.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/quick_test_deployment.sh +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/recreate_session.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/redis_prediction_cli.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/redis_prediction_store.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/regenerate_training_movie.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/render_sphere.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/restart_celery_worker.sh +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/slack.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/standalone_prediction.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/tail-watch.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/test_complete_workflow.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/test_json_tables_prediction.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/test_redis_predictions.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/test_server_connection.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/test_session_models.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/test_single_predictor_api.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/test_upload_endpoint.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/tree.py +0 -0
- {featrixsphere-0.2.1461 → featrixsphere-0.2.1637}/src/version.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0.2.1637
|
|
@@ -281,9 +281,20 @@ class FeatrixSphereClient:
|
|
|
281
281
|
max_retries = self.default_max_retries
|
|
282
282
|
|
|
283
283
|
# Special handling for session endpoints - longer retry window for 504 errors
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
284
|
+
# Session endpoints include /session/ and /upload_with_new_session/ (creates session)
|
|
285
|
+
is_session_endpoint = '/session/' in endpoint or '/upload_with_new_session' in endpoint
|
|
286
|
+
|
|
287
|
+
# For upload endpoints, use a much longer timeout (10 minutes for large files)
|
|
288
|
+
# This MUST happen before setting max_retry_time to ensure uploads get proper timeout
|
|
289
|
+
if '/upload_with_new_session' in endpoint:
|
|
290
|
+
# Override timeout if not explicitly set in kwargs
|
|
291
|
+
if 'timeout' not in kwargs:
|
|
292
|
+
kwargs['timeout'] = 600 # 10 minutes for file uploads
|
|
293
|
+
# Also set a longer max_retry_time for upload endpoints (10 minutes)
|
|
294
|
+
if max_retry_time is None:
|
|
295
|
+
max_retry_time = 600.0 # 10 minutes for upload endpoints
|
|
296
|
+
elif max_retry_time is None and is_session_endpoint:
|
|
297
|
+
max_retry_time = 120.0 # 120 seconds for other session endpoints
|
|
287
298
|
|
|
288
299
|
# Auto-add /compute prefix for session endpoints
|
|
289
300
|
if endpoint.startswith('/session/') and not endpoint.startswith('/compute/session/'):
|
|
@@ -323,7 +334,8 @@ class FeatrixSphereClient:
|
|
|
323
334
|
else:
|
|
324
335
|
# Out of retry time
|
|
325
336
|
print(f"API request failed: {method} {url}")
|
|
326
|
-
|
|
337
|
+
max_retry_time_str = f"{max_retry_time}s" if max_retry_time else "None"
|
|
338
|
+
print(f"504 Gateway Timeout - exceeded max retry time ({max_retry_time_str})")
|
|
327
339
|
raise
|
|
328
340
|
|
|
329
341
|
# Check for server restart patterns in 500 errors
|
|
@@ -1082,19 +1094,29 @@ class FeatrixSphereClient:
|
|
|
1082
1094
|
"""
|
|
1083
1095
|
|
|
1084
1096
|
if session_info.jobs:
|
|
1097
|
+
# Human-readable job type descriptions
|
|
1098
|
+
JOB_TYPE_NAMES = {
|
|
1099
|
+
'create_structured_data': 'Creating Structured Data',
|
|
1100
|
+
'train_es': 'Training Embedding Space',
|
|
1101
|
+
'train_knn': 'Training KNN Index',
|
|
1102
|
+
'run_clustering': 'Running Clustering',
|
|
1103
|
+
'train_single_predictor': 'Training Predictor',
|
|
1104
|
+
}
|
|
1105
|
+
|
|
1085
1106
|
html_content += "<h4>Jobs:</h4><ul>"
|
|
1086
1107
|
for job_id, job in session_info.jobs.items():
|
|
1087
1108
|
job_status = job.get('status', 'unknown')
|
|
1088
1109
|
progress = job.get('progress')
|
|
1089
1110
|
job_type = job.get('type', job_id.split('_')[0])
|
|
1111
|
+
job_display_name = JOB_TYPE_NAMES.get(job_type, job_type)
|
|
1090
1112
|
|
|
1091
1113
|
if progress is not None:
|
|
1092
1114
|
progress_pct = progress * 100
|
|
1093
1115
|
progress_bar = "▓" * int(progress_pct / 5) + "░" * (20 - int(progress_pct / 5))
|
|
1094
|
-
html_content += f"<li><strong>{
|
|
1116
|
+
html_content += f"<li><strong>{job_display_name}:</strong> {job_status} [{progress_bar}] {progress_pct:.1f}%</li>"
|
|
1095
1117
|
else:
|
|
1096
1118
|
status_emoji = "✅" if job_status == "done" else "🔄" if job_status == "running" else "❌"
|
|
1097
|
-
html_content += f"<li>{status_emoji} <strong>{
|
|
1119
|
+
html_content += f"<li>{status_emoji} <strong>{job_display_name}:</strong> {job_status}</li>"
|
|
1098
1120
|
html_content += "</ul>"
|
|
1099
1121
|
|
|
1100
1122
|
display(HTML(html_content))
|
|
@@ -1149,6 +1171,15 @@ class FeatrixSphereClient:
|
|
|
1149
1171
|
from rich.text import Text
|
|
1150
1172
|
import time
|
|
1151
1173
|
|
|
1174
|
+
# Human-readable job type descriptions
|
|
1175
|
+
JOB_TYPE_NAMES = {
|
|
1176
|
+
'create_structured_data': 'Creating Structured Data',
|
|
1177
|
+
'train_es': 'Training Embedding Space',
|
|
1178
|
+
'train_knn': 'Training KNN Index',
|
|
1179
|
+
'run_clustering': 'Running Clustering',
|
|
1180
|
+
'train_single_predictor': 'Training Predictor',
|
|
1181
|
+
}
|
|
1182
|
+
|
|
1152
1183
|
start_time = time.time()
|
|
1153
1184
|
job_tasks = {} # Track progress tasks for each job
|
|
1154
1185
|
|
|
@@ -1180,15 +1211,18 @@ class FeatrixSphereClient:
|
|
|
1180
1211
|
for job_id, job in session_info.jobs.items():
|
|
1181
1212
|
if job_id not in job_tasks:
|
|
1182
1213
|
job_type = job.get('type', job_id.split('_')[0])
|
|
1183
|
-
|
|
1214
|
+
job_display_name = JOB_TYPE_NAMES.get(job_type, job_type)
|
|
1215
|
+
job_tasks[job_id] = progress.add_task(f"[cyan]{job_display_name}", total=100)
|
|
1184
1216
|
|
|
1185
1217
|
# Update job progress
|
|
1186
1218
|
job_status = job.get('status', 'unknown')
|
|
1187
1219
|
raw_progress = job.get('progress', 0)
|
|
1188
1220
|
job_progress = 100 if job_status == 'done' else (raw_progress * 100 if raw_progress else 0)
|
|
1189
1221
|
|
|
1222
|
+
job_type = job.get('type', job_id.split('_')[0])
|
|
1223
|
+
job_display_name = JOB_TYPE_NAMES.get(job_type, job_type)
|
|
1190
1224
|
progress.update(job_tasks[job_id], completed=job_progress,
|
|
1191
|
-
description=f"[cyan]{
|
|
1225
|
+
description=f"[cyan]{job_display_name} ({job_status})")
|
|
1192
1226
|
|
|
1193
1227
|
# Check completion
|
|
1194
1228
|
if session_info.status in ['done', 'failed', 'cancelled']:
|
|
@@ -1232,8 +1266,20 @@ class FeatrixSphereClient:
|
|
|
1232
1266
|
status_callback(session_info, elapsed)
|
|
1233
1267
|
|
|
1234
1268
|
# Check if completed
|
|
1269
|
+
# CRITICAL: Only return done if we actually have jobs that completed
|
|
1270
|
+
# If no jobs exist, session shouldn't be marked as done
|
|
1235
1271
|
if session_info.status in ['completed', 'done', 'DONE']:
|
|
1236
|
-
|
|
1272
|
+
# Verify that jobs actually exist and completed
|
|
1273
|
+
if session_info.jobs:
|
|
1274
|
+
# Check if all jobs are in terminal states
|
|
1275
|
+
terminal_states = {'done', 'failed', 'cancelled'}
|
|
1276
|
+
all_terminal = all(job.get('status') in terminal_states for job in session_info.jobs.values())
|
|
1277
|
+
if all_terminal:
|
|
1278
|
+
return session_info
|
|
1279
|
+
else:
|
|
1280
|
+
# No jobs but status is "done" - this is a bug, keep waiting
|
|
1281
|
+
logger.warning(f"⚠️ Session {session_id} status is 'done' but no jobs exist - this shouldn't happen, continuing to wait...")
|
|
1282
|
+
# Don't return - keep waiting for jobs to appear
|
|
1237
1283
|
|
|
1238
1284
|
time.sleep(check_interval)
|
|
1239
1285
|
|
|
@@ -2145,8 +2191,38 @@ class FeatrixSphereClient:
|
|
|
2145
2191
|
data['quick_run'] = 'true'
|
|
2146
2192
|
data['epochs'] = str(epochs)
|
|
2147
2193
|
print(f"Training epochs: {epochs} (quick_run mode enabled)")
|
|
2148
|
-
|
|
2149
|
-
|
|
2194
|
+
|
|
2195
|
+
# Check file size - warn if very large
|
|
2196
|
+
file_size_mb = len(file_content) / (1024 * 1024)
|
|
2197
|
+
CHUNK_SIZE_MB = 512 # 512 MB chunk size
|
|
2198
|
+
CHUNK_SIZE_BYTES = CHUNK_SIZE_MB * 1024 * 1024
|
|
2199
|
+
|
|
2200
|
+
if file_size_mb > CHUNK_SIZE_MB:
|
|
2201
|
+
print(f"⚠️ Warning: File size ({file_size_mb:.1f} MB) exceeds {CHUNK_SIZE_MB} MB threshold")
|
|
2202
|
+
print(f" Large uploads may timeout. Consider splitting the data or using smaller batches.")
|
|
2203
|
+
|
|
2204
|
+
# Try upload with retry on 504
|
|
2205
|
+
import time
|
|
2206
|
+
upload_trace_id = f"UPLOAD-{int(time.time()*1000)}"
|
|
2207
|
+
print(f"🔵 [CLIENT] {upload_trace_id} Starting upload request")
|
|
2208
|
+
print(f" Endpoint: /compute/upload_with_new_session/")
|
|
2209
|
+
print(f" File: {filename}, Size: {file_size_mb:.2f} MB")
|
|
2210
|
+
print(f" Form data keys: {list(data.keys())}")
|
|
2211
|
+
try:
|
|
2212
|
+
response = self._make_request("POST", "/compute/upload_with_new_session/", files=files, data=data)
|
|
2213
|
+
print(f"🔵 [CLIENT] {upload_trace_id} Received response: HTTP {response.status_code}")
|
|
2214
|
+
except requests.exceptions.HTTPError as e:
|
|
2215
|
+
# If we get a 504 and file is large, suggest chunking
|
|
2216
|
+
if e.response and e.response.status_code == 504 and file_size_mb > CHUNK_SIZE_MB:
|
|
2217
|
+
print(f"\n❌ 504 Gateway Timeout on large file upload ({file_size_mb:.1f} MB)")
|
|
2218
|
+
print(f" File exceeds {CHUNK_SIZE_MB} MB - chunking not yet implemented for regular uploads")
|
|
2219
|
+
print(f" Consider:")
|
|
2220
|
+
print(f" 1. Splitting your data into smaller files (< {CHUNK_SIZE_MB} MB each)")
|
|
2221
|
+
print(f" 2. Using the chunked upload endpoint (if available)")
|
|
2222
|
+
print(f" 3. Retrying the upload (server may have been temporarily busy)")
|
|
2223
|
+
raise
|
|
2224
|
+
else:
|
|
2225
|
+
raise
|
|
2150
2226
|
|
|
2151
2227
|
response_data = response.json()
|
|
2152
2228
|
session_id = response_data.get('session_id')
|
|
@@ -4248,9 +4324,28 @@ class FeatrixSphereClient:
|
|
|
4248
4324
|
|
|
4249
4325
|
# Get the compute cluster from the foundation model session
|
|
4250
4326
|
# This ensures we upload files to the same node where the foundation model lives
|
|
4251
|
-
|
|
4252
|
-
foundation_compute_cluster =
|
|
4253
|
-
|
|
4327
|
+
# If the foundation session doesn't exist (404), we'll proceed with current compute cluster
|
|
4328
|
+
foundation_compute_cluster = None
|
|
4329
|
+
try:
|
|
4330
|
+
foundation_session = self.get_session_status(foundation_model_id)
|
|
4331
|
+
foundation_compute_cluster = self.get_last_server_metadata()
|
|
4332
|
+
foundation_compute_cluster = foundation_compute_cluster.get('compute_cluster') if foundation_compute_cluster else None
|
|
4333
|
+
except Exception as e:
|
|
4334
|
+
# Foundation session might not exist or be accessible - that's okay
|
|
4335
|
+
# The server will validate it when we submit the training request
|
|
4336
|
+
if verbose:
|
|
4337
|
+
# Check if it's a 404 HTTP error
|
|
4338
|
+
is_404 = False
|
|
4339
|
+
if isinstance(e, requests.exceptions.HTTPError):
|
|
4340
|
+
if hasattr(e, 'response') and e.response.status_code == 404:
|
|
4341
|
+
is_404 = True
|
|
4342
|
+
|
|
4343
|
+
if is_404:
|
|
4344
|
+
print(f" ⚠️ Foundation session not found (404) - will use current compute cluster")
|
|
4345
|
+
print(f" Server will validate foundation model when training starts")
|
|
4346
|
+
else:
|
|
4347
|
+
print(f" ⚠️ Could not fetch foundation session: {e}")
|
|
4348
|
+
print(f" Will proceed with current compute cluster")
|
|
4254
4349
|
|
|
4255
4350
|
# Temporarily set compute cluster for file uploads if we found one
|
|
4256
4351
|
original_compute_cluster = self.compute_cluster
|
|
@@ -4259,6 +4354,8 @@ class FeatrixSphereClient:
|
|
|
4259
4354
|
self.set_compute_cluster(foundation_compute_cluster)
|
|
4260
4355
|
if verbose:
|
|
4261
4356
|
print(f" Using compute cluster: {foundation_compute_cluster}")
|
|
4357
|
+
elif verbose and self.compute_cluster:
|
|
4358
|
+
print(f" Using current compute cluster: {self.compute_cluster}")
|
|
4262
4359
|
|
|
4263
4360
|
try:
|
|
4264
4361
|
# Validate that only one data source is provided
|
|
@@ -7,7 +7,6 @@ requirements.txt
|
|
|
7
7
|
setup.py
|
|
8
8
|
system_monitor.py
|
|
9
9
|
featrixsphere/__init__.py
|
|
10
|
-
featrixsphere/cli.py
|
|
11
10
|
featrixsphere/client.py
|
|
12
11
|
featrixsphere/test_client.py
|
|
13
12
|
featrixsphere.egg-info/PKG-INFO
|
|
@@ -21,7 +20,6 @@ src/api.py
|
|
|
21
20
|
src/auto_upgrade_monitor.py
|
|
22
21
|
src/build_version.py
|
|
23
22
|
src/celery_app.py
|
|
24
|
-
src/cli.py
|
|
25
23
|
src/config.py
|
|
26
24
|
src/demo_existing_model.py
|
|
27
25
|
src/demo_label_updates.py
|
|
@@ -33,17 +31,18 @@ src/event_log.py
|
|
|
33
31
|
src/example_api_usage.py
|
|
34
32
|
src/example_prediction_feedback.py
|
|
35
33
|
src/example_train_predictor.py
|
|
36
|
-
src/featrix_queue.py
|
|
37
34
|
src/featrix_watchdog.py
|
|
38
35
|
src/gc_cleanup.py
|
|
39
36
|
src/llm_client.py
|
|
40
37
|
src/manage_churro.sh
|
|
38
|
+
src/migrate_string_cache_naming.py
|
|
41
39
|
src/neural.py
|
|
42
40
|
src/node-install.sh
|
|
43
41
|
src/prediction_drift_monitor.py
|
|
44
42
|
src/prediction_persistence_worker.py
|
|
45
43
|
src/quick_test_deployment.sh
|
|
46
44
|
src/recreate_session.py
|
|
45
|
+
src/redis_job_progress.py
|
|
47
46
|
src/redis_prediction_cli.py
|
|
48
47
|
src/redis_prediction_store.py
|
|
49
48
|
src/regenerate_training_movie.py
|
|
@@ -53,6 +52,8 @@ src/run_api_server.sh
|
|
|
53
52
|
src/send_email.py
|
|
54
53
|
src/slack.py
|
|
55
54
|
src/standalone_prediction.py
|
|
55
|
+
src/start_celery_cpu_worker.sh
|
|
56
|
+
src/start_celery_gpu_worker.sh
|
|
56
57
|
src/start_celery_worker.sh
|
|
57
58
|
src/start_churro_server.sh
|
|
58
59
|
src/tail-watch.py
|
|
@@ -67,13 +68,20 @@ src/test_upload_endpoint.py
|
|
|
67
68
|
src/tree.py
|
|
68
69
|
src/utils.py
|
|
69
70
|
src/version.py
|
|
71
|
+
src/lib/api_event_retry.py
|
|
72
|
+
src/lib/celery_job_recovery.py
|
|
70
73
|
src/lib/convergence_monitor.py
|
|
71
74
|
src/lib/epoch_projections.py
|
|
72
75
|
src/lib/es_projections.py
|
|
73
76
|
src/lib/es_training.py
|
|
77
|
+
src/lib/es_training_wrapper.py
|
|
74
78
|
src/lib/featrix_debug.py
|
|
79
|
+
src/lib/job_manager.py
|
|
75
80
|
src/lib/json_encoder_cache.py
|
|
76
81
|
src/lib/knn_training.py
|
|
82
|
+
src/lib/queue_manager.py
|
|
83
|
+
src/lib/session_chains.py
|
|
84
|
+
src/lib/session_manager.py
|
|
77
85
|
src/lib/single_predictor_cv.py
|
|
78
86
|
src/lib/single_predictor_training.py
|
|
79
87
|
src/lib/sphere_config.py
|
|
@@ -112,6 +120,7 @@ src/lib/featrix/neural/hubspot_free_domains_list_may_2025.py
|
|
|
112
120
|
src/lib/featrix/neural/input_data_file.py
|
|
113
121
|
src/lib/featrix/neural/input_data_set.py
|
|
114
122
|
src/lib/featrix/neural/integrity.py
|
|
123
|
+
src/lib/featrix/neural/io_utils.py
|
|
115
124
|
src/lib/featrix/neural/json_cache.py
|
|
116
125
|
src/lib/featrix/neural/json_codec.py
|
|
117
126
|
src/lib/featrix/neural/logging_config.py
|
|
@@ -151,6 +160,7 @@ src/lib/featrix/neural/qa/generate_focal_report.py
|
|
|
151
160
|
src/lib/featrix/neural/qa/model_advisor.py
|
|
152
161
|
src/lib/featrix/neural/qa/show_results.py
|
|
153
162
|
src/lib/featrix/neural/qa/test_adaptive_training.py
|
|
163
|
+
src/lib/featrix/neural/qa/test_checkpoint_dict_reconstruction.py
|
|
154
164
|
src/lib/featrix/neural/qa/test_confusion_matrix_metadata.py
|
|
155
165
|
src/lib/featrix/neural/qa/test_embedding_quality.py
|
|
156
166
|
src/lib/featrix/neural/qa/test_embedding_space.py
|