featrixsphere 0.2.1830__tar.gz → 0.2.2280__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/PKG-INFO +1 -1
- featrixsphere-0.2.2280/VERSION +1 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/featrix-update.py +75 -14
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/featrixsphere/__init__.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/featrixsphere/client.py +331 -68
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/featrixsphere.egg-info/PKG-INFO +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/featrixsphere.egg-info/SOURCES.txt +21 -3
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/requirements.txt +0 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/api.py +614 -94
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/celery_app.py +1036 -256
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/config.py +9 -5
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/event_log.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/featrix_watchdog.py +11 -6
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/celery_job_recovery.py +133 -6
- featrixsphere-0.2.2280/src/lib/distribution_shift_detector.py +693 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/epoch_projections.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/es_projections.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/es_training.py +53 -18
- featrixsphere-0.2.2280/src/lib/es_training_wrapper.py +131 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/__init__.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/__init__.py +2 -2
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/calibration_utils.py +8 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/classification_metrics.py +13 -12
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/config.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/data_frame_data_set.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/dataloader_utils.py +44 -22
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/detect.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/domain_codec.py +3 -3
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/embedded_space.py +651 -250
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/embedding_lr_scheduler.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/embedding_space_utils.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/embedding_utils.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/encoders.py +69 -19
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/enrich.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/es_projection.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/exceptions.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/featrix_csv.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/featrix_json.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/featrix_module_dict.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/featrix_token.py +7 -5
- featrixsphere-0.2.2280/src/lib/featrix/neural/feature_engineer.py +306 -0
- featrixsphere-0.2.2280/src/lib/featrix/neural/feature_suggestion_tracker.py +372 -0
- featrixsphere-0.2.2280/src/lib/featrix/neural/gpu_utils.py +724 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/guardrails.py +1 -1
- featrixsphere-0.2.2280/src/lib/featrix/neural/hybrid_column_detector.py +466 -0
- featrixsphere-0.2.2280/src/lib/featrix/neural/hybrid_encoders.py +344 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/input_data_file.py +15 -8
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/input_data_set.py +419 -44
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/integrity.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/io_utils.py +497 -127
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/json_cache.py +2 -2
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/json_codec.py +2 -2
- featrixsphere-0.2.2280/src/lib/featrix/neural/llm/__init__.py +6 -0
- featrixsphere-0.2.2280/src/lib/featrix/neural/llm/schema_analyzer.py +143 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/logging_config.py +23 -6
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/mask_tracker.py +5 -14
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/model_config.py +52 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/model_hash.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/movie_frame_task.py +3 -6
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/network_viz.py +1 -1
- featrixsphere-0.2.2280/src/lib/featrix/neural/platform_utils.py +84 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/prng_control.py +6 -6
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/test_adaptive_training.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/test_checkpoint_dict_reconstruction.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/test_confusion_matrix_metadata.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/test_embedding_quality.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/test_embedding_space.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/test_extend_embedding_space.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/test_focal_comparison.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/test_focal_comparison_enhanced.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/test_focal_loss_single_predictor.py +1 -1
- featrixsphere-0.2.2280/src/lib/featrix/neural/qa/test_hybrid_columns.py +481 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/test_label_smoothing.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/test_monitor_integration.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/test_timeline_quick.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/test_training_monitor.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/scalar_codec.py +5 -4
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/set_codec.py +30 -15
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/setlist_codec.py +10 -4
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/simple_mlp.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/single_predictor.py +1082 -393
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/sqlite_utils.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/stopwatch.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/string_analysis.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/string_cache.py +135 -32
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/string_codec.py +279 -35
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/timestamp_codec.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/training_context_manager.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/training_history_db.py +1 -1
- featrixsphere-0.2.2280/src/lib/featrix/neural/training_logger.py +974 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/transformer_encoder.py +50 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/url_codec.py +3 -3
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/url_parser.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/utils.py +29 -14
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/vector_codec.py +4 -4
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/world_data.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/job_manager.py +14 -9
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/json_encoder_cache.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/knn_training.py +7 -4
- featrixsphere-0.2.2280/src/lib/meta_learning_client.py +339 -0
- featrixsphere-0.2.2280/src/lib/quick_architecture_search.py +292 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/session_chains.py +175 -19
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/session_manager.py +415 -81
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/single_predictor_cv.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/single_predictor_training.py +451 -180
- featrixsphere-0.2.2280/src/lib/sp_training_wrapper.py +99 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/structureddata.py +2 -4
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/training_monitor.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/utils.py +1 -1
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/node-install.sh +214 -120
- featrixsphere-0.2.2280/src/query_schema_worker.py +255 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/render_sphere.py +1 -1
- featrixsphere-0.2.2280/src/repair_checkpoint.py +246 -0
- featrixsphere-0.2.2280/src/start_celery_gpu_worker.sh +142 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/version.py +24 -9
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/system_monitor.py +26 -8
- featrixsphere-0.2.2280/tests/test_client_data.py +145 -0
- featrixsphere-0.2.2280/tests/test_client_predictions.py +266 -0
- featrixsphere-0.2.2280/tests/test_client_sessions.py +268 -0
- featrixsphere-0.2.2280/tests/test_client_training.py +149 -0
- featrixsphere-0.2.2280/tests/test_local_integration.py +270 -0
- featrixsphere-0.2.1830/VERSION +0 -1
- featrixsphere-0.2.1830/src/lib/distribution_shift_detector.py +0 -481
- featrixsphere-0.2.1830/src/lib/es_training_wrapper.py +0 -60
- featrixsphere-0.2.1830/src/lib/featrix/neural/MetaDataCache.py +0 -203
- featrixsphere-0.2.1830/src/lib/featrix/neural/device.py +0 -40
- featrixsphere-0.2.1830/src/start_celery_gpu_worker.sh +0 -38
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/MANIFEST.in +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/README.md +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/featrixsphere/test_client.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/featrixsphere.egg-info/dependency_links.txt +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/featrixsphere.egg-info/entry_points.txt +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/featrixsphere.egg-info/not-zip-safe +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/featrixsphere.egg-info/requires.txt +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/featrixsphere.egg-info/top_level.txt +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/nv-install.sh +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/setup.cfg +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/setup.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/auto_upgrade_monitor.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/build_version.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/demo_existing_model.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/demo_label_updates.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/deploy.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/deploy_cache_debug.sh +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/ensure_watchdog_running.sh +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/error_tracker.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/example_api_usage.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/example_prediction_feedback.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/example_train_predictor.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/gc_cleanup.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/api_event_retry.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/convergence_monitor.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/crash_tracker.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/dropout_scheduler.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/hubspot_free_domains_list_may_2025.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/demo_advisor_decisions.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/example_complete_workflow.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/generate_focal_report.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/model_advisor.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/show_results.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/test_piecewise_epochs.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/test_predict_during_training.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/test_warning_tracking.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/visualize_training_timeline.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/single_predictor_mlp.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/sphere_config.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/string_list_codec.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/training_event.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/training_exceptions.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix_debug.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/queue_manager.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/sphere_config.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/system_health_monitor.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/vector_db.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/webhook_helpers.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/weightwatcher_tracking.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/llm_client.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/manage_churro.sh +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/migrate_string_cache_naming.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/neural.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/prediction_drift_monitor.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/prediction_persistence_worker.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/quick_test_deployment.sh +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/recreate_session.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/redis_job_progress.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/redis_prediction_cli.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/redis_prediction_store.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/regenerate_training_movie.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/restart_celery_worker.sh +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/run_api_server.sh +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/send_email.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/slack.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/standalone_prediction.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/start_celery_cpu_worker.sh +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/start_celery_worker.sh +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/start_churro_server.sh +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/tail-watch.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/test_api_client.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/test_complete_workflow.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/test_json_tables_prediction.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/test_redis_predictions.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/test_server_connection.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/test_session_models.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/test_single_predictor_api.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/test_upload_endpoint.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/tree.py +0 -0
- {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/utils.py +0 -0
featrixsphere-0.2.2280/VERSION
@@ -0,0 +1 @@
+0.2.2278
{featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/featrix-update.py
@@ -208,9 +208,13 @@ def find_newest_version(index: Dict[str, Any]) -> Optional[Dict[str, Any]]:
         print("⚠️ No files with version information found")
         return None

-    # Sort by version (newest first)
+    # Sort by version (newest first), then by date_modified (newest first) as tiebreaker
+    # This ensures when multiple builds have the same version, we get the most recent one
     versioned_files.sort(
-        key=lambda x:
+        key=lambda x: (
+            tuple(int(p) for p in x['version'].split('.')[:3]),
+            x.get('date_modified', '')
+        ),
         reverse=True
     )
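The new sort key orders primarily by the (major, minor, patch) tuple and secondarily by `date_modified`, so ISO-8601 timestamps break ties between builds that share a version number. A minimal standalone sketch of that ordering (the file entries here are hypothetical, not real registry data):

```python
# Two hypothetical builds share version 0.2.2280; date_modified decides the winner.
versioned_files = [
    {"version": "0.2.2280", "date_modified": "2025-01-10T08:00:00Z"},
    {"version": "0.2.2280", "date_modified": "2025-01-12T09:30:00Z"},
    {"version": "0.2.1830", "date_modified": "2025-01-01T00:00:00Z"},
]

versioned_files.sort(
    key=lambda x: (
        tuple(int(p) for p in x["version"].split(".")[:3]),
        x.get("date_modified", ""),  # ISO-8601 strings sort chronologically
    ),
    reverse=True,
)

print(versioned_files[0]["date_modified"])  # 2025-01-12T09:30:00Z (newest build wins)
```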
@@ -267,16 +271,62 @@ def install_package(package_file: Path, force: bool = False) -> bool:
     print(f"   Package version: {package_version}")
     print(f"   Package hash: {package_hash}")

-    # Check if already deployed
-
-
-
-
-
-
+    # Check if already deployed
+    deployed_hash = None
+    deployed_version = None
+    if Path("/sphere/app/VERSION_HASH").exists():
+        deployed_hash = Path("/sphere/app/VERSION_HASH").read_text().strip()
+    if Path("/sphere/app/VERSION").exists():
+        deployed_version = Path("/sphere/app/VERSION").read_text().strip()
+
+    same_hash = deployed_hash and package_hash != "unknown" and package_hash == deployed_hash
+    same_version = deployed_version and package_version != "unknown" and package_version == deployed_version
+
+    if same_hash and same_version:
+        if not force:
             print(f"\n⏭️ This package is already deployed (hash: {package_hash})")
             print(f"   Skipping installation. Use --force to reinstall anyway.")
             return True
+        else:
+            # FORCE REINSTALL OF SAME BUILD - MAKE IT SUPER OBVIOUS
+            import time
+            print()
+            print()
+            print("\033[1;33m" + "╔" + "=" * 78 + "╗" + "\033[0m")
+            print("\033[1;33m" + "║" + " " * 78 + "║" + "\033[0m")
+            print("\033[1;33m" + "║" + "  ⚠️ ⚠️ ⚠️  REINSTALLING THE EXACT SAME BUILD  ⚠️ ⚠️ ⚠️".ljust(78) + "║" + "\033[0m")
+            print("\033[1;33m" + "║" + " " * 78 + "║" + "\033[0m")
+            print("\033[1;33m" + "╚" + "=" * 78 + "╝" + "\033[0m")
+            print()
+            print("\033[1;33m" + "⚠️  WARNING: You are using --force to reinstall the SAME build!" + "\033[0m")
+            print()
+            print(f"   Currently installed:")
+            print(f"     Version: {deployed_version}")
+            print(f"     Hash:    {deployed_hash}")
+            print()
+            print(f"   Package to install:")
+            print(f"     Version: {package_version}")
+            print(f"     Hash:    {package_hash}")
+            print()
+            print("\033[1;33m" + "   👉 THIS IS THE EXACT SAME BUILD (version AND hash match)" + "\033[0m")
+            print()
+            print("   This will:")
+            print("     • Kill and restart all services")
+            print("     • Copy the exact same files over existing files")
+            print("     • Take 2-3 minutes to complete")
+            print()
+            print("   Common reasons to do this:")
+            print("     • Testing deployment process")
+            print("     • Services are broken and need clean restart")
+            print("     • Files were manually modified and need to be restored")
+            print()
+            for i in range(10, 0, -1):
+                print(f"\r   ⏳ Starting reinstall in {i} seconds... (Ctrl+C to abort)", end='', flush=True)
+                time.sleep(1)
+            print()
+            print()
+            print("\033[1;32m" + "▶️  Proceeding with reinstall..." + "\033[0m")
+            print()

     # Find node-install.sh
     install_script = None
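The skip-versus-reinstall decision above reduces to two booleans. A condensed, hypothetical sketch of just that logic (the values are illustrative; in featrix-update.py they come from the package metadata and from /sphere/app/VERSION and /sphere/app/VERSION_HASH):

```python
# Hypothetical deployed/package values for illustration only.
deployed_version, deployed_hash = "0.2.2280", "abc12345"
package_version, package_hash = "0.2.2280", "abc12345"
force = False

same_hash = bool(deployed_hash) and package_hash != "unknown" and package_hash == deployed_hash
same_version = bool(deployed_version) and package_version != "unknown" and package_version == deployed_version

if same_hash and same_version and not force:
    print("Already deployed - skipping")         # the common fast path
elif same_hash and same_version and force:
    print("Forced reinstall of the same build")  # triggers the 10-second countdown
else:
    print("Proceeding with install")             # new version or new hash
```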
@@ -306,10 +356,14 @@ def install_package(package_file: Path, force: bool = False) -> bool:
     # 1. We've already done version checking in featrix-update.py
     # 2. The package is a specific version we want to install
     # 3. node-install.sh checks git state from /home/mitch/sphere which may not match the package
+
+    # node-install.sh REQUIRES root (checked at line 326)
+    # It calls 'sbit fix-permissions' which needs sbit to have setuid bit
+    # The install script itself must run as root
     cmd = ["sudo", str(install_script), "--force"]
     print(f"   Using --force flag (installing from package)")

-    # Run the install script
+    # Run the install script
     result = subprocess.run(
         cmd,
         check=True,
@@ -440,12 +494,19 @@ def main():
         print(f"\n✅ Update available: {current_version} → {newest_version}")
         should_update = True
     elif comparison == 0:
-
-
-
+        # Same version - check if hash is different (newer build of same version)
+        current_hash = get_current_version_hash()
+        if current_hash and newest_hash and current_hash != newest_hash:
+            print(f"\n✅ Newer build available: {current_version} ({current_hash} → {newest_hash[:8]})")
+            print(f"   Same version number but different hash (newer build)")
             should_update = True
         else:
-
+            print(f"\n✅ Already on latest version: {current_version}")
+            if args.force:
+                print("   --force flag set, will reinstall anyway")
+                should_update = True
+            else:
+                should_update = False
     else:
         print(f"\n⚠️ Current version ({current_version}) is newer than available ({newest_version})")
         if args.force:
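Condensed, the main() branches above form a small decision tree. A hypothetical sketch (the helper name `decide_update` and the sign convention of `comparison` are illustrative assumptions, not the package's API; only the branch structure mirrors the diff):

```python
def decide_update(comparison: int, current_hash: str, newest_hash: str, force: bool) -> bool:
    """Sketch of the update decision; assumes comparison > 0 means a newer version exists."""
    if comparison > 0:
        return True                      # newer version number available
    if comparison == 0:
        if current_hash and newest_hash and current_hash != newest_hash:
            return True                  # same version, different hash: newer build
        return force                     # already latest; reinstall only with --force
    return force                         # current is newer than available
```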
{featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/featrixsphere/client.py
@@ -565,7 +565,7 @@ class FeatrixSphereClient:
         """Make a DELETE request and return JSON response."""
         response = self._make_request("DELETE", endpoint, max_retries=max_retries, **kwargs)
         return self._unwrap_response(response.json())
-
+
     # =========================================================================
     # Session Management
     # =========================================================================
@@ -893,6 +893,113 @@ class FeatrixSphereClient:
         """
         response_data = self._post_json(f"/compute/session/{session_id}/unpublish", {})
         return response_data
+
+    def publish_partial_foundation(
+        self,
+        source_session_id: str,
+        name: str,
+        checkpoint_epoch: int = None,
+        session_name_prefix: str = None,
+        publish: bool = True,
+        verbose: bool = True
+    ) -> Dict[str, Any]:
+        """
+        Publish a checkpoint from in-progress training as a standalone foundation model.
+
+        Takes a checkpoint from ongoing ES training and creates a NEW foundation model
+        session with full provenance tracking. Perfect for snapshotting good intermediate
+        models while training continues.
+
+        The new foundation model can be used with:
+        - train_on_foundational_model() - Train predictors on it
+        - Any standard foundation model operations
+        - Available across all compute nodes via backplane
+
+        Args:
+            source_session_id: Session with ES training (in-progress or completed)
+            name: Name for the new foundation model (REQUIRED)
+            checkpoint_epoch: Which epoch checkpoint to use (None = best/latest)
+            session_name_prefix: Optional prefix for new session ID
+            publish: Move to /sphere/published/ directory (default: True)
+            verbose: Print status updates
+
+        Returns:
+            dict with:
+            - foundation_session_id: New foundation session ID
+            - checkpoint_epoch: Epoch used
+            - provenance: Full metadata about source and training progress
+            - published_path: Path if published
+
+        Example:
+            ```python
+            # Snapshot epoch 50 as foundation v0.5 while training continues
+            result = client.publish_partial_foundation(
+                source_session_id="abc-123",
+                name="My Foundation v0.5",
+                checkpoint_epoch=50,
+                session_name_prefix="foundation-v0.5",
+                publish=True
+            )
+
+            foundation_id = result['foundation_session_id']
+            print(f"Published foundation: {foundation_id}")
+            print(f"Source was {result['provenance']['training_progress_percent']}% trained")
+
+            # Use immediately like any foundation model
+            client.train_on_foundational_model(
+                foundation_model_id=foundation_id,
+                target_column="price",
+                target_column_type="scalar"
+            )
+            # Available on all compute nodes automatically via backplane
+            ```
+        """
+        if verbose:
+            print(f"📦 Publishing partial foundation from {source_session_id}")
+            print(f"   Name: {name}")
+            if checkpoint_epoch is not None:
+                print(f"   Checkpoint epoch: {checkpoint_epoch}")
+            else:
+                print(f"   Checkpoint epoch: best/latest available")
+            print(f"   Publish to /sphere/published/: {publish}")
+
+        data = {
+            'name': name,
+            'publish': publish
+        }
+
+        if checkpoint_epoch is not None:
+            data['checkpoint_epoch'] = checkpoint_epoch
+        if session_name_prefix:
+            data['session_name_prefix'] = session_name_prefix
+
+        try:
+            response_data = self._post_json(
+                f"/compute/session/{source_session_id}/publish_partial_foundation",
+                data
+            )
+
+            foundation_id = response_data.get('foundation_session_id')
+            checkpoint_used = response_data.get('checkpoint_epoch')
+            provenance = response_data.get('provenance', {})
+
+            if verbose:
+                print(f"✅ {response_data.get('message')}")
+                print(f"   Foundation session ID: {foundation_id}")
+                print(f"   Checkpoint epoch: {checkpoint_used}")
+                if provenance.get('training_progress_percent'):
+                    print(f"   Source training progress: {provenance['training_progress_percent']}%")
+                if provenance.get('validation_loss_at_checkpoint'):
+                    print(f"   Val loss at checkpoint: {provenance['validation_loss_at_checkpoint']:.4f}")
+                if response_data.get('published_path'):
+                    print(f"   Published to: {response_data['published_path']}")
+
+            return response_data
+
+        except Exception as e:
+            if verbose:
+                print(f"❌ Error publishing partial foundation: {e}")
+            raise

     def get_sessions_for_org(self, name_prefix: str, max_retries: int = None) -> Dict[str, Any]:
         """
@@ -1974,9 +2081,30 @@ class FeatrixSphereClient:
         - Category split: Use full data for ES, specific categories for predictor
         - Label completeness: Include unlabeled rows in ES, exclude from predictor
         - Test/holdout: Keep test data in ES context but exclude from predictor training
+
+        Special Input: Dictionary of Datasets
+        --------------------------------------
+        You can pass a dictionary of datasets instead of a single DataFrame. Each key is a dataset name,
+        and each value is a list of DataFrames/tables to include in that dataset.
+
+        When using this format:
+        - A __featrix_dataset_name column is automatically added to track which dataset each row came from
+        - All tables from all datasets are concatenated into a single DataFrame before upload
+        - The concatenated DataFrame is uploaded as normal
+
+        Example - Upload multiple datasets with labels:
+
+            datasets = {
+                'training_data': [df1, df2, df3],
+                'validation_data': [df4, df5],
+                'test_data': [df6]
+            }
+
+            session = client.upload_df_and_create_session(df=datasets)
+            # Uploads a single DataFrame with __featrix_dataset_name column indicating source

         Args:
-            df: pandas DataFrame to upload (optional if file_path is provided)
+            df: pandas DataFrame OR dict of {dataset_name: [DataFrames]} to upload (optional if file_path is provided)
             filename: Name to give the uploaded file (default: "data.csv")
             file_path: Path to CSV, Parquet, JSON, or JSONL file to upload (optional if df is provided)
             column_overrides: Dict mapping column names to types ("scalar", "set", "free_string", "free_string_list")
@@ -2005,6 +2133,80 @@ class FeatrixSphereClient:
         if column_types is not None:
             column_overrides = column_types

+        # Handle dictionary of datasets input
+        if df is not None and isinstance(df, dict):
+            print("Detected dictionary of datasets - concatenating with __featrix_dataset_name labels")
+            all_dataframes = []
+            total_rows = 0
+
+            for dataset_name, tables in df.items():
+                if not isinstance(tables, list):
+                    raise ValueError(f"Value for dataset '{dataset_name}' must be a list of DataFrames/file paths, got {type(tables)}")
+
+                for i, table in enumerate(tables):
+                    # Handle file path (string)
+                    if isinstance(table, str):
+                        file_path_to_load = str(table)
+
+                        if not os.path.exists(file_path_to_load):
+                            raise FileNotFoundError(f"File not found in dataset '{dataset_name}': {file_path_to_load}")
+
+                        # Determine file type and load
+                        file_ext = file_path_to_load.lower()
+                        print(f"   - {dataset_name} loading file: {os.path.basename(file_path_to_load)}")
+
+                        if file_ext.endswith('.parquet'):
+                            loaded_df = pd.read_parquet(file_path_to_load)
+                        elif file_ext.endswith(('.json', '.jsonl')):
+                            try:
+                                from featrix.neural.input_data_file import featrix_wrap_read_json_file
+                                loaded_df = featrix_wrap_read_json_file(file_path_to_load)
+                                if loaded_df is None:
+                                    raise ValueError(f"Failed to parse {'JSONL' if file_ext.endswith('.jsonl') else 'JSON'} file")
+                            except ImportError:
+                                # Fallback to pandas
+                                if file_ext.endswith('.jsonl'):
+                                    import json
+                                    records = []
+                                    with open(file_path_to_load, 'r', encoding='utf-8') as f:
+                                        for line in f:
+                                            if line.strip():
+                                                records.append(json.loads(line))
+                                    loaded_df = pd.DataFrame(records)
+                                else:
+                                    loaded_df = pd.read_json(file_path_to_load)
+                        elif file_ext.endswith(('.csv', '.csv.gz')):
+                            loaded_df = pd.read_csv(file_path_to_load)
+                        else:
+                            raise ValueError(f"Unsupported file type in dataset '{dataset_name}': {file_path_to_load}. "
+                                             f"Supported: .csv, .csv.gz, .parquet, .json, .jsonl")
+
+                        labeled_table = loaded_df
+                        print(f"     Loaded {len(loaded_df)} rows, {len(loaded_df.columns)} columns")
+
+                    # Handle DataFrame
+                    elif isinstance(table, pd.DataFrame):
+                        # Create a copy to avoid modifying the original
+                        labeled_table = table.copy()
+                        print(f"   - {dataset_name} DataFrame {i+1}: {len(labeled_table)} rows, {len(labeled_table.columns)} columns")
+
+                    else:
+                        raise ValueError(f"Table {i} in dataset '{dataset_name}' must be a pandas DataFrame or file path (str), got {type(table)}")
+
+                    # Add the dataset name label column
+                    labeled_table['__featrix_dataset_name'] = dataset_name
+
+                    all_dataframes.append(labeled_table)
+                    total_rows += len(labeled_table)
+
+            if not all_dataframes:
+                raise ValueError("No DataFrames found in the provided dictionary")
+
+            # Concatenate all dataframes
+            print(f"Concatenating {len(all_dataframes)} tables from {len(df)} datasets ({total_rows} total rows)")
+            df = pd.concat(all_dataframes, ignore_index=True)
+            print(f"Combined DataFrame: {len(df)} rows, {len(df.columns)} columns (includes __featrix_dataset_name)")
+
         # Validate inputs
         if df is None and file_path is None:
             raise ValueError("Either df or file_path must be provided")
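The core of the multi-dataset path is the labeling-and-concat step. A standalone sketch of that behavior with tiny in-memory frames (hypothetical data, not from the package):

```python
import pandas as pd

datasets = {
    "training_data": [pd.DataFrame({"x": [1, 2]}), pd.DataFrame({"x": [3]})],
    "test_data": [pd.DataFrame({"x": [4]})],
}

labeled = []
for dataset_name, tables in datasets.items():
    for table in tables:
        t = table.copy()                            # avoid mutating caller frames
        t["__featrix_dataset_name"] = dataset_name  # provenance label per row
        labeled.append(t)

combined = pd.concat(labeled, ignore_index=True)
print(combined)
# output (approximately):
#    x __featrix_dataset_name
# 0  1          training_data
# 1  2          training_data
# 2  3          training_data
# 3  4              test_data
```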
@@ -4236,25 +4438,21 @@ class FeatrixSphereClient:
     def clone_in_progress_embedding_space(self, session_id: str, from_compute: str, to_compute: str,
                                           es_id: str = None, new_session_name: str = None) -> Dict[str, Any]:
         """
-        Clone
+        INTERNAL: Clone embedding space between compute nodes.

-
-
-
+        Note: With the backplane system, users generally don't need to manually clone.
+        Sessions are automatically available across all compute nodes.
+        This method is kept for backward compatibility and special cases.

         Args:
-            session_id: Source session ID
-            from_compute: Source
-            to_compute: Destination
-            es_id: Optional ES ID
-            new_session_name: Optional name for
+            session_id: Source session ID
+            from_compute: Source node name
+            to_compute: Destination node name
+            es_id: Optional ES ID (if session has multiple)
+            new_session_name: Optional name for cloned session

         Returns:
-            Dict with new_session_id
-
-        Raises:
-            ValueError: If multiple ES found in session and es_id not provided
-            HTTPException: If cloning fails
+            Dict with new_session_id
         """
         # Prepare request data
         request_data = {
@@ -4322,40 +4520,6 @@ class FeatrixSphereClient:
         print(f"Training predictor on foundation model {foundation_model_id}...")
         print(f"   Target: {target_column} ({target_column_type})")

-        # Get the compute cluster from the foundation model session
-        # This ensures we upload files to the same node where the foundation model lives
-        # If the foundation session doesn't exist (404), we'll proceed with current compute cluster
-        foundation_compute_cluster = None
-        try:
-            foundation_session = self.get_session_status(foundation_model_id)
-            foundation_compute_cluster = self.get_last_server_metadata()
-            foundation_compute_cluster = foundation_compute_cluster.get('compute_cluster') if foundation_compute_cluster else None
-        except Exception as e:
-            # Foundation session might not exist or be accessible - that's okay
-            # The server will validate it when we submit the training request
-            if verbose:
-                # Check if it's a 404 HTTP error
-                is_404 = False
-                if isinstance(e, requests.exceptions.HTTPError):
-                    if hasattr(e, 'response') and e.response.status_code == 404:
-                        is_404 = True
-
-                if is_404:
-                    print(f"   ⚠️ Foundation session not found (404) - will use current compute cluster")
-                    print(f"   Server will validate foundation model when training starts")
-                else:
-                    print(f"   ⚠️ Could not fetch foundation session: {e}")
-                    print(f"   Will proceed with current compute cluster")
-
-        # Temporarily set compute cluster for file uploads if we found one
-        original_compute_cluster = self.compute_cluster
-        original_headers = self.session.headers.copy()
-        if foundation_compute_cluster:
-            self.set_compute_cluster(foundation_compute_cluster)
-            if verbose:
-                print(f"   Using compute cluster: {foundation_compute_cluster}")
-        elif verbose and self.compute_cluster:
-            print(f"   Using current compute cluster: {self.compute_cluster}")

         try:
             # Validate that only one data source is provided
@@ -4453,20 +4617,9 @@ class FeatrixSphereClient:

             new_session_id = response_data.get('session_id')
             print(f"✅ Predictor training session created: {new_session_id}")
-
-
-
-            if original_compute_cluster:
-                self.set_compute_cluster(original_compute_cluster)
-            else:
-                self.session.headers = original_headers
-        finally:
-            # Ensure we restore headers even if there's an error
-            if original_compute_cluster != self.compute_cluster:
-                if original_compute_cluster:
-                    self.set_compute_cluster(original_compute_cluster)
-                else:
-                    self.session.headers = original_headers
+
+        except Exception as e:
+            raise

         if verbose:
             print(f"⏳ Waiting for training to complete...")
@@ -4854,14 +5007,44 @@ class FeatrixSphereClient:
         The system handles the hard decisions so you can focus on your problem, not
         hyperparameter tuning.

+        MULTI-DATASET INPUT (NEW):
+        ---------------------------
+        You can now pass a dictionary of datasets for the `df` parameter, just like in
+        upload_df_and_create_session(). This is useful when combining multiple sources
+        for predictor training:
+
+        ```python
+        # Train predictor on multiple datasets with labels
+        training_data = {
+            'extra_rows_from_matt': ['matt_supplement.csv', 'matt_additions.parquet'],
+            'main_training': [df1, df2, 'training.csv'],
+            'validation_samples': ['validation.csv']
+        }
+
+        result = client.train_single_predictor(
+            session_id=session.session_id,
+            df=training_data,  # Dictionary of datasets
+            target_column='outcome',
+            target_column_type='set'
+        )
+        ```
+
+        When using dictionary format:
+        - Each key is a dataset name (e.g., 'extra_rows_from_matt')
+        - Each value is a list of DataFrames and/or file paths
+        - A __featrix_dataset_name column is automatically added
+        - All tables are concatenated before training
+        - Works with all file types: CSV, Parquet, JSON, JSONL
+
         Args:
             session_id: ID of session with trained embedding space
             target_column: Name of the target column to predict
             target_column_type: Type of target column ("set" or "scalar")
             file_path: Path to DIFFERENT training file (CSV or .csv.gz) to use for predictor training.
-            df: pandas DataFrame
-                Use file_path OR df (not both) to train
-                embedding space! If neither provided, uses
+            df: pandas DataFrame OR dict of {dataset_name: [DataFrames/file paths]} with DIFFERENT
+                training data to use for predictor training. Use file_path OR df (not both) to train
+                predictor on different data than your embedding space! If neither provided, uses
+                session's original data file.
             epochs: Number of training epochs (default: 0; automatic)
             validation_ignore_columns: List of column names to exclude from validation queries (default: None)
             rare_label_value: For binary classification, which class is the rare/minority class for metrics (default: None)
@@ -4882,6 +5065,86 @@ class FeatrixSphereClient:
         if file_path and df is not None:
             raise ValueError("Provide either file_path or df, not both")

+        # Handle dictionary of datasets input (same as upload_df_and_create_session)
+        if df is not None and isinstance(df, dict):
+            if verbose:
+                print("Detected dictionary of datasets - concatenating with __featrix_dataset_name labels")
+            all_dataframes = []
+            total_rows = 0
+
+            for dataset_name, tables in df.items():
+                if not isinstance(tables, list):
+                    raise ValueError(f"Value for dataset '{dataset_name}' must be a list of DataFrames/file paths, got {type(tables)}")
+
+                for i, table in enumerate(tables):
+                    # Handle file path (string)
+                    if isinstance(table, str):
+                        file_path_to_load = str(table)
+
+                        if not os.path.exists(file_path_to_load):
+                            raise FileNotFoundError(f"File not found in dataset '{dataset_name}': {file_path_to_load}")
+
+                        # Determine file type and load
+                        file_ext = file_path_to_load.lower()
+                        if verbose:
+                            print(f"   - {dataset_name} loading file: {os.path.basename(file_path_to_load)}")
+
+                        if file_ext.endswith('.parquet'):
+                            loaded_df = pd.read_parquet(file_path_to_load)
+                        elif file_ext.endswith(('.json', '.jsonl')):
+                            try:
+                                from featrix.neural.input_data_file import featrix_wrap_read_json_file
+                                loaded_df = featrix_wrap_read_json_file(file_path_to_load)
+                                if loaded_df is None:
+                                    raise ValueError(f"Failed to parse {'JSONL' if file_ext.endswith('.jsonl') else 'JSON'} file")
+                            except ImportError:
+                                # Fallback to pandas
+                                if file_ext.endswith('.jsonl'):
+                                    import json
+                                    records = []
+                                    with open(file_path_to_load, 'r', encoding='utf-8') as f:
+                                        for line in f:
+                                            if line.strip():
+                                                records.append(json.loads(line))
+                                    loaded_df = pd.DataFrame(records)
+                                else:
+                                    loaded_df = pd.read_json(file_path_to_load)
+                        elif file_ext.endswith(('.csv', '.csv.gz')):
+                            loaded_df = pd.read_csv(file_path_to_load)
+                        else:
+                            raise ValueError(f"Unsupported file type in dataset '{dataset_name}': {file_path_to_load}. "
+                                             f"Supported: .csv, .csv.gz, .parquet, .json, .jsonl")
+
+                        labeled_table = loaded_df
+                        if verbose:
+                            print(f"     Loaded {len(loaded_df)} rows, {len(loaded_df.columns)} columns")
+
+                    # Handle DataFrame
+                    elif isinstance(table, pd.DataFrame):
+                        # Create a copy to avoid modifying the original
+                        labeled_table = table.copy()
+                        if verbose:
+                            print(f"   - {dataset_name} DataFrame {i+1}: {len(labeled_table)} rows, {len(labeled_table.columns)} columns")
+
+                    else:
+                        raise ValueError(f"Table {i} in dataset '{dataset_name}' must be a pandas DataFrame or file path (str), got {type(table)}")
+
+                    # Add the dataset name label column
+                    labeled_table['__featrix_dataset_name'] = dataset_name
+
+                    all_dataframes.append(labeled_table)
+                    total_rows += len(labeled_table)
+
+            if not all_dataframes:
+                raise ValueError("No DataFrames found in the provided dictionary")
+
+            # Concatenate all dataframes
+            if verbose:
+                print(f"Concatenating {len(all_dataframes)} tables from {len(df)} datasets ({total_rows} total rows)")
+            df = pd.concat(all_dataframes, ignore_index=True)
+            if verbose:
+                print(f"Combined DataFrame: {len(df)} rows, {len(df.columns)} columns (includes __featrix_dataset_name)")
+
         # Validate cost parameters
         if cost_false_positive is not None or cost_false_negative is not None:
             if cost_false_positive is None or cost_false_negative is None:
@@ -5079,7 +5342,7 @@ class FeatrixSphereClient:
         Extend embedding space training with new data.

        This function:
-        1.
+        1. Creates a new session with the existing embedding space
         2. Uploads/processes the new data
         3. Continues training from where the previous training left off
         4. Trains for the specified number of additional epochs (data_passes)