featrixsphere 0.2.3613__tar.gz → 0.2.3737__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/PKG-INFO +1 -1
- featrixsphere-0.2.3737/VERSION +1 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/featrixsphere/__init__.py +1 -1
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/featrixsphere/client.py +5 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/featrixsphere.egg-info/PKG-INFO +1 -1
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/featrixsphere.egg-info/SOURCES.txt +3 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/api.py +37 -4
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/celery_app.py +331 -53
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/celery_job_recovery.py +62 -4
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/es_training.py +66 -7
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/dataloader_utils.py +3 -2
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/detect.py +63 -11
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/embedded_space.py +239 -58
- featrixsphere-0.2.3737/src/lib/featrix/neural/embedding_quality.py +417 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/encoders.py +21 -4
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/featrix_module_dict.py +7 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/guardrails.py +19 -4
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/input_data_set.py +1 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/io_utils.py +261 -40
- featrixsphere-0.2.3737/src/lib/featrix/neural/local_string_cache.py +482 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/model_config.py +3 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/relationship_estimator.py +31 -3
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/simple_mlp.py +120 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/simple_string_cache.py +66 -19
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/single_predictor.py +690 -26
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/sphere_config.py +38 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/training_context_manager.py +11 -2
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/transformer_encoder.py +35 -13
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/job_manager.py +6 -2
- featrixsphere-0.2.3737/src/lib/pre_analysis_wrapper.py +115 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/quick_architecture_search.py +115 -5
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/session_manager.py +71 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/single_predictor_training.py +235 -13
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/utils.py +6 -2
- featrixsphere-0.2.3613/VERSION +0 -1
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/MANIFEST.in +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/README.md +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/featrix-update.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/featrixsphere/test_client.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/featrixsphere.egg-info/dependency_links.txt +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/featrixsphere.egg-info/entry_points.txt +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/featrixsphere.egg-info/not-zip-safe +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/featrixsphere.egg-info/requires.txt +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/featrixsphere.egg-info/top_level.txt +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/nv-install.sh +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/requirements.txt +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/setup.cfg +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/setup.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/auto_upgrade_monitor.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/build_version.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/config.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/demo_existing_model.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/demo_label_updates.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/deploy.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/deploy_cache_debug.sh +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/ensure_watchdog_running.sh +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/error_tracker.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/event_log.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/example_api_usage.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/example_prediction_feedback.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/example_train_predictor.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/featrix_watchdog.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/gc_cleanup.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/api_event_retry.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/convergence_monitor.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/crash_tracker.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/distribution_shift_detector.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/epoch_projections.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/es_projections.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/es_training_wrapper.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/__init__.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/__init__.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/calibration_utils.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/classification_metrics.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/config.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/data_frame_data_set.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/domain_codec.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/dropout_scheduler.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/embedding_lr_scheduler.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/embedding_space_utils.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/embedding_utils.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/enrich.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/es_projection.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/exceptions.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/featrix_csv.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/featrix_json.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/featrix_token.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/feature_engineer.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/feature_suggestion_tracker.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/gpu_utils.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/graph_encoder.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/graph_encoder_training.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/hubspot_free_domains_list_may_2025.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/hybrid_column_detector.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/hybrid_encoders.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/input_data_file.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/integrity.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/json_cache.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/json_codec.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/llm/__init__.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/llm/schema_analyzer.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/logging_config.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/mask_tracker.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/model_hash.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/movie_frame_task.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/multi_table_dataset.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/multi_table_embedding_space.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/network_viz.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/platform_utils.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/prng_control.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/qa/demo_advisor_decisions.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/qa/example_complete_workflow.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/qa/generate_focal_report.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/qa/model_advisor.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/qa/show_results.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/qa/test_adaptive_training.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/qa/test_checkpoint_dict_reconstruction.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/qa/test_confusion_matrix_metadata.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/qa/test_embedding_quality.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/qa/test_embedding_space.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/qa/test_extend_embedding_space.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/qa/test_focal_comparison.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/qa/test_focal_comparison_enhanced.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/qa/test_focal_loss_single_predictor.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/qa/test_hybrid_columns.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/qa/test_label_smoothing.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/qa/test_monitor_integration.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/qa/test_piecewise_epochs.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/qa/test_predict_during_training.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/qa/test_timeline_quick.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/qa/test_training_monitor.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/qa/test_warning_tracking.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/qa/visualize_training_timeline.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/relationship_complexity.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/relationship_extractor.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/relationship_performance.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/relationship_preanalysis.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/relationship_search.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/scalar_codec.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/schema_history.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/set_codec.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/setlist_codec.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/single_predictor_mlp.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/sqlite_utils.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/stopwatch.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/string_analysis.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/string_cache.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/string_codec.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/string_list_codec.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/test_graph_encoder.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/timestamp_codec.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/training_banner.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/training_event.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/training_exceptions.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/training_history_db.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/training_logger.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/url_codec.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/url_parser.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/utils.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/vector_codec.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix/neural/world_data.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/featrix_debug.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/json_encoder_cache.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/knn_training.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/meta_learning_client.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/queue_manager.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/session_chains.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/single_predictor_cv.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/sp_training_wrapper.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/sphere_config.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/structureddata.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/system_health_monitor.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/training_monitor.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/utils.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/vector_db.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/webhook_helpers.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/lib/weightwatcher_tracking.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/llm_client.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/manage_churro.sh +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/migrate_string_cache_naming.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/neural.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/node-install.sh +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/prediction_drift_monitor.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/prediction_persistence_worker.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/query_schema_worker.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/quick_test_deployment.sh +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/recreate_session.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/redis_job_progress.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/redis_prediction_cli.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/redis_prediction_store.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/regenerate_training_movie.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/render_sphere.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/repair_checkpoint.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/run_api_server.sh +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/send_email.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/slack.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/standalone_prediction.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/start_celery_cpu_worker.sh +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/start_celery_gpu_worker.sh +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/start_celery_movie_worker.sh +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/tail-watch.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/test_api_client.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/test_complete_workflow.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/test_json_tables_prediction.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/test_redis_predictions.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/test_server_connection.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/test_session_models.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/test_single_predictor_api.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/test_upload_endpoint.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/tree.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/src/version.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/system_monitor.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/tests/test_client_data.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/tests/test_client_predictions.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/tests/test_client_sessions.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/tests/test_client_training.py +0 -0
- {featrixsphere-0.2.3613 → featrixsphere-0.2.3737}/tests/test_local_integration.py +0 -0
```diff
--- /dev/null
+++ featrixsphere-0.2.3737/VERSION
@@ -0,0 +1 @@
+0.2.3737
```
```diff
--- featrixsphere-0.2.3613/featrixsphere/client.py
+++ featrixsphere-0.2.3737/featrixsphere/client.py
@@ -566,6 +566,11 @@ class FeatrixSphereClient:
         response = self._make_request("DELETE", endpoint, max_retries=max_retries, **kwargs)
         return self._unwrap_response(response.json())
 
+    def _post_multipart(self, endpoint: str, data: Dict[str, Any] = None, files: Dict[str, Any] = None, max_retries: int = None, **kwargs) -> Dict[str, Any]:
+        """Make a POST request with multipart/form-data (for file uploads) and return JSON response."""
+        response = self._make_request("POST", endpoint, data=data, files=files, max_retries=max_retries, **kwargs)
+        return self._unwrap_response(response.json())
+
     # =========================================================================
     # Session Management
     # =========================================================================
```
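The only client-side change in this release is the `_post_multipart` helper above, which rounds out the existing `_make_request` wrappers with a multipart/form-data variant for file uploads. A hypothetical usage sketch follows; the endpoint path and the `file` field name are illustrative assumptions, and the `files=` tuple format is the requests-library multipart convention that `_make_request` presumably forwards:

```python
# Hypothetical caller for the new helper; only _post_multipart itself appears
# in the diff. Endpoint and field names are assumptions for illustration.
def upload_training_csv(client, session_id: str, csv_path: str) -> dict:
    with open(csv_path, "rb") as fh:
        return client._post_multipart(
            f"/session/{session_id}/upload",             # assumed endpoint
            data={"session_id": session_id},             # ordinary form fields
            files={"file": (csv_path, fh, "text/csv")},  # multipart file part
        )
```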
```diff
--- featrixsphere-0.2.3613/featrixsphere.egg-info/SOURCES.txt
+++ featrixsphere-0.2.3737/featrixsphere.egg-info/SOURCES.txt
@@ -82,6 +82,7 @@ src/lib/job_manager.py
 src/lib/json_encoder_cache.py
 src/lib/knn_training.py
 src/lib/meta_learning_client.py
+src/lib/pre_analysis_wrapper.py
 src/lib/queue_manager.py
 src/lib/quick_architecture_search.py
 src/lib/session_chains.py
@@ -109,6 +110,7 @@ src/lib/featrix/neural/domain_codec.py
 src/lib/featrix/neural/dropout_scheduler.py
 src/lib/featrix/neural/embedded_space.py
 src/lib/featrix/neural/embedding_lr_scheduler.py
+src/lib/featrix/neural/embedding_quality.py
 src/lib/featrix/neural/embedding_space_utils.py
 src/lib/featrix/neural/embedding_utils.py
 src/lib/featrix/neural/encoders.py
@@ -134,6 +136,7 @@ src/lib/featrix/neural/integrity.py
 src/lib/featrix/neural/io_utils.py
 src/lib/featrix/neural/json_cache.py
 src/lib/featrix/neural/json_codec.py
+src/lib/featrix/neural/local_string_cache.py
 src/lib/featrix/neural/logging_config.py
 src/lib/featrix/neural/mask_tracker.py
 src/lib/featrix/neural/model_config.py
```
```diff
--- featrixsphere-0.2.3613/src/api.py
+++ featrixsphere-0.2.3737/src/api.py
@@ -614,24 +614,57 @@ def create_app() -> FastAPI:
         Comprehensive health check endpoint with system metrics.
         Reads cached health data from Redis (populated by system_monitor.py).
         Returns error if cache is stale (>90 seconds old).
+
+        CRITICAL: Uses timeout on Redis operations to prevent hanging if Redis is slow/unresponsive.
         """
         from lib.job_manager import get_redis_client
 
         current_time = time.time()
         uptime_seconds = current_time - SERVER_STARTUP_TIME
 
-        # Read cached health data from Redis
-
+        # Read cached health data from Redis with timeout handling
+        # CRITICAL: Run blocking Redis operation in thread pool to avoid blocking event loop
         redis_key = "health:cache"
-        cached_data =
+        cached_data = None
+        redis_error = None
+
+        def _get_redis_cache():
+            """Blocking Redis operation - runs in thread pool."""
+            import redis
+            try:
+                redis_client = get_redis_client()
+                # Redis client now has socket_timeout=2.0, so this will timeout quickly if Redis is hung
+                return redis_client.get(redis_key), None
+            except redis.TimeoutError as e:
+                return None, f"Redis timeout: {e}"
+            except redis.ConnectionError as e:
+                return None, f"Redis connection error: {e}"
+            except Exception as e:
+                return None, f"Redis error: {e}"
+
+        try:
+            import asyncio
+            # Run blocking Redis operation in thread pool - prevents blocking event loop
+            cached_data, redis_error = await asyncio.to_thread(_get_redis_cache)
+            if redis_error:
+                logger.warning(f"⚠️ Health endpoint: {redis_error}")
+        except Exception as e:
+            redis_error = f"Unexpected error: {e}"
+            logger.warning(f"⚠️ Health endpoint: {redis_error}")
 
         if not cached_data:
+            # If Redis error occurred, include it in the response
+            error_msg = "Health data not available - system monitor may not be running"
+            if redis_error:
+                error_msg += f" (Redis error: {redis_error})"
+
             return JSONResponse(
                 status_code=503,
                 content={
                     "status": "unhealthy",
                     "hostname": socket.gethostname(),
-                    "error":
+                    "error": error_msg,
+                    "redis_error": redis_error,
                     "timestamp": datetime.datetime.now().isoformat(),
                 }
             )
```
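The shape of this fix is worth spelling out: redis-py calls are synchronous, so a hung Redis previously stalled the FastAPI event loop and took every in-flight request with it. The hunk bounds the damage twice, once with a client-side `socket_timeout` and once by pushing the blocking call onto a worker thread. A minimal standalone sketch of the same pattern, assuming redis-py and Python 3.9+ (for `asyncio.to_thread`):

```python
# Standalone sketch of the timeout-plus-thread-pool pattern in the hunk above.
# The key name mirrors the hunk's "health:cache"; everything else is generic.
import asyncio
import redis

def fetch_cached(key: str):
    # socket_timeout bounds each blocking call even when Redis is hung
    client = redis.Redis(socket_timeout=2.0, socket_connect_timeout=2.0)
    try:
        return client.get(key), None
    except (redis.TimeoutError, redis.ConnectionError) as exc:
        return None, str(exc)

async def health_snapshot() -> dict:
    # to_thread keeps the event loop free while the worker thread blocks
    data, err = await asyncio.to_thread(fetch_cached, "health:cache")
    return {"cached": data is not None, "redis_error": err}

if __name__ == "__main__":
    print(asyncio.run(health_snapshot()))
```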
```diff
--- featrixsphere-0.2.3613/src/celery_app.py
+++ featrixsphere-0.2.3737/src/celery_app.py
@@ -144,6 +144,8 @@ def check_duplicate_task_delivery(sender=None, task_id=None, task=None, args=Non
     If a duplicate is detected, we raise Ignore() to gracefully skip the task without
     marking it as failed or acquiring locks that would conflict with the running instance.
 
+    Also logs the full job configuration for all tasks to help debug configuration issues.
+
     Args:
         sender: The task class
         task_id: Celery task ID
@@ -151,7 +153,59 @@ def check_duplicate_task_delivery(sender=None, task_id=None, task=None, args=Non
         args: Task positional arguments
         kwargs: Task keyword arguments
     """
-    #
+    # LOG FULL JOB CONFIGURATION FOR ALL TASKS
+    try:
+        import json
+        task_name = sender.name if sender else 'unknown'
+        logger.info(f"\n{'='*80}")
+        logger.info(f"🔵 [TASK START] {task_name}")
+        logger.info(f" Task ID: {task_id}")
+        logger.info(f" Timestamp: {datetime.now().isoformat()}")
+
+        # Extract and log job_spec for all tasks
+        if args and len(args) > 0:
+            job_spec = args[0] if isinstance(args[0], dict) else None
+            if job_spec:
+                logger.info(f" 📋 Job Configuration (job_spec):")
+                # Pretty print the job_spec with indentation
+                try:
+                    job_spec_str = json.dumps(job_spec, indent=4, default=str)
+                    # Log each line with indentation
+                    for line in job_spec_str.split('\n'):
+                        logger.info(f" {line}")
+                except Exception as json_err:
+                    logger.info(f" (Could not serialize job_spec: {json_err})")
+                    logger.info(f" job_spec type: {type(job_spec)}")
+                    logger.info(f" job_spec keys: {list(job_spec.keys()) if isinstance(job_spec, dict) else 'N/A'}")
+
+            # Log all args
+            logger.info(f" 📦 Task Arguments:")
+            for i, arg in enumerate(args):
+                if isinstance(arg, dict):
+                    # Skip logging full dicts again (already logged as job_spec)
+                    if i == 0 and job_spec:
+                        logger.info(f" args[{i}]: <job_spec> (logged above)")
+                    else:
+                        logger.info(f" args[{i}]: {type(arg).__name__} with {len(arg)} keys" if isinstance(arg, dict) else f" args[{i}]: {arg}")
+                else:
+                    logger.info(f" args[{i}]: {arg}")
+
+        if kwargs:
+            logger.info(f" 📦 Task Keyword Arguments:")
+            for key, value in kwargs.items():
+                if isinstance(value, dict):
+                    logger.info(f" {key}: <dict with {len(value)} keys>")
+                elif isinstance(value, (list, tuple)):
+                    logger.info(f" {key}: <{type(value).__name__} with {len(value)} items>")
+                else:
+                    logger.info(f" {key}: {value}")
+
+        logger.info(f"{'='*80}\n")
+    except Exception as log_err:
+        # Don't let logging errors break task execution
+        logger.warning(f"⚠️ Error logging job configuration: {log_err}")
+
+    # Only check long-running GPU training tasks for duplicates
     if sender.name not in ['celery_app.train_es', 'celery_app.train_single_predictor']:
         return
 
```
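`check_duplicate_task_delivery` is a `task_prerun` signal handler (a NOTE in a later hunk says as much), so the new logging block runs on the worker before every task body. A minimal sketch of that wiring, showing only the signal mechanics and not the package's duplicate-detection logic; the broker URL and task body are placeholders:

```python
# Minimal Celery task_prerun wiring; the handler signature matches the one
# extended in this hunk. Broker and task body are placeholders.
from celery import Celery
from celery.signals import task_prerun

app = Celery("demo", broker="memory://")

@task_prerun.connect
def log_task_start(sender=None, task_id=None, task=None, args=None, kwargs=None, **extra):
    # sender is the task object; sender.name is e.g. "celery_app.train_es"
    name = getattr(sender, "name", "unknown")
    print(f"[TASK START] {name} id={task_id} args={args!r} kwargs={kwargs!r}")

@app.task
def train_es(job_spec: dict):
    return job_spec
```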
```diff
--- featrixsphere-0.2.3613/src/celery_app.py
+++ featrixsphere-0.2.3737/src/celery_app.py
@@ -238,7 +292,6 @@ def setup_job_logging(job_id: str, session_id: str, job_type: str):
     import stat
     import socket
     import fcntl
-    import time as time_module
 
     job_dir = get_job_output_path(job_id, session_id, job_type)
     original_cwd = os.getcwd()
@@ -247,6 +300,44 @@ def setup_job_logging(job_id: str, session_id: str, job_type: str):
     job_dir.mkdir(parents=True, exist_ok=True)
     logger.info(f"📁 Job directory: {job_dir} (exists: {job_dir.exists()})")
 
+    # Create JOB_INFO.json with job metadata (queued_at, created_at, etc.)
+    try:
+        from lib.job_manager import load_job
+        job_data = load_job(job_id)
+
+        job_info = {
+            "job_id": job_id,
+            "session_id": session_id,
+            "job_type": job_type,
+            "created_at": job_data.get("created_at") if job_data else None,
+            "queued_at": job_data.get("created_at") if job_data else None,  # Same as created_at
+            "started_at": datetime.now().isoformat(),
+            "status": job_data.get("status") if job_data else "unknown",
+            "worker_hostname": socket.gethostname(),
+            "celery_task_id": job_id,
+        }
+
+        # Add any additional job_spec info if available
+        if job_data and "job_spec" in job_data:
+            job_spec = job_data["job_spec"]
+            if isinstance(job_spec, dict):
+                # Include relevant job_spec fields
+                if "target_column" in job_spec:
+                    job_info["target_column"] = job_spec["target_column"]
+                if "target_column_type" in job_spec:
+                    job_info["target_column_type"] = job_spec["target_column_type"]
+
+        job_info_path = job_dir / "JOB_INFO.json"
+        with open(job_info_path, 'w') as f:
+            json.dump(job_info, f, indent=2, default=str)
+
+        logger.info(f"📝 Created JOB_INFO.json: {job_info_path}")
+        if job_info.get("queued_at"):
+            logger.info(f" Queued at: {job_info['queued_at']}")
+    except Exception as job_info_err:
+        logger.warning(f"⚠️ Could not create JOB_INFO.json: {job_info_err}")
+        # Don't fail job setup if this fails
+
     # NOTE: Duplicate task delivery detection is handled by task_prerun signal (see below)
     # This checks log file mtime BEFORE the task runs to gracefully ignore duplicates
 
```
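Given the dict assembled above, the JOB_INFO.json dropped into the job directory for a predictor job would look roughly like this; every key comes from the hunk, while all values are illustrative:

```json
{
  "job_id": "abc123-celery-task-id",
  "session_id": "sess-1234",
  "job_type": "train_single_predictor",
  "created_at": "2025-01-01T12:00:00",
  "queued_at": "2025-01-01T12:00:00",
  "started_at": "2025-01-01T12:00:05",
  "status": "running",
  "worker_hostname": "gpu-worker-1",
  "celery_task_id": "abc123-celery-task-id",
  "target_column": "churn"
}
```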
```diff
--- featrixsphere-0.2.3613/src/celery_app.py
+++ featrixsphere-0.2.3737/src/celery_app.py
@@ -1799,9 +1890,6 @@ def pre_analysis_architecture(self, job_spec: dict, job_id: str, session_id: str
     Returns:
         dict with optimal config
     """
-    from pathlib import Path
-    import sys
-
     # Use Celery task ID if job_id not provided
     if not job_id:
         job_id = self.request.id
@@ -1827,8 +1915,37 @@ def pre_analysis_architecture(self, job_spec: dict, job_id: str, session_id: str
     input_data_path = session.get('input_data')
 
     if input_data_path:
-
-
+        # Handle relative paths - check if file exists, if not try session directory
+        input_path = Path(input_data_path)
+        if not input_path.exists() and not input_path.is_absolute():
+            # Try to find it in the session directory or data directory
+            from lib.sphere_config import config
+            session_dir = Path(config.data_dir) / session_id
+            possible_paths = [
+                session_dir / input_path.name,
+                Path(config.data_dir) / input_path.name,
+                input_path  # Try original as-is
+            ]
+            for possible_path in possible_paths:
+                if possible_path.exists():
+                    input_data_path = str(possible_path)
+                    logger.info(f"✅ Found input data at: {input_data_path}")
+                    break
+            else:
+                # File not found - skip meta-learning query but don't fail
+                logger.warning(f"⚠️ Input data file not found: {input_data_path}")
+                logger.warning(f" Tried paths: {[str(p) for p in possible_paths]}")
+                logger.warning(f" Skipping meta-learning query - will use adaptive sampling")
+                raise FileNotFoundError(f"Input data file not found: {input_data_path}")
+
+        # File exists - try to load it
+        try:
+            input_file = FeatrixInputDataFile(str(input_data_path))
+            df = input_file.df
+        except Exception as load_err:
+            logger.warning(f"⚠️ Failed to load input data file: {load_err}")
+            logger.warning(f" Skipping meta-learning query - will use adaptive sampling")
+            raise
 
     # Query meta-learning API
     query_result = query_optimal_parameters(df, target_type=None)
```
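One subtlety in the hunk above: the `else` is attached to the `for` loop, not the `if`, so it runs only when the loop finishes without `break`, i.e. when no candidate path exists. Note also that both failure branches log that the meta-learning query will be skipped but still raise, so the skip evidently happens in a caller that catches the exception. A self-contained sketch of the for/else resolution strategy, with illustrative names:

```python
# for/else path resolution as used in the hunk: the else block runs only
# when the loop completes without break, i.e. no candidate file existed.
from pathlib import Path

def resolve_input(name: str, roots: list) -> Path:
    candidates = [Path(root) / name for root in roots]
    for candidate in candidates:
        if candidate.exists():
            resolved = candidate
            break
    else:
        raise FileNotFoundError(
            f"{name} not found; tried {[str(c) for c in candidates]}"
        )
    return resolved

# resolve_input("train.csv", ["/data/sess-1234", "/data"])
```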
```diff
--- featrixsphere-0.2.3613/src/celery_app.py
+++ featrixsphere-0.2.3737/src/celery_app.py
@@ -1893,57 +2010,219 @@ def pre_analysis_architecture(self, job_spec: dict, job_id: str, session_id: str
 
     strings_cache = session.get('strings_cache')
 
-    #
-
-
-
-
-
-
-
-
-
-    optimal_config = run_quick_architecture_search(
-        data_file=data_file,
-        strings_cache=strings_cache,
-        session_id=session_id,
-        n_samples=n_samples,
-        quick_epochs=quick_epochs,
-        suggested_configs=suggested_configs  # Pass suggested configs
-    )
+    # CRITICAL: Fork/exec pre-analysis in a non-daemon session leader process
+    # Same pattern as ES/SP training for better GPU memory management and process isolation
+    # Serialize args to pass to subprocess
+    args_dict = {
+        'data_file': str(data_file) if data_file else None,
+        'strings_cache': strings_cache or "",
+        'session_id': session_id,
+        'n_samples': n_samples,
+        'quick_epochs': quick_epochs,
+        'suggested_configs': suggested_configs
+    }
 
-
+    # Create a temporary file to pass args
+    with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
+        json_file = f.name
+        json_module.dump(args_dict, f)
 
-    # Save results to job directory for debugging
-    results_file = job_dir / "pre_analysis_results.json"
     try:
-
-
-
-
-            "timestamp": datetime.now().isoformat(),
-            "meta": {
-                "n_samples_requested": job_spec.get('n_samples', 20),
-                "quick_epochs": job_spec.get('quick_epochs', 25),
-                "used_suggested_configs": bool(suggested_configs)
-            }
-        }
+        # Fork/exec pre-analysis in a new session leader process
+        # This provides better process isolation and GPU memory cleanup
+        python_cmd = sys.executable
+        wrapper_script = Path(__file__).parent / "lib" / "pre_analysis_wrapper.py"
 
-
-
+        if suggested_configs:
+            logger.info(f"🚀 Forking pre-analysis process with {len(suggested_configs)} meta-learning suggestions ({quick_epochs} epochs each)")
+        else:
+            logger.info(f"🚀 Forking pre-analysis process ({n_samples} configs, {quick_epochs} epochs each)")
+        logger.info(f" Python: {python_cmd}")
+        logger.info(f" Script: {wrapper_script}")
+
+        # Fork/exec with setsid to create new session leader (non-daemon)
+        # CRITICAL: Open log file in append mode so subprocess writes to same file
+        log_file_path = job_dir / "logs" / "stdout.log"
+
+        # Open log file in append mode (line buffered) for subprocess
+        with open(log_file_path, 'a', buffering=1) as log_file:
+            process = subprocess.Popen(
+                [python_cmd, str(wrapper_script), json_file, '--job-id', job_id],
+                cwd=str(job_dir),
+                stdout=log_file,
+                stderr=subprocess.STDOUT,  # Redirect stderr to stdout
+                start_new_session=True,  # setsid - become session leader (non-daemon)
+                env=os.environ.copy()  # Inherit environment
+            )
 
-
+        pre_analysis_pid = process.pid
+        celery_worker_pid = os.getpid()
+
+        # Write Celery worker mapping file for admin monitoring
+        mapping_file = Path(f"/tmp/featrix-celery-{celery_worker_pid}.json")
+        try:
+            mapping_data = {
+                "timestamp_started": datetime.now().isoformat(),
+                "celery_worker_pid": celery_worker_pid,
+                "pre_analysis_pid": pre_analysis_pid,
+                "job_id": job_id,
+                "session_id": session_id,
+                "job_type": "pre_analysis_architecture",
+            }
+            with open(mapping_file, 'w') as f:
+                json_module.dump(mapping_data, f, indent=2)
+            logger.info(f"📝 Wrote Celery worker mapping: {mapping_file}")
+        except Exception as e:
+            logger.warning(f"Failed to write Celery worker mapping: {e}")
+
+        logger.info(f"✅ Pre-analysis process forked with PID {pre_analysis_pid} (session leader)")
+        logger.info(f" Celery worker PID: {celery_worker_pid}")
+        logger.info(f" Pre-analysis PID: {pre_analysis_pid}")
+        logger.info(f" 📝 Pre-analysis output in: {log_file_path}")
+        logger.info(f" 🔍 Monitor with: ffsh tail {session_id}/{job_id}")
+        logger.info(f" Monitoring PID {pre_analysis_pid} until it exits...")
+
+        # Wait for process to complete
+        # CRITICAL: Check for ABORT flag periodically AND update Celery state with progress
+        abort_flag = job_dir / "ABORT"
+        last_progress_update = 0
+
+        try:
+            while True:
+                # Check if process completed
+                return_code = process.poll()
+                if return_code is not None:
+                    # Process finished
+                    if return_code == 0:
+                        logger.info(f"✅ Pre-analysis process completed successfully (exit code: {return_code})")
+                    else:
+                        logger.error(f"❌ Pre-analysis process failed with exit code: {return_code}")
+                    break
+
+                # Check for abort flag
+                if abort_flag.exists():
+                    logger.warning(f"⚠️ ABORT flag detected - terminating pre-analysis process")
+                    process.terminate()
+                    time.sleep(2)
+                    if process.poll() is None:
+                        logger.warning(f"⚠️ Process didn't terminate, sending SIGKILL")
+                        process.kill()
+                    break
+
+                # Poll Redis for progress updates (every 2 seconds, like train_single_predictor)
+                current_time = time.time()
+                if current_time - last_progress_update >= 2:
+                    try:
+                        from lib.job_manager import load_job
+                        job_data = load_job(job_id)
+                        if job_data:
+                            progress = job_data.get('progress', 0)
+                            current_config = job_data.get('current_config', 0)
+                            total_configs = job_data.get('total_configs', 0)
+                            current_auc = job_data.get('current_auc')
+                            best_config_so_far = job_data.get('best_config_so_far')
+                            best_auc_so_far = job_data.get('best_auc_so_far')
+                            status_message = job_data.get('status_message', 'Pre-analysis architecture search in progress...')
+
+                            # Build status message
+                            if current_config > 0 and total_configs > 0:
+                                status = f"Testing config {current_config}/{total_configs}"
+                                if current_auc is not None:
+                                    status += f" (AUC: {current_auc:.4f})"
+                                if best_config_so_far and best_auc_so_far is not None:
+                                    status += f" | Best so far: d_model={best_config_so_far.get('d_model')}, layers={best_config_so_far.get('n_transformer_layers')}, heads={best_config_so_far.get('n_attention_heads')} (AUC: {best_auc_so_far:.4f})"
+                            else:
+                                status = status_message
+
+                            # Update Celery state so client gets progress
+                            self.update_state(
+                                state='PROGRESS',
+                                meta={
+                                    'status': status,
+                                    'job_id': job_id,
+                                    'session_id': session_id,
+                                    'progress': progress,
+                                    'current_config': current_config,
+                                    'total_configs': total_configs,
+                                    'current_auc': current_auc,
+                                    'best_config_so_far': best_config_so_far,
+                                    'best_auc_so_far': best_auc_so_far
+                                }
+                            )
+                            last_progress_update = current_time
+                    except Exception as e:
+                        # Don't fail if progress update fails
+                        logger.debug(f"Failed to update progress: {e}")
+
+                time.sleep(1)  # Check every second
+
+            # Wait for process to fully exit
+            final_return_code = process.wait()
+
+            if final_return_code != 0:
+                raise RuntimeError(f"Pre-analysis process failed with exit code: {final_return_code}")
+
+        except KeyboardInterrupt:
+            logger.warning("⚠️ Received interrupt - terminating pre-analysis process")
+            process.terminate()
+            time.sleep(2)
+            if process.poll() is None:
+                process.kill()
+            raise
+        finally:
+            # Clean up temp JSON file
+            try:
+                os.unlink(json_file)
+            except Exception:
+                pass
+
+        # Load results from session (wrapper saves it there)
+        from lib.session_manager import load_session
+        session = load_session(session_id)
+        optimal_config = session.get('optimal_es_config')
+
+        if not optimal_config:
+            raise RuntimeError("Pre-analysis completed but optimal_es_config not found in session")
+
+        logger.info(f"✅ Quick search complete: {optimal_config}")
+
+        # Save results to job directory for debugging
+        results_file = job_dir / "pre_analysis_results.json"
+        try:
+            # Get all tested configs from session if available
+            session = load_session(session_id)
+            pre_analysis_results = session.get('pre_analysis_results', {})
+            all_tested_configs = pre_analysis_results.get('all_tested_configs', [])
+
+            results_output = {
+                "optimal_config": optimal_config,
+                "session_id": session_id,
+                "job_id": job_id,
+                "timestamp": datetime.now().isoformat(),
+                "all_tested_configs": all_tested_configs,  # Include all tested configs
+                "meta": {
+                    "n_samples_requested": job_spec.get('n_samples', 20),
+                    "quick_epochs": job_spec.get('quick_epochs', 25),
+                    "used_suggested_configs": bool(suggested_configs),
+                    "total_configs_tested": len(all_tested_configs),
+                    "successful_configs": len([c for c in all_tested_configs if c.get('tested', False)]),
+                    "failed_configs": len([c for c in all_tested_configs if not c.get('tested', False)])
+                }
+            }
+
+            with open(results_file, 'w') as f:
+                json.dump(results_output, f, indent=2, default=str)
+
+            logger.info(f"💾 Saved pre-analysis results to {results_file} (including {len(all_tested_configs)} tested configs)")
+        except Exception as e:
+            logger.warning(f"⚠️ Failed to save pre-analysis results: {e}")
+
+        logger.info(f"💾 Optimal config loaded from session")
+
     except Exception as e:
-        logger.
-
-
-    from lib.session_manager import load_session, save_session
-    session = load_session(session_id)
-    session['optimal_es_config'] = optimal_config
-    save_session(session_id, session, exist_ok=True)
-
-    logger.info(f"💾 Saved optimal config to session")
+        logger.error(f"❌ Failed to run pre-analysis: {e}")
+        traceback.print_exc()
+        raise
 
     # Dispatch next job (train_es)
     try:
```
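The net effect of this hunk: `run_quick_architecture_search` no longer executes inside the Celery worker process. The work moves into `src/lib/pre_analysis_wrapper.py` (the new file in the listing above), launched via `subprocess.Popen` with `start_new_session=True` so the child calls `setsid()` and runs as its own session leader, while the worker merely polls it, honors the ABORT flag, and republishes progress from Redis. A stripped-down sketch of that fork-and-monitor pattern; the wrapper path and ABORT convention come from the hunk, the rest is generic:

```python
# Stripped-down fork/monitor pattern from the hunk above. start_new_session
# makes the child a session leader (setsid), detaching it from the daemonized
# worker; the parent polls instead of blocking in wait() so it can honor an
# ABORT flag (and, in the real code, publish progress) while the child runs.
import os
import subprocess
import sys
import time
from pathlib import Path

def run_isolated(wrapper: str, args_json: str, job_dir: Path) -> int:
    log_path = job_dir / "logs" / "stdout.log"
    log_path.parent.mkdir(parents=True, exist_ok=True)
    with open(log_path, "a", buffering=1) as log:
        proc = subprocess.Popen(
            [sys.executable, wrapper, args_json],
            cwd=str(job_dir),
            stdout=log,
            stderr=subprocess.STDOUT,
            start_new_session=True,   # child calls setsid()
            env=os.environ.copy(),
        )
    while proc.poll() is None:
        if (job_dir / "ABORT").exists():
            proc.terminate()          # the real code escalates to kill()
        time.sleep(1)
    return proc.returncode
```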
```diff
--- featrixsphere-0.2.3613/src/celery_app.py
+++ featrixsphere-0.2.3737/src/celery_app.py
@@ -2511,7 +2790,6 @@ def train_single_predictor(self, *args, **kwargs):
 
         # Wait for process to complete
         # CRITICAL: Check for ABORT flag periodically AND update Celery state with progress
-        import time
         abort_flag = job_dir / "ABORT"
         last_progress_update = 0
 
```