featrixsphere 0.2.1830__tar.gz → 0.2.2280__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207)
  1. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/PKG-INFO +1 -1
  2. featrixsphere-0.2.2280/VERSION +1 -0
  3. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/featrix-update.py +75 -14
  4. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/featrixsphere/__init__.py +1 -1
  5. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/featrixsphere/client.py +331 -68
  6. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/featrixsphere.egg-info/PKG-INFO +1 -1
  7. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/featrixsphere.egg-info/SOURCES.txt +21 -3
  8. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/requirements.txt +0 -1
  9. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/api.py +614 -94
  10. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/celery_app.py +1036 -256
  11. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/config.py +9 -5
  12. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/event_log.py +1 -1
  13. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/featrix_watchdog.py +11 -6
  14. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/celery_job_recovery.py +133 -6
  15. featrixsphere-0.2.2280/src/lib/distribution_shift_detector.py +693 -0
  16. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/epoch_projections.py +1 -1
  17. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/es_projections.py +1 -1
  18. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/es_training.py +53 -18
  19. featrixsphere-0.2.2280/src/lib/es_training_wrapper.py +131 -0
  20. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/__init__.py +1 -1
  21. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/__init__.py +2 -2
  22. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/calibration_utils.py +8 -1
  23. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/classification_metrics.py +13 -12
  24. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/config.py +1 -1
  25. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/data_frame_data_set.py +1 -1
  26. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/dataloader_utils.py +44 -22
  27. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/detect.py +1 -1
  28. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/domain_codec.py +3 -3
  29. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/embedded_space.py +651 -250
  30. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/embedding_lr_scheduler.py +1 -1
  31. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/embedding_space_utils.py +1 -1
  32. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/embedding_utils.py +1 -1
  33. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/encoders.py +69 -19
  34. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/enrich.py +1 -1
  35. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/es_projection.py +1 -1
  36. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/exceptions.py +1 -1
  37. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/featrix_csv.py +1 -1
  38. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/featrix_json.py +1 -1
  39. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/featrix_module_dict.py +1 -1
  40. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/featrix_token.py +7 -5
  41. featrixsphere-0.2.2280/src/lib/featrix/neural/feature_engineer.py +306 -0
  42. featrixsphere-0.2.2280/src/lib/featrix/neural/feature_suggestion_tracker.py +372 -0
  43. featrixsphere-0.2.2280/src/lib/featrix/neural/gpu_utils.py +724 -0
  44. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/guardrails.py +1 -1
  45. featrixsphere-0.2.2280/src/lib/featrix/neural/hybrid_column_detector.py +466 -0
  46. featrixsphere-0.2.2280/src/lib/featrix/neural/hybrid_encoders.py +344 -0
  47. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/input_data_file.py +15 -8
  48. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/input_data_set.py +419 -44
  49. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/integrity.py +1 -1
  50. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/io_utils.py +497 -127
  51. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/json_cache.py +2 -2
  52. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/json_codec.py +2 -2
  53. featrixsphere-0.2.2280/src/lib/featrix/neural/llm/__init__.py +6 -0
  54. featrixsphere-0.2.2280/src/lib/featrix/neural/llm/schema_analyzer.py +143 -0
  55. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/logging_config.py +23 -6
  56. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/mask_tracker.py +5 -14
  57. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/model_config.py +52 -1
  58. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/model_hash.py +1 -1
  59. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/movie_frame_task.py +3 -6
  60. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/network_viz.py +1 -1
  61. featrixsphere-0.2.2280/src/lib/featrix/neural/platform_utils.py +84 -0
  62. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/prng_control.py +6 -6
  63. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/test_adaptive_training.py +1 -1
  64. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/test_checkpoint_dict_reconstruction.py +1 -1
  65. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/test_confusion_matrix_metadata.py +1 -1
  66. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/test_embedding_quality.py +1 -1
  67. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/test_embedding_space.py +1 -1
  68. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/test_extend_embedding_space.py +1 -1
  69. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/test_focal_comparison.py +1 -1
  70. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/test_focal_comparison_enhanced.py +1 -1
  71. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/test_focal_loss_single_predictor.py +1 -1
  72. featrixsphere-0.2.2280/src/lib/featrix/neural/qa/test_hybrid_columns.py +481 -0
  73. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/test_label_smoothing.py +1 -1
  74. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/test_monitor_integration.py +1 -1
  75. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/test_timeline_quick.py +1 -1
  76. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/test_training_monitor.py +1 -1
  77. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/scalar_codec.py +5 -4
  78. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/set_codec.py +30 -15
  79. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/setlist_codec.py +10 -4
  80. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/simple_mlp.py +1 -1
  81. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/single_predictor.py +1082 -393
  82. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/sqlite_utils.py +1 -1
  83. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/stopwatch.py +1 -1
  84. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/string_analysis.py +1 -1
  85. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/string_cache.py +135 -32
  86. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/string_codec.py +279 -35
  87. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/timestamp_codec.py +1 -1
  88. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/training_context_manager.py +1 -1
  89. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/training_history_db.py +1 -1
  90. featrixsphere-0.2.2280/src/lib/featrix/neural/training_logger.py +974 -0
  91. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/transformer_encoder.py +50 -1
  92. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/url_codec.py +3 -3
  93. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/url_parser.py +1 -1
  94. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/utils.py +29 -14
  95. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/vector_codec.py +4 -4
  96. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/world_data.py +1 -1
  97. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/job_manager.py +14 -9
  98. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/json_encoder_cache.py +1 -1
  99. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/knn_training.py +7 -4
  100. featrixsphere-0.2.2280/src/lib/meta_learning_client.py +339 -0
  101. featrixsphere-0.2.2280/src/lib/quick_architecture_search.py +292 -0
  102. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/session_chains.py +175 -19
  103. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/session_manager.py +415 -81
  104. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/single_predictor_cv.py +1 -1
  105. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/single_predictor_training.py +451 -180
  106. featrixsphere-0.2.2280/src/lib/sp_training_wrapper.py +99 -0
  107. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/structureddata.py +2 -4
  108. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/training_monitor.py +1 -1
  109. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/utils.py +1 -1
  110. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/node-install.sh +214 -120
  111. featrixsphere-0.2.2280/src/query_schema_worker.py +255 -0
  112. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/render_sphere.py +1 -1
  113. featrixsphere-0.2.2280/src/repair_checkpoint.py +246 -0
  114. featrixsphere-0.2.2280/src/start_celery_gpu_worker.sh +142 -0
  115. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/version.py +24 -9
  116. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/system_monitor.py +26 -8
  117. featrixsphere-0.2.2280/tests/test_client_data.py +145 -0
  118. featrixsphere-0.2.2280/tests/test_client_predictions.py +266 -0
  119. featrixsphere-0.2.2280/tests/test_client_sessions.py +268 -0
  120. featrixsphere-0.2.2280/tests/test_client_training.py +149 -0
  121. featrixsphere-0.2.2280/tests/test_local_integration.py +270 -0
  122. featrixsphere-0.2.1830/VERSION +0 -1
  123. featrixsphere-0.2.1830/src/lib/distribution_shift_detector.py +0 -481
  124. featrixsphere-0.2.1830/src/lib/es_training_wrapper.py +0 -60
  125. featrixsphere-0.2.1830/src/lib/featrix/neural/MetaDataCache.py +0 -203
  126. featrixsphere-0.2.1830/src/lib/featrix/neural/device.py +0 -40
  127. featrixsphere-0.2.1830/src/start_celery_gpu_worker.sh +0 -38
  128. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/MANIFEST.in +0 -0
  129. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/README.md +0 -0
  130. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/featrixsphere/test_client.py +0 -0
  131. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/featrixsphere.egg-info/dependency_links.txt +0 -0
  132. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/featrixsphere.egg-info/entry_points.txt +0 -0
  133. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/featrixsphere.egg-info/not-zip-safe +0 -0
  134. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/featrixsphere.egg-info/requires.txt +0 -0
  135. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/featrixsphere.egg-info/top_level.txt +0 -0
  136. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/nv-install.sh +0 -0
  137. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/setup.cfg +0 -0
  138. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/setup.py +0 -0
  139. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/auto_upgrade_monitor.py +0 -0
  140. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/build_version.py +0 -0
  141. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/demo_existing_model.py +0 -0
  142. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/demo_label_updates.py +0 -0
  143. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/deploy.py +0 -0
  144. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/deploy_cache_debug.sh +0 -0
  145. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/ensure_watchdog_running.sh +0 -0
  146. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/error_tracker.py +0 -0
  147. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/example_api_usage.py +0 -0
  148. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/example_prediction_feedback.py +0 -0
  149. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/example_train_predictor.py +0 -0
  150. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/gc_cleanup.py +0 -0
  151. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/api_event_retry.py +0 -0
  152. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/convergence_monitor.py +0 -0
  153. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/crash_tracker.py +0 -0
  154. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/dropout_scheduler.py +0 -0
  155. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/hubspot_free_domains_list_may_2025.py +0 -0
  156. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/demo_advisor_decisions.py +0 -0
  157. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/example_complete_workflow.py +0 -0
  158. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/generate_focal_report.py +0 -0
  159. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/model_advisor.py +0 -0
  160. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/show_results.py +0 -0
  161. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/test_piecewise_epochs.py +0 -0
  162. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/test_predict_during_training.py +0 -0
  163. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/test_warning_tracking.py +0 -0
  164. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/qa/visualize_training_timeline.py +0 -0
  165. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/single_predictor_mlp.py +0 -0
  166. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/sphere_config.py +0 -0
  167. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/string_list_codec.py +0 -0
  168. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/training_event.py +0 -0
  169. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix/neural/training_exceptions.py +0 -0
  170. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/featrix_debug.py +0 -0
  171. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/queue_manager.py +0 -0
  172. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/sphere_config.py +0 -0
  173. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/system_health_monitor.py +0 -0
  174. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/vector_db.py +0 -0
  175. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/webhook_helpers.py +0 -0
  176. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/lib/weightwatcher_tracking.py +0 -0
  177. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/llm_client.py +0 -0
  178. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/manage_churro.sh +0 -0
  179. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/migrate_string_cache_naming.py +0 -0
  180. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/neural.py +0 -0
  181. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/prediction_drift_monitor.py +0 -0
  182. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/prediction_persistence_worker.py +0 -0
  183. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/quick_test_deployment.sh +0 -0
  184. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/recreate_session.py +0 -0
  185. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/redis_job_progress.py +0 -0
  186. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/redis_prediction_cli.py +0 -0
  187. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/redis_prediction_store.py +0 -0
  188. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/regenerate_training_movie.py +0 -0
  189. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/restart_celery_worker.sh +0 -0
  190. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/run_api_server.sh +0 -0
  191. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/send_email.py +0 -0
  192. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/slack.py +0 -0
  193. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/standalone_prediction.py +0 -0
  194. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/start_celery_cpu_worker.sh +0 -0
  195. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/start_celery_worker.sh +0 -0
  196. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/start_churro_server.sh +0 -0
  197. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/tail-watch.py +0 -0
  198. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/test_api_client.py +0 -0
  199. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/test_complete_workflow.py +0 -0
  200. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/test_json_tables_prediction.py +0 -0
  201. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/test_redis_predictions.py +0 -0
  202. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/test_server_connection.py +0 -0
  203. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/test_session_models.py +0 -0
  204. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/test_single_predictor_api.py +0 -0
  205. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/test_upload_endpoint.py +0 -0
  206. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/tree.py +0 -0
  207. {featrixsphere-0.2.1830 → featrixsphere-0.2.2280}/src/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: featrixsphere
3
- Version: 0.2.1830
3
+ Version: 0.2.2280
4
4
  Summary: Transform any CSV into a production-ready ML model in minutes, not months.
5
5
  Home-page: https://github.com/Featrix/sphere
6
6
  Author: Featrix
@@ -0,0 +1 @@
1
+ 0.2.2278
@@ -208,9 +208,13 @@ def find_newest_version(index: Dict[str, Any]) -> Optional[Dict[str, Any]]:
208
208
  print("⚠️ No files with version information found")
209
209
  return None
210
210
 
211
- # Sort by version (newest first)
211
+ # Sort by version (newest first), then by date_modified (newest first) as tiebreaker
212
+ # This ensures when multiple builds have the same version, we get the most recent one
212
213
  versioned_files.sort(
213
- key=lambda x: tuple(int(p) for p in x['version'].split('.')[:3]),
214
+ key=lambda x: (
215
+ tuple(int(p) for p in x['version'].split('.')[:3]),
216
+ x.get('date_modified', '')
217
+ ),
214
218
  reverse=True
215
219
  )
216
220
 
@@ -267,16 +271,62 @@ def install_package(package_file: Path, force: bool = False) -> bool:
267
271
  print(f" Package version: {package_version}")
268
272
  print(f" Package hash: {package_hash}")
269
273
 
270
- # Check if already deployed (unless forced)
271
- if not force:
272
- deployed_hash = None
273
- if Path("/sphere/app/VERSION_HASH").exists():
274
- deployed_hash = Path("/sphere/app/VERSION_HASH").read_text().strip()
275
-
276
- if deployed_hash and package_hash != "unknown" and package_hash == deployed_hash:
274
+ # Check if already deployed
275
+ deployed_hash = None
276
+ deployed_version = None
277
+ if Path("/sphere/app/VERSION_HASH").exists():
278
+ deployed_hash = Path("/sphere/app/VERSION_HASH").read_text().strip()
279
+ if Path("/sphere/app/VERSION").exists():
280
+ deployed_version = Path("/sphere/app/VERSION").read_text().strip()
281
+
282
+ same_hash = deployed_hash and package_hash != "unknown" and package_hash == deployed_hash
283
+ same_version = deployed_version and package_version != "unknown" and package_version == deployed_version
284
+
285
+ if same_hash and same_version:
286
+ if not force:
277
287
  print(f"\n⏭️ This package is already deployed (hash: {package_hash})")
278
288
  print(f" Skipping installation. Use --force to reinstall anyway.")
279
289
  return True
290
+ else:
291
+ # FORCE REINSTALL OF SAME BUILD - MAKE IT SUPER OBVIOUS
292
+ import time
293
+ print()
294
+ print()
295
+ print("\033[1;33m" + "╔" + "=" * 78 + "╗" + "\033[0m")
296
+ print("\033[1;33m" + "║" + " " * 78 + "║" + "\033[0m")
297
+ print("\033[1;33m" + "║" + " ⚠️ ⚠️ ⚠️ REINSTALLING THE EXACT SAME BUILD ⚠️ ⚠️ ⚠️".ljust(78) + "║" + "\033[0m")
298
+ print("\033[1;33m" + "║" + " " * 78 + "║" + "\033[0m")
299
+ print("\033[1;33m" + "╚" + "=" * 78 + "╝" + "\033[0m")
300
+ print()
301
+ print("\033[1;33m" + "⚠️ WARNING: You are using --force to reinstall the SAME build!" + "\033[0m")
302
+ print()
303
+ print(f" Currently installed:")
304
+ print(f" Version: {deployed_version}")
305
+ print(f" Hash: {deployed_hash}")
306
+ print()
307
+ print(f" Package to install:")
308
+ print(f" Version: {package_version}")
309
+ print(f" Hash: {package_hash}")
310
+ print()
311
+ print("\033[1;33m" + " 👉 THIS IS THE EXACT SAME BUILD (version AND hash match)" + "\033[0m")
312
+ print()
313
+ print(" This will:")
314
+ print(" • Kill and restart all services")
315
+ print(" • Copy the exact same files over existing files")
316
+ print(" • Take 2-3 minutes to complete")
317
+ print()
318
+ print(" Common reasons to do this:")
319
+ print(" • Testing deployment process")
320
+ print(" • Services are broken and need clean restart")
321
+ print(" • Files were manually modified and need to be restored")
322
+ print()
323
+ for i in range(10, 0, -1):
324
+ print(f"\r ⏳ Starting reinstall in {i} seconds... (Ctrl+C to abort)", end='', flush=True)
325
+ time.sleep(1)
326
+ print()
327
+ print()
328
+ print("\033[1;32m" + "▶️ Proceeding with reinstall..." + "\033[0m")
329
+ print()
280
330
 
281
331
  # Find node-install.sh
282
332
  install_script = None
@@ -306,10 +356,14 @@ def install_package(package_file: Path, force: bool = False) -> bool:
306
356
  # 1. We've already done version checking in featrix-update.py
307
357
  # 2. The package is a specific version we want to install
308
358
  # 3. node-install.sh checks git state from /home/mitch/sphere which may not match the package
359
+
360
+ # node-install.sh REQUIRES root (checked at line 326)
361
+ # It calls 'sbit fix-permissions' which needs sbit to have setuid bit
362
+ # The install script itself must run as root
309
363
  cmd = ["sudo", str(install_script), "--force"]
310
364
  print(f" Using --force flag (installing from package)")
311
365
 
312
- # Run the install script with sudo
366
+ # Run the install script
313
367
  result = subprocess.run(
314
368
  cmd,
315
369
  check=True,
@@ -440,12 +494,19 @@ def main():
440
494
  print(f"\n✅ Update available: {current_version} → {newest_version}")
441
495
  should_update = True
442
496
  elif comparison == 0:
443
- print(f"\n✅ Already on latest version: {current_version}")
444
- if args.force:
445
- print(" --force flag set, will reinstall anyway")
497
+ # Same version - check if hash is different (newer build of same version)
498
+ current_hash = get_current_version_hash()
499
+ if current_hash and newest_hash and current_hash != newest_hash:
500
+ print(f"\n✅ Newer build available: {current_version} ({current_hash} → {newest_hash[:8]})")
501
+ print(f" Same version number but different hash (newer build)")
446
502
  should_update = True
447
503
  else:
448
- should_update = False
504
+ print(f"\n✅ Already on latest version: {current_version}")
505
+ if args.force:
506
+ print(" --force flag set, will reinstall anyway")
507
+ should_update = True
508
+ else:
509
+ should_update = False
449
510
  else:
450
511
  print(f"\n⚠️ Current version ({current_version}) is newer than available ({newest_version})")
451
512
  if args.force:
@@ -38,7 +38,7 @@ Example:
38
38
  ... labels=['Experiment A', 'Experiment B'])
39
39
  """
40
40
 
41
- __version__ = "0.2.1830"
41
+ __version__ = "0.2.2280"
42
42
  __author__ = "Featrix"
43
43
  __email__ = "support@featrix.com"
44
44
  __license__ = "MIT"
@@ -565,7 +565,7 @@ class FeatrixSphereClient:
565
565
  """Make a DELETE request and return JSON response."""
566
566
  response = self._make_request("DELETE", endpoint, max_retries=max_retries, **kwargs)
567
567
  return self._unwrap_response(response.json())
568
-
568
+
569
569
  # =========================================================================
570
570
  # Session Management
571
571
  # =========================================================================
@@ -893,6 +893,113 @@ class FeatrixSphereClient:
893
893
  """
894
894
  response_data = self._post_json(f"/compute/session/{session_id}/unpublish", {})
895
895
  return response_data
896
+
897
+ def publish_partial_foundation(
898
+ self,
899
+ source_session_id: str,
900
+ name: str,
901
+ checkpoint_epoch: int = None,
902
+ session_name_prefix: str = None,
903
+ publish: bool = True,
904
+ verbose: bool = True
905
+ ) -> Dict[str, Any]:
906
+ """
907
+ Publish a checkpoint from in-progress training as a standalone foundation model.
908
+
909
+ Takes a checkpoint from ongoing ES training and creates a NEW foundation model
910
+ session with full provenance tracking. Perfect for snapshotting good intermediate
911
+ models while training continues.
912
+
913
+ The new foundation model can be used with:
914
+ - train_on_foundational_model() - Train predictors on it
915
+ - Any standard foundation model operations
916
+ - Available across all compute nodes via backplane
917
+
918
+ Args:
919
+ source_session_id: Session with ES training (in-progress or completed)
920
+ name: Name for the new foundation model (REQUIRED)
921
+ checkpoint_epoch: Which epoch checkpoint to use (None = best/latest)
922
+ session_name_prefix: Optional prefix for new session ID
923
+ publish: Move to /sphere/published/ directory (default: True)
924
+ verbose: Print status updates
925
+
926
+ Returns:
927
+ dict with:
928
+ - foundation_session_id: New foundation session ID
929
+ - checkpoint_epoch: Epoch used
930
+ - provenance: Full metadata about source and training progress
931
+ - published_path: Path if published
932
+
933
+ Example:
934
+ ```python
935
+ # Snapshot epoch 50 as foundation v0.5 while training continues
936
+ result = client.publish_partial_foundation(
937
+ source_session_id="abc-123",
938
+ name="My Foundation v0.5",
939
+ checkpoint_epoch=50,
940
+ session_name_prefix="foundation-v0.5",
941
+ publish=True
942
+ )
943
+
944
+ foundation_id = result['foundation_session_id']
945
+ print(f"Published foundation: {foundation_id}")
946
+ print(f"Source was {result['provenance']['training_progress_percent']}% trained")
947
+
948
+ # Use immediately like any foundation model
949
+ client.train_on_foundational_model(
950
+ foundation_model_id=foundation_id,
951
+ target_column="price",
952
+ target_column_type="scalar"
953
+ )
954
+ # Available on all compute nodes automatically via backplane
955
+ ```
956
+ """
957
+ if verbose:
958
+ print(f"📦 Publishing partial foundation from {source_session_id}")
959
+ print(f" Name: {name}")
960
+ if checkpoint_epoch is not None:
961
+ print(f" Checkpoint epoch: {checkpoint_epoch}")
962
+ else:
963
+ print(f" Checkpoint epoch: best/latest available")
964
+ print(f" Publish to /sphere/published/: {publish}")
965
+
966
+ data = {
967
+ 'name': name,
968
+ 'publish': publish
969
+ }
970
+
971
+ if checkpoint_epoch is not None:
972
+ data['checkpoint_epoch'] = checkpoint_epoch
973
+ if session_name_prefix:
974
+ data['session_name_prefix'] = session_name_prefix
975
+
976
+ try:
977
+ response_data = self._post_json(
978
+ f"/compute/session/{source_session_id}/publish_partial_foundation",
979
+ data
980
+ )
981
+
982
+ foundation_id = response_data.get('foundation_session_id')
983
+ checkpoint_used = response_data.get('checkpoint_epoch')
984
+ provenance = response_data.get('provenance', {})
985
+
986
+ if verbose:
987
+ print(f"✅ {response_data.get('message')}")
988
+ print(f" Foundation session ID: {foundation_id}")
989
+ print(f" Checkpoint epoch: {checkpoint_used}")
990
+ if provenance.get('training_progress_percent'):
991
+ print(f" Source training progress: {provenance['training_progress_percent']}%")
992
+ if provenance.get('validation_loss_at_checkpoint'):
993
+ print(f" Val loss at checkpoint: {provenance['validation_loss_at_checkpoint']:.4f}")
994
+ if response_data.get('published_path'):
995
+ print(f" Published to: {response_data['published_path']}")
996
+
997
+ return response_data
998
+
999
+ except Exception as e:
1000
+ if verbose:
1001
+ print(f"❌ Error publishing partial foundation: {e}")
1002
+ raise
896
1003
 
897
1004
  def get_sessions_for_org(self, name_prefix: str, max_retries: int = None) -> Dict[str, Any]:
898
1005
  """
@@ -1974,9 +2081,30 @@ class FeatrixSphereClient:
1974
2081
  - Category split: Use full data for ES, specific categories for predictor
1975
2082
  - Label completeness: Include unlabeled rows in ES, exclude from predictor
1976
2083
  - Test/holdout: Keep test data in ES context but exclude from predictor training
2084
+
2085
+ Special Input: Dictionary of Datasets
2086
+ --------------------------------------
2087
+ You can pass a dictionary of datasets instead of a single DataFrame. Each key is a dataset name,
2088
+ and each value is a list of DataFrames/tables to include in that dataset.
2089
+
2090
+ When using this format:
2091
+ - A __featrix_dataset_name column is automatically added to track which dataset each row came from
2092
+ - All tables from all datasets are concatenated into a single DataFrame before upload
2093
+ - The concatenated DataFrame is uploaded as normal
2094
+
2095
+ Example - Upload multiple datasets with labels:
2096
+
2097
+ datasets = {
2098
+ 'training_data': [df1, df2, df3],
2099
+ 'validation_data': [df4, df5],
2100
+ 'test_data': [df6]
2101
+ }
2102
+
2103
+ session = client.upload_df_and_create_session(df=datasets)
2104
+ # Uploads a single DataFrame with __featrix_dataset_name column indicating source
1977
2105
 
1978
2106
  Args:
1979
- df: pandas DataFrame to upload (optional if file_path is provided)
2107
+ df: pandas DataFrame OR dict of {dataset_name: [DataFrames]} to upload (optional if file_path is provided)
1980
2108
  filename: Name to give the uploaded file (default: "data.csv")
1981
2109
  file_path: Path to CSV, Parquet, JSON, or JSONL file to upload (optional if df is provided)
1982
2110
  column_overrides: Dict mapping column names to types ("scalar", "set", "free_string", "free_string_list")
@@ -2005,6 +2133,80 @@ class FeatrixSphereClient:
2005
2133
  if column_types is not None:
2006
2134
  column_overrides = column_types
2007
2135
 
2136
+ # Handle dictionary of datasets input
2137
+ if df is not None and isinstance(df, dict):
2138
+ print("Detected dictionary of datasets - concatenating with __featrix_dataset_name labels")
2139
+ all_dataframes = []
2140
+ total_rows = 0
2141
+
2142
+ for dataset_name, tables in df.items():
2143
+ if not isinstance(tables, list):
2144
+ raise ValueError(f"Value for dataset '{dataset_name}' must be a list of DataFrames/file paths, got {type(tables)}")
2145
+
2146
+ for i, table in enumerate(tables):
2147
+ # Handle file path (string)
2148
+ if isinstance(table, str):
2149
+ file_path_to_load = str(table)
2150
+
2151
+ if not os.path.exists(file_path_to_load):
2152
+ raise FileNotFoundError(f"File not found in dataset '{dataset_name}': {file_path_to_load}")
2153
+
2154
+ # Determine file type and load
2155
+ file_ext = file_path_to_load.lower()
2156
+ print(f" - {dataset_name} loading file: {os.path.basename(file_path_to_load)}")
2157
+
2158
+ if file_ext.endswith('.parquet'):
2159
+ loaded_df = pd.read_parquet(file_path_to_load)
2160
+ elif file_ext.endswith(('.json', '.jsonl')):
2161
+ try:
2162
+ from featrix.neural.input_data_file import featrix_wrap_read_json_file
2163
+ loaded_df = featrix_wrap_read_json_file(file_path_to_load)
2164
+ if loaded_df is None:
2165
+ raise ValueError(f"Failed to parse {'JSONL' if file_ext.endswith('.jsonl') else 'JSON'} file")
2166
+ except ImportError:
2167
+ # Fallback to pandas
2168
+ if file_ext.endswith('.jsonl'):
2169
+ import json
2170
+ records = []
2171
+ with open(file_path_to_load, 'r', encoding='utf-8') as f:
2172
+ for line in f:
2173
+ if line.strip():
2174
+ records.append(json.loads(line))
2175
+ loaded_df = pd.DataFrame(records)
2176
+ else:
2177
+ loaded_df = pd.read_json(file_path_to_load)
2178
+ elif file_ext.endswith(('.csv', '.csv.gz')):
2179
+ loaded_df = pd.read_csv(file_path_to_load)
2180
+ else:
2181
+ raise ValueError(f"Unsupported file type in dataset '{dataset_name}': {file_path_to_load}. "
2182
+ f"Supported: .csv, .csv.gz, .parquet, .json, .jsonl")
2183
+
2184
+ labeled_table = loaded_df
2185
+ print(f" Loaded {len(loaded_df)} rows, {len(loaded_df.columns)} columns")
2186
+
2187
+ # Handle DataFrame
2188
+ elif isinstance(table, pd.DataFrame):
2189
+ # Create a copy to avoid modifying the original
2190
+ labeled_table = table.copy()
2191
+ print(f" - {dataset_name} DataFrame {i+1}: {len(labeled_table)} rows, {len(labeled_table.columns)} columns")
2192
+
2193
+ else:
2194
+ raise ValueError(f"Table {i} in dataset '{dataset_name}' must be a pandas DataFrame or file path (str), got {type(table)}")
2195
+
2196
+ # Add the dataset name label column
2197
+ labeled_table['__featrix_dataset_name'] = dataset_name
2198
+
2199
+ all_dataframes.append(labeled_table)
2200
+ total_rows += len(labeled_table)
2201
+
2202
+ if not all_dataframes:
2203
+ raise ValueError("No DataFrames found in the provided dictionary")
2204
+
2205
+ # Concatenate all dataframes
2206
+ print(f"Concatenating {len(all_dataframes)} tables from {len(df)} datasets ({total_rows} total rows)")
2207
+ df = pd.concat(all_dataframes, ignore_index=True)
2208
+ print(f"Combined DataFrame: {len(df)} rows, {len(df.columns)} columns (includes __featrix_dataset_name)")
2209
+
2008
2210
  # Validate inputs
2009
2211
  if df is None and file_path is None:
2010
2212
  raise ValueError("Either df or file_path must be provided")
@@ -4236,25 +4438,21 @@ class FeatrixSphereClient:
4236
4438
  def clone_in_progress_embedding_space(self, session_id: str, from_compute: str, to_compute: str,
4237
4439
  es_id: str = None, new_session_name: str = None) -> Dict[str, Any]:
4238
4440
  """
4239
- Clone a partially-trained embedding space from one compute node to another.
4441
+ INTERNAL: Clone embedding space between compute nodes.
4240
4442
 
4241
- This creates a new session on the destination node with the embedding space and strings cache
4242
- transferred in 512MB chunks. The new session will be marked as "ready" and can immediately
4243
- be used to train single predictors.
4443
+ Note: With the backplane system, users generally don't need to manually clone.
4444
+ Sessions are automatically available across all compute nodes.
4445
+ This method is kept for backward compatibility and special cases.
4244
4446
 
4245
4447
  Args:
4246
- session_id: Source session ID containing the embedding space to clone
4247
- from_compute: Source compute node name (e.g., 'taco', 'churro', 'burrito')
4248
- to_compute: Destination compute node name
4249
- es_id: Optional ES ID to clone (required if session has multiple embedding spaces)
4250
- new_session_name: Optional name for the new cloned session
4448
+ session_id: Source session ID
4449
+ from_compute: Source node name
4450
+ to_compute: Destination node name
4451
+ es_id: Optional ES ID (if session has multiple)
4452
+ new_session_name: Optional name for cloned session
4251
4453
 
4252
4454
  Returns:
4253
- Dict with new_session_id from destination node
4254
-
4255
- Raises:
4256
- ValueError: If multiple ES found in session and es_id not provided
4257
- HTTPException: If cloning fails
4455
+ Dict with new_session_id
4258
4456
  """
4259
4457
  # Prepare request data
4260
4458
  request_data = {
@@ -4322,40 +4520,6 @@ class FeatrixSphereClient:
4322
4520
  print(f"Training predictor on foundation model {foundation_model_id}...")
4323
4521
  print(f" Target: {target_column} ({target_column_type})")
4324
4522
 
4325
- # Get the compute cluster from the foundation model session
4326
- # This ensures we upload files to the same node where the foundation model lives
4327
- # If the foundation session doesn't exist (404), we'll proceed with current compute cluster
4328
- foundation_compute_cluster = None
4329
- try:
4330
- foundation_session = self.get_session_status(foundation_model_id)
4331
- foundation_compute_cluster = self.get_last_server_metadata()
4332
- foundation_compute_cluster = foundation_compute_cluster.get('compute_cluster') if foundation_compute_cluster else None
4333
- except Exception as e:
4334
- # Foundation session might not exist or be accessible - that's okay
4335
- # The server will validate it when we submit the training request
4336
- if verbose:
4337
- # Check if it's a 404 HTTP error
4338
- is_404 = False
4339
- if isinstance(e, requests.exceptions.HTTPError):
4340
- if hasattr(e, 'response') and e.response.status_code == 404:
4341
- is_404 = True
4342
-
4343
- if is_404:
4344
- print(f" ⚠️ Foundation session not found (404) - will use current compute cluster")
4345
- print(f" Server will validate foundation model when training starts")
4346
- else:
4347
- print(f" ⚠️ Could not fetch foundation session: {e}")
4348
- print(f" Will proceed with current compute cluster")
4349
-
4350
- # Temporarily set compute cluster for file uploads if we found one
4351
- original_compute_cluster = self.compute_cluster
4352
- original_headers = self.session.headers.copy()
4353
- if foundation_compute_cluster:
4354
- self.set_compute_cluster(foundation_compute_cluster)
4355
- if verbose:
4356
- print(f" Using compute cluster: {foundation_compute_cluster}")
4357
- elif verbose and self.compute_cluster:
4358
- print(f" Using current compute cluster: {self.compute_cluster}")
4359
4523
 
4360
4524
  try:
4361
4525
  # Validate that only one data source is provided
@@ -4453,20 +4617,9 @@ class FeatrixSphereClient:
4453
4617
 
4454
4618
  new_session_id = response_data.get('session_id')
4455
4619
  print(f"✅ Predictor training session created: {new_session_id}")
4456
-
4457
- # Restore original compute cluster setting
4458
- if original_compute_cluster != self.compute_cluster:
4459
- if original_compute_cluster:
4460
- self.set_compute_cluster(original_compute_cluster)
4461
- else:
4462
- self.session.headers = original_headers
4463
- finally:
4464
- # Ensure we restore headers even if there's an error
4465
- if original_compute_cluster != self.compute_cluster:
4466
- if original_compute_cluster:
4467
- self.set_compute_cluster(original_compute_cluster)
4468
- else:
4469
- self.session.headers = original_headers
4620
+
4621
+ except Exception as e:
4622
+ raise
4470
4623
 
4471
4624
  if verbose:
4472
4625
  print(f"⏳ Waiting for training to complete...")
@@ -4854,14 +5007,44 @@ class FeatrixSphereClient:
4854
5007
  The system handles the hard decisions so you can focus on your problem, not
4855
5008
  hyperparameter tuning.
4856
5009
 
5010
+ MULTI-DATASET INPUT (NEW):
5011
+ ---------------------------
5012
+ You can now pass a dictionary of datasets for the `df` parameter, just like in
5013
+ upload_df_and_create_session(). This is useful when combining multiple sources
5014
+ for predictor training:
5015
+
5016
+ ```python
5017
+ # Train predictor on multiple datasets with labels
5018
+ training_data = {
5019
+ 'extra_rows_from_matt': ['matt_supplement.csv', 'matt_additions.parquet'],
5020
+ 'main_training': [df1, df2, 'training.csv'],
5021
+ 'validation_samples': ['validation.csv']
5022
+ }
5023
+
5024
+ result = client.train_single_predictor(
5025
+ session_id=session.session_id,
5026
+ df=training_data, # Dictionary of datasets
5027
+ target_column='outcome',
5028
+ target_column_type='set'
5029
+ )
5030
+ ```
5031
+
5032
+ When using dictionary format:
5033
+ - Each key is a dataset name (e.g., 'extra_rows_from_matt')
5034
+ - Each value is a list of DataFrames and/or file paths
5035
+ - A __featrix_dataset_name column is automatically added
5036
+ - All tables are concatenated before training
5037
+ - Supported file types for paths: CSV (.csv, .csv.gz), Parquet, JSON, JSONL; any other extension raises ValueError
5038
+
4857
5039
  Args:
4858
5040
  session_id: ID of session with trained embedding space
4859
5041
  target_column: Name of the target column to predict
4860
5042
  target_column_type: Type of target column ("set" or "scalar")
4861
5043
  file_path: Path to DIFFERENT training file (CSV or .csv.gz) to use for predictor training.
4862
- df: pandas DataFrame with DIFFERENT training data to use for predictor training.
4863
- Use file_path OR df (not both) to train predictor on different data than your
4864
- embedding space! If neither provided, uses session's original data file.
5044
+ df: pandas DataFrame OR dict of {dataset_name: [DataFrames/file paths]} with DIFFERENT
5045
+ training data to use for predictor training. Use file_path OR df (not both) to train
5046
+ predictor on different data than your embedding space! If neither provided, uses
5047
+ session's original data file.
4865
5048
  epochs: Number of training epochs (default: 0; automatic)
4866
5049
  validation_ignore_columns: List of column names to exclude from validation queries (default: None)
4867
5050
  rare_label_value: For binary classification, which class is the rare/minority class for metrics (default: None)
@@ -4882,6 +5065,86 @@ class FeatrixSphereClient:
4882
5065
  if file_path and df is not None:
4883
5066
  raise ValueError("Provide either file_path or df, not both")
4884
5067
 
5068
+ # Handle dictionary of datasets input (same as upload_df_and_create_session)
5069
+ if df is not None and isinstance(df, dict):
5070
+ if verbose:
5071
+ print("Detected dictionary of datasets - concatenating with __featrix_dataset_name labels")
5072
+ all_dataframes = []
5073
+ total_rows = 0
5074
+
5075
+ for dataset_name, tables in df.items():
5076
+ if not isinstance(tables, list):
5077
+ raise ValueError(f"Value for dataset '{dataset_name}' must be a list of DataFrames/file paths, got {type(tables)}")
5078
+
5079
+ for i, table in enumerate(tables):
5080
+ # Handle file path (string)
5081
+ if isinstance(table, str):
5082
+ file_path_to_load = str(table)
5083
+
5084
+ if not os.path.exists(file_path_to_load):
5085
+ raise FileNotFoundError(f"File not found in dataset '{dataset_name}': {file_path_to_load}")
5086
+
5087
+ # Determine file type and load
5088
+ file_ext = file_path_to_load.lower()
5089
+ if verbose:
5090
+ print(f" - {dataset_name} loading file: {os.path.basename(file_path_to_load)}")
5091
+
5092
+ if file_ext.endswith('.parquet'):
5093
+ loaded_df = pd.read_parquet(file_path_to_load)
5094
+ elif file_ext.endswith(('.json', '.jsonl')):
5095
+ try:
5096
+ from featrix.neural.input_data_file import featrix_wrap_read_json_file
5097
+ loaded_df = featrix_wrap_read_json_file(file_path_to_load)
5098
+ if loaded_df is None:
5099
+ raise ValueError(f"Failed to parse {'JSONL' if file_ext.endswith('.jsonl') else 'JSON'} file")
5100
+ except ImportError:
5101
+ # Fallback to pandas
5102
+ if file_ext.endswith('.jsonl'):
5103
+ import json
5104
+ records = []
5105
+ with open(file_path_to_load, 'r', encoding='utf-8') as f:
5106
+ for line in f:
5107
+ if line.strip():
5108
+ records.append(json.loads(line))
5109
+ loaded_df = pd.DataFrame(records)
5110
+ else:
5111
+ loaded_df = pd.read_json(file_path_to_load)
5112
+ elif file_ext.endswith(('.csv', '.csv.gz')):
5113
+ loaded_df = pd.read_csv(file_path_to_load)
5114
+ else:
5115
+ raise ValueError(f"Unsupported file type in dataset '{dataset_name}': {file_path_to_load}. "
5116
+ f"Supported: .csv, .csv.gz, .parquet, .json, .jsonl")
5117
+
5118
+ labeled_table = loaded_df
5119
+ if verbose:
5120
+ print(f" Loaded {len(loaded_df)} rows, {len(loaded_df.columns)} columns")
5121
+
5122
+ # Handle DataFrame
5123
+ elif isinstance(table, pd.DataFrame):
5124
+ # Create a copy to avoid modifying the original
5125
+ labeled_table = table.copy()
5126
+ if verbose:
5127
+ print(f" - {dataset_name} DataFrame {i+1}: {len(labeled_table)} rows, {len(labeled_table.columns)} columns")
5128
+
5129
+ else:
5130
+ raise ValueError(f"Table {i} in dataset '{dataset_name}' must be a pandas DataFrame or file path (str), got {type(table)}")
5131
+
5132
+ # Add the dataset name label column
5133
+ labeled_table['__featrix_dataset_name'] = dataset_name
5134
+
5135
+ all_dataframes.append(labeled_table)
5136
+ total_rows += len(labeled_table)
5137
+
5138
+ if not all_dataframes:
5139
+ raise ValueError("No DataFrames found in the provided dictionary")
5140
+
5141
+ # Concatenate all dataframes
5142
+ if verbose:
5143
+ print(f"Concatenating {len(all_dataframes)} tables from {len(df)} datasets ({total_rows} total rows)")
5144
+ df = pd.concat(all_dataframes, ignore_index=True)
5145
+ if verbose:
5146
+ print(f"Combined DataFrame: {len(df)} rows, {len(df.columns)} columns (includes __featrix_dataset_name)")
5147
+
4885
5148
  # Validate cost parameters
4886
5149
  if cost_false_positive is not None or cost_false_negative is not None:
4887
5150
  if cost_false_positive is None or cost_false_negative is None:
@@ -5079,7 +5342,7 @@ class FeatrixSphereClient:
5079
5342
  Extend embedding space training with new data.
5080
5343
 
5081
5344
  This function:
5082
- 1. Clones the existing embedding space to a new session
5345
+ 1. Creates a new session with the existing embedding space
5083
5346
  2. Uploads/processes the new data
5084
5347
  3. Continues training from where the previous training left off
5085
5348
  4. Trains for the specified number of additional epochs (data_passes)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: featrixsphere
3
- Version: 0.2.1830
3
+ Version: 0.2.2280
4
4
  Summary: Transform any CSV into a production-ready ML model in minutes, not months.
5
5
  Home-page: https://github.com/Featrix/sphere
6
6
  Author: Featrix