featrixsphere 0.2.5563__py3-none-any.whl → 0.2.5978__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
featrixsphere/client.py CHANGED
@@ -215,18 +215,18 @@ class PredictionBatch:
215
215
  )
216
216
 
217
217
  # Populate cache with results
218
- predictions = batch_results.get('results', {})
218
+ predictions = batch_results.get('predictions', [])
219
219
  successful = 0
220
220
  failed = 0
221
-
222
- for queue_id, prediction in predictions.items():
221
+
222
+ for prediction in predictions:
223
223
  if isinstance(prediction, dict):
224
224
  row_index = prediction.get('row_index', 0)
225
225
  if row_index < len(records):
226
226
  record = records[row_index]
227
227
  record_hash = self._hash_record(record)
228
228
  self._cache[record_hash] = prediction
229
-
229
+
230
230
  if prediction.get('prediction') is not None:
231
231
  successful += 1
232
232
  else:
@@ -1958,20 +1958,21 @@ class FeatrixSphereClient:
1958
1958
  ax.text(0.5, 0.5, f'Error plotting embedding: {e}',
1959
1959
  transform=ax.transAxes, ha='center', va='center')
1960
1960
 
1961
- def create_embedding_space(self, name: str, s3_training_dataset: str, s3_validation_dataset: str, webhooks: Dict[str, str] = None, user_metadata: Dict[str, Any] = None) -> SessionInfo:
1961
+ def create_embedding_space(self, name: str, s3_training_dataset: str, s3_validation_dataset: str, s3_visualization_dataset: str = None, webhooks: Dict[str, str] = None, user_metadata: Dict[str, Any] = None) -> SessionInfo:
1962
1962
  """
1963
1963
  Create a new embedding space from S3 training and validation datasets.
1964
-
1964
+
1965
1965
  Args:
1966
1966
  name: Name for the embedding space
1967
1967
  s3_training_dataset: S3 URL for training dataset (must start with 's3://')
1968
1968
  s3_validation_dataset: S3 URL for validation dataset (must start with 's3://')
1969
+ s3_visualization_dataset: Optional S3 URL for visualization dataset for epoch projection animations (must start with 's3://')
1969
1970
  webhooks: Optional dict with webhook configuration keys (webhook_callback_secret, s3_backup_url, model_id_update_url)
1970
1971
  user_metadata: Optional user metadata for ES/SP identification (max 32KB)
1971
-
1972
+
1972
1973
  Returns:
1973
1974
  SessionInfo for the newly created embedding space session
1974
-
1975
+
1975
1976
  Raises:
1976
1977
  ValueError: If S3 URLs are invalid
1977
1978
  """
@@ -1980,16 +1981,22 @@ class FeatrixSphereClient:
1980
1981
  raise ValueError("s3_training_dataset must be a valid S3 URL (s3://...)")
1981
1982
  if not s3_validation_dataset.startswith('s3://'):
1982
1983
  raise ValueError("s3_validation_dataset must be a valid S3 URL (s3://...)")
1983
-
1984
+ if s3_visualization_dataset and not s3_visualization_dataset.startswith('s3://'):
1985
+ raise ValueError("s3_visualization_dataset must be a valid S3 URL (s3://...)")
1986
+
1984
1987
  print(f"Creating embedding space '{name}' from S3 datasets...")
1985
1988
  print(f" Training: {s3_training_dataset}")
1986
1989
  print(f" Validation: {s3_validation_dataset}")
1987
-
1990
+ if s3_visualization_dataset:
1991
+ print(f" Visualization: {s3_visualization_dataset}")
1992
+
1988
1993
  data = {
1989
1994
  "name": name,
1990
1995
  "s3_file_data_set_training": s3_training_dataset,
1991
1996
  "s3_file_data_set_validation": s3_validation_dataset
1992
1997
  }
1998
+ if s3_visualization_dataset:
1999
+ data["s3_file_data_set_visualization"] = s3_visualization_dataset
1993
2000
  if webhooks:
1994
2001
  data['webhooks'] = webhooks
1995
2002
  if user_metadata:
@@ -5578,11 +5585,33 @@ class FeatrixSphereClient:
5578
5585
 
5579
5586
  # Try to start training
5580
5587
  response_data = self._post_json(f"/compute/session/{session_id}/train_predictor", data)
5588
+
5589
+ # Check if job was queued (all nodes busy)
5590
+ if response_data.get('queued'):
5591
+ job_id = response_data.get('job_id')
5592
+ queue_position = response_data.get('queue_position', 0)
5593
+ if verbose:
5594
+ print(f"📥 Job queued (all compute nodes busy)")
5595
+ print(f" Job ID: {job_id}")
5596
+ print(f" Queue position: {queue_position}")
5597
+ print(f" Waiting for job to be dispatched...")
5598
+
5599
+ # Poll for job dispatch and completion
5600
+ return self._poll_queued_job(
5601
+ session_id=session_id,
5602
+ job_id=job_id,
5603
+ target_column=target_column,
5604
+ target_column_type=target_column_type,
5605
+ poll_interval=poll_interval,
5606
+ max_poll_time=max_poll_time,
5607
+ verbose=verbose
5608
+ )
5609
+
5581
5610
  return response_data
5582
-
5611
+
5583
5612
  except Exception as e:
5584
5613
  error_str = str(e).lower()
5585
-
5614
+
5586
5615
  # Check if this is a "job already running" error
5587
5616
  if "already running" in error_str or "job plan error" in error_str:
5588
5617
  if verbose:
@@ -5701,6 +5730,180 @@ class FeatrixSphereClient:
5701
5730
  if verbose:
5702
5731
  print(f"⚠️ Could not clean up temporary file: {cleanup_error}")
5703
5732
 
5733
+ def _poll_queued_job(self, session_id: str, job_id: str, target_column: str, target_column_type: str,
5734
+ poll_interval: int = 30, max_poll_time: int = 3600, verbose: bool = True) -> Dict[str, Any]:
5735
+ """
5736
+ Poll for a queued job to be dispatched and completed.
5737
+
5738
+ This is called when train_predictor returns a 'queued' response because
5739
+ all compute nodes were busy. We poll until:
5740
+ 1. Job is dispatched to a node and training starts
5741
+ 2. Training completes
5742
+ 3. Timeout is reached
5743
+
5744
+ Args:
5745
+ session_id: Session ID for the training job
5746
+ job_id: Queue job ID returned from the queued response
5747
+ target_column: Target column name (for return value)
5748
+ target_column_type: Target column type (for return value)
5749
+ poll_interval: Seconds between status checks
5750
+ max_poll_time: Maximum time to poll in seconds
5751
+ verbose: Whether to print status updates
5752
+
5753
+ Returns:
5754
+ Dict with training result or status
5755
+ """
5756
+ import time
5757
+ start_time = time.time()
5758
+ last_status = None
5759
+ job_dispatched = False
5760
+
5761
+ while time.time() - start_time < max_poll_time:
5762
+ try:
5763
+ # Check queue status first (while job is still in queue)
5764
+ if not job_dispatched:
5765
+ try:
5766
+ queue_status = self._get_json(f"/admin/monitor/job_queue/{job_id}")
5767
+ job_info = queue_status.get('job', {})
5768
+ queue_status_val = job_info.get('status', 'unknown')
5769
+
5770
+ if queue_status_val == 'pending':
5771
+ # Still in queue - show position
5772
+ # Get queue position by counting pending jobs
5773
+ try:
5774
+ all_jobs = self._get_json("/admin/monitor/job_queue?status=pending")
5775
+ pending_jobs = all_jobs.get('jobs', [])
5776
+ position = 0
5777
+ for i, j in enumerate(pending_jobs):
5778
+ if j.get('job_id') == job_id:
5779
+ position = i + 1
5780
+ break
5781
+ if verbose and position > 0:
5782
+ print(f"⏳ Queue position: {position} (waiting for available node)")
5783
+ except:
5784
+ if verbose:
5785
+ print(f"⏳ Job still queued (waiting for available node)")
5786
+
5787
+ elif queue_status_val == 'dispatched':
5788
+ # Job dispatched - switch to session polling
5789
+ assigned_node = job_info.get('assigned_node', 'unknown')
5790
+ if verbose:
5791
+ print(f"🚀 Job dispatched to {assigned_node}! Training starting...")
5792
+ job_dispatched = True
5793
+
5794
+ elif queue_status_val in ['completed', 'failed']:
5795
+ # Job already finished (fast completion or error)
5796
+ if queue_status_val == 'failed':
5797
+ error_msg = job_info.get('error_message', 'Unknown error')
5798
+ if verbose:
5799
+ print(f"❌ Job failed: {error_msg}")
5800
+ return {
5801
+ "message": f"Training job failed: {error_msg}",
5802
+ "session_id": session_id,
5803
+ "job_id": job_id,
5804
+ "target_column": target_column,
5805
+ "target_column_type": target_column_type,
5806
+ "status": "failed"
5807
+ }
5808
+ # If completed, continue to session status check below
5809
+ job_dispatched = True
5810
+
5811
+ except Exception as queue_error:
5812
+ # Queue endpoint might not be available - try session status
5813
+ if verbose:
5814
+ print(f"⚠️ Could not check queue status: {queue_error}")
5815
+ job_dispatched = True # Fall back to session polling
5816
+
5817
+ # Once dispatched (or we can't check queue), poll session status
5818
+ if job_dispatched:
5819
+ session_status = self.get_session_status(session_id)
5820
+ jobs = session_status.jobs
5821
+
5822
+ # Check for single predictor jobs
5823
+ sp_jobs = {k: v for k, v in jobs.items()
5824
+ if v.get('job_type') == 'train_single_predictor'}
5825
+
5826
+ if sp_jobs:
5827
+ running_jobs = []
5828
+ completed_jobs = []
5829
+ failed_jobs = []
5830
+
5831
+ for jid, job in sp_jobs.items():
5832
+ status = job.get('status', 'unknown')
5833
+ if status == 'running':
5834
+ running_jobs.append(jid)
5835
+ elif status == 'done':
5836
+ completed_jobs.append(jid)
5837
+ elif status == 'failed':
5838
+ failed_jobs.append(jid)
5839
+
5840
+ current_status = f"Running: {len(running_jobs)}, Done: {len(completed_jobs)}, Failed: {len(failed_jobs)}"
5841
+ if current_status != last_status and verbose:
5842
+ print(f"📊 Status: {current_status}")
5843
+ last_status = current_status
5844
+
5845
+ # Check if training is complete
5846
+ if not running_jobs and (completed_jobs or failed_jobs):
5847
+ if completed_jobs:
5848
+ if verbose:
5849
+ print(f"✅ Single predictor training completed successfully!")
5850
+
5851
+ try:
5852
+ metrics = self.get_training_metrics(session_id)
5853
+ return {
5854
+ "message": "Single predictor training completed successfully",
5855
+ "session_id": session_id,
5856
+ "job_id": job_id,
5857
+ "target_column": target_column,
5858
+ "target_column_type": target_column_type,
5859
+ "status": "completed",
5860
+ "training_metrics": metrics
5861
+ }
5862
+ except Exception as metrics_error:
5863
+ if verbose:
5864
+ print(f"⚠️ Training completed but couldn't fetch metrics: {metrics_error}")
5865
+ return {
5866
+ "message": "Single predictor training completed successfully",
5867
+ "session_id": session_id,
5868
+ "job_id": job_id,
5869
+ "target_column": target_column,
5870
+ "target_column_type": target_column_type,
5871
+ "status": "completed"
5872
+ }
5873
+ else:
5874
+ if verbose:
5875
+ print(f"❌ Single predictor training failed")
5876
+ return {
5877
+ "message": "Single predictor training failed",
5878
+ "session_id": session_id,
5879
+ "job_id": job_id,
5880
+ "target_column": target_column,
5881
+ "target_column_type": target_column_type,
5882
+ "status": "failed",
5883
+ "failed_jobs": failed_jobs
5884
+ }
5885
+
5886
+ time.sleep(poll_interval)
5887
+
5888
+ except Exception as poll_error:
5889
+ if verbose:
5890
+ print(f"⚠️ Error during polling: {poll_error}")
5891
+ time.sleep(poll_interval)
5892
+
5893
+ # Timeout reached
5894
+ if verbose:
5895
+ print(f"⏰ Polling timeout reached ({max_poll_time}s). Job may still be queued or training.")
5896
+
5897
+ return {
5898
+ "message": f"Polling timeout reached. Job may still be queued or training.",
5899
+ "session_id": session_id,
5900
+ "job_id": job_id,
5901
+ "target_column": target_column,
5902
+ "target_column_type": target_column_type,
5903
+ "status": "timeout",
5904
+ "poll_time": max_poll_time
5905
+ }
5906
+
5704
5907
  def extend_embedding_space_data(self, session_id: str, new_data_df=None, new_data_file: str = None,
5705
5908
  data_passes: int = 50, name: str = None, session_name_prefix: str = None,
5706
5909
  poll_interval: int = 30, max_poll_time: int = 3600,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: featrixsphere
3
- Version: 0.2.5563
3
+ Version: 0.2.5978
4
4
  Summary: Transform any CSV into a production-ready ML model in minutes, not months.
5
5
  Home-page: https://github.com/Featrix/sphere
6
6
  Author: Featrix
@@ -0,0 +1,17 @@
1
+ featrixsphere/__init__.py,sha256=nf_WC1Cpqfm0VmGonHMGq8ZbGIvgOJIQSIvL8Sedg4M,2190
2
+ featrixsphere/client.py,sha256=W-2nx5iXX1piBCGEhyBjw0ZpuQcfNUi7W6J3y4ZwMqM,450669
3
+ featrixsphere/api/__init__.py,sha256=quyvuPphVj9wb6v8Dio0SMG9iHgJAmY3asHk3f_zF10,1269
4
+ featrixsphere/api/api_endpoint.py,sha256=i3eCWuaUXftnH1Ai6MFZ7md7pC2FcRAIRO87CBZhyEQ,9000
5
+ featrixsphere/api/client.py,sha256=TdpujNsJxO4GfPMI_KoemQWV89go3KuK6OPAo9jX6Bs,12574
6
+ featrixsphere/api/foundational_model.py,sha256=ZF5wKMs6SfsNC3XYYXgbRMhnrtmLe6NeckjCCiH0fK0,21628
7
+ featrixsphere/api/http_client.py,sha256=TsOQHHNTDFGAR3mdHevj-0wy1-hPtgHXKe8Egiz5FVo,7269
8
+ featrixsphere/api/notebook_helper.py,sha256=xY9jsao26eaNiFh2s0_TlRZnR8xZ4P_e0EOKr2PtoVs,20060
9
+ featrixsphere/api/prediction_result.py,sha256=Tx7LXzF4XT-U3VqAN_IFc5DvxPnygc78M2usrD-yMu4,7521
10
+ featrixsphere/api/predictor.py,sha256=-vwCKpCfTgZKqzpDnzy1iYZQ-1-MGW8aErvxM9trktw,17652
11
+ featrixsphere/api/reference_record.py,sha256=-XOTF6ynznB3ouz06w3AF8X9SVId0g_dO20VvGNesUQ,7095
12
+ featrixsphere/api/vector_database.py,sha256=BplxKkPnAbcBX1A4KxFBJVb3qkQ-FH9zi9v2dWG5CgY,7976
13
+ featrixsphere-0.2.5978.dist-info/METADATA,sha256=AEmhOnYPaOz_Q7ojHzl6TyrxyzBTylNuxNktKPtk4Uo,16232
14
+ featrixsphere-0.2.5978.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
15
+ featrixsphere-0.2.5978.dist-info/entry_points.txt,sha256=QreJeYfD_VWvbEqPmMXZ3pqqlFlJ1qZb-NtqnyhEldc,51
16
+ featrixsphere-0.2.5978.dist-info/top_level.txt,sha256=AyN4wjfzlD0hWnDieuEHX0KckphIk_aC73XCG4df5uU,14
17
+ featrixsphere-0.2.5978.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- featrixsphere/__init__.py,sha256=6jbzCIWkLuedQFdYgS6rGKlvi47VImbOnwPv7YeeXmg,1888
2
- featrixsphere/client.py,sha256=NoIK4NMAlivtHeldgcZJmjdn2f_Ded7qeeTDxXKWr6E,440197
3
- featrixsphere-0.2.5563.dist-info/METADATA,sha256=qpkIT3C33BT8rSFqqJ7nXlv-GOhgzw1TUrJv7AYR4w4,16232
4
- featrixsphere-0.2.5563.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
5
- featrixsphere-0.2.5563.dist-info/entry_points.txt,sha256=QreJeYfD_VWvbEqPmMXZ3pqqlFlJ1qZb-NtqnyhEldc,51
6
- featrixsphere-0.2.5563.dist-info/top_level.txt,sha256=AyN4wjfzlD0hWnDieuEHX0KckphIk_aC73XCG4df5uU,14
7
- featrixsphere-0.2.5563.dist-info/RECORD,,