featrixsphere 0.2.5566__py3-none-any.whl → 0.2.6127__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- featrixsphere/__init__.py +37 -18
- featrixsphere/api/__init__.py +50 -0
- featrixsphere/api/api_endpoint.py +280 -0
- featrixsphere/api/client.py +396 -0
- featrixsphere/api/foundational_model.py +658 -0
- featrixsphere/api/http_client.py +209 -0
- featrixsphere/api/notebook_helper.py +584 -0
- featrixsphere/api/prediction_result.py +231 -0
- featrixsphere/api/predictor.py +537 -0
- featrixsphere/api/reference_record.py +227 -0
- featrixsphere/api/vector_database.py +269 -0
- featrixsphere/client.py +218 -8
- {featrixsphere-0.2.5566.dist-info → featrixsphere-0.2.6127.dist-info}/METADATA +1 -1
- featrixsphere-0.2.6127.dist-info/RECORD +17 -0
- featrixsphere-0.2.5566.dist-info/RECORD +0 -7
- {featrixsphere-0.2.5566.dist-info → featrixsphere-0.2.6127.dist-info}/WHEEL +0 -0
- {featrixsphere-0.2.5566.dist-info → featrixsphere-0.2.6127.dist-info}/entry_points.txt +0 -0
- {featrixsphere-0.2.5566.dist-info → featrixsphere-0.2.6127.dist-info}/top_level.txt +0 -0
featrixsphere/client.py
CHANGED
|
@@ -1958,20 +1958,24 @@ class FeatrixSphereClient:
|
|
|
1958
1958
|
ax.text(0.5, 0.5, f'Error plotting embedding: {e}',
|
|
1959
1959
|
transform=ax.transAxes, ha='center', va='center')
|
|
1960
1960
|
|
|
1961
|
-
def create_embedding_space(self, name: str, s3_training_dataset: str, s3_validation_dataset: str, webhooks: Dict[str, str] = None, user_metadata: Dict[str, Any] = None) -> SessionInfo:
|
|
1961
|
+
def create_embedding_space(self, name: str, s3_training_dataset: str, s3_validation_dataset: str, s3_visualization_dataset: str = None, webhooks: Dict[str, str] = None, user_metadata: Dict[str, Any] = None, foundation_mode: bool = None) -> SessionInfo:
|
|
1962
1962
|
"""
|
|
1963
1963
|
Create a new embedding space from S3 training and validation datasets.
|
|
1964
|
-
|
|
1964
|
+
|
|
1965
1965
|
Args:
|
|
1966
1966
|
name: Name for the embedding space
|
|
1967
1967
|
s3_training_dataset: S3 URL for training dataset (must start with 's3://')
|
|
1968
1968
|
s3_validation_dataset: S3 URL for validation dataset (must start with 's3://')
|
|
1969
|
+
s3_visualization_dataset: Optional S3 URL for visualization dataset for epoch projection animations (must start with 's3://')
|
|
1969
1970
|
webhooks: Optional dict with webhook configuration keys (webhook_callback_secret, s3_backup_url, model_id_update_url)
|
|
1970
1971
|
user_metadata: Optional user metadata for ES/SP identification (max 32KB)
|
|
1971
|
-
|
|
1972
|
+
foundation_mode: Force foundation training mode for large datasets. If True, uses foundation
|
|
1973
|
+
training (chunked iteration, SQLite-backed splits). If False, uses standard
|
|
1974
|
+
training. If None (default), auto-detects based on dataset size (>=100k rows).
|
|
1975
|
+
|
|
1972
1976
|
Returns:
|
|
1973
1977
|
SessionInfo for the newly created embedding space session
|
|
1974
|
-
|
|
1978
|
+
|
|
1975
1979
|
Raises:
|
|
1976
1980
|
ValueError: If S3 URLs are invalid
|
|
1977
1981
|
"""
|
|
@@ -1980,22 +1984,32 @@ class FeatrixSphereClient:
|
|
|
1980
1984
|
raise ValueError("s3_training_dataset must be a valid S3 URL (s3://...)")
|
|
1981
1985
|
if not s3_validation_dataset.startswith('s3://'):
|
|
1982
1986
|
raise ValueError("s3_validation_dataset must be a valid S3 URL (s3://...)")
|
|
1983
|
-
|
|
1987
|
+
if s3_visualization_dataset and not s3_visualization_dataset.startswith('s3://'):
|
|
1988
|
+
raise ValueError("s3_visualization_dataset must be a valid S3 URL (s3://...)")
|
|
1989
|
+
|
|
1984
1990
|
print(f"Creating embedding space '{name}' from S3 datasets...")
|
|
1985
1991
|
print(f" Training: {s3_training_dataset}")
|
|
1986
1992
|
print(f" Validation: {s3_validation_dataset}")
|
|
1987
|
-
|
|
1993
|
+
if s3_visualization_dataset:
|
|
1994
|
+
print(f" Visualization: {s3_visualization_dataset}")
|
|
1995
|
+
if foundation_mode is not None:
|
|
1996
|
+
print(f" Foundation mode: {'enabled' if foundation_mode else 'disabled'}")
|
|
1997
|
+
|
|
1988
1998
|
data = {
|
|
1989
1999
|
"name": name,
|
|
1990
2000
|
"s3_file_data_set_training": s3_training_dataset,
|
|
1991
2001
|
"s3_file_data_set_validation": s3_validation_dataset
|
|
1992
2002
|
}
|
|
2003
|
+
if s3_visualization_dataset:
|
|
2004
|
+
data["s3_file_data_set_visualization"] = s3_visualization_dataset
|
|
1993
2005
|
if webhooks:
|
|
1994
2006
|
data['webhooks'] = webhooks
|
|
1995
2007
|
if user_metadata:
|
|
1996
2008
|
import json
|
|
1997
2009
|
data['user_metadata'] = json.dumps(user_metadata)
|
|
1998
2010
|
print(f"User metadata: {user_metadata}")
|
|
2011
|
+
if foundation_mode is not None:
|
|
2012
|
+
data['foundation_mode'] = foundation_mode
|
|
1999
2013
|
|
|
2000
2014
|
response_data = self._post_json("/compute/create-embedding-space", data)
|
|
2001
2015
|
|
|
@@ -5578,11 +5592,33 @@ class FeatrixSphereClient:
|
|
|
5578
5592
|
|
|
5579
5593
|
# Try to start training
|
|
5580
5594
|
response_data = self._post_json(f"/compute/session/{session_id}/train_predictor", data)
|
|
5595
|
+
|
|
5596
|
+
# Check if job was queued (all nodes busy)
|
|
5597
|
+
if response_data.get('queued'):
|
|
5598
|
+
job_id = response_data.get('job_id')
|
|
5599
|
+
queue_position = response_data.get('queue_position', 0)
|
|
5600
|
+
if verbose:
|
|
5601
|
+
print(f"📥 Job queued (all compute nodes busy)")
|
|
5602
|
+
print(f" Job ID: {job_id}")
|
|
5603
|
+
print(f" Queue position: {queue_position}")
|
|
5604
|
+
print(f" Waiting for job to be dispatched...")
|
|
5605
|
+
|
|
5606
|
+
# Poll for job dispatch and completion
|
|
5607
|
+
return self._poll_queued_job(
|
|
5608
|
+
session_id=session_id,
|
|
5609
|
+
job_id=job_id,
|
|
5610
|
+
target_column=target_column,
|
|
5611
|
+
target_column_type=target_column_type,
|
|
5612
|
+
poll_interval=poll_interval,
|
|
5613
|
+
max_poll_time=max_poll_time,
|
|
5614
|
+
verbose=verbose
|
|
5615
|
+
)
|
|
5616
|
+
|
|
5581
5617
|
return response_data
|
|
5582
|
-
|
|
5618
|
+
|
|
5583
5619
|
except Exception as e:
|
|
5584
5620
|
error_str = str(e).lower()
|
|
5585
|
-
|
|
5621
|
+
|
|
5586
5622
|
# Check if this is a "job already running" error
|
|
5587
5623
|
if "already running" in error_str or "job plan error" in error_str:
|
|
5588
5624
|
if verbose:
|
|
@@ -5701,6 +5737,180 @@ class FeatrixSphereClient:
|
|
|
5701
5737
|
if verbose:
|
|
5702
5738
|
print(f"⚠️ Could not clean up temporary file: {cleanup_error}")
|
|
5703
5739
|
|
|
5740
|
+
def _poll_queued_job(self, session_id: str, job_id: str, target_column: str, target_column_type: str,
                     poll_interval: int = 30, max_poll_time: int = 3600, verbose: bool = True) -> Dict[str, Any]:
    """
    Poll for a queued job to be dispatched and completed.

    This is called when train_predictor returns a 'queued' response because
    all compute nodes were busy. Polling proceeds in two phases:

    1. Queue phase: the job-queue endpoint is queried until the job reports
       'dispatched' or 'completed'. A 'failed' queue status returns at once.
       If the queue endpoint raises, polling falls back to phase 2.
    2. Session phase: the session's 'train_single_predictor' jobs are
       inspected until none is 'running' and at least one is 'done' or
       'failed'.

    Args:
        session_id: Session ID for the training job
        job_id: Queue job ID returned from the queued response
        target_column: Target column name (echoed in the return value)
        target_column_type: Target column type (echoed in the return value)
        poll_interval: Seconds between status checks
        max_poll_time: Maximum time to poll in seconds
        verbose: Whether to print status updates

    Returns:
        Dict with keys 'message', 'session_id', 'job_id', 'target_column',
        'target_column_type' and 'status' ('completed', 'failed' or
        'timeout'). Depending on the outcome it also carries
        'training_metrics' (completed, when metrics could be fetched),
        'failed_jobs' (session-level failure) or 'poll_time' (timeout).
    """
    import time

    def _result(status: str, message: str, **extra: Any) -> Dict[str, Any]:
        # Every exit path returns the same core fields; extras are appended last.
        payload = {
            "message": message,
            "session_id": session_id,
            "job_id": job_id,
            "target_column": target_column,
            "target_column_type": target_column_type,
            "status": status,
        }
        payload.update(extra)
        return payload

    def _say(text: str) -> None:
        if verbose:
            print(text)

    # Monotonic clock: a wall-clock adjustment must not shorten or extend the timeout.
    start_time = time.monotonic()

    def _pause() -> None:
        # Never sleep past the deadline, so max_poll_time is honoured.
        remaining = max_poll_time - (time.monotonic() - start_time)
        time.sleep(max(0, min(poll_interval, remaining)))

    last_status = None
    job_dispatched = False

    while time.monotonic() - start_time < max_poll_time:
        try:
            # Check queue status first (while job is still in queue)
            if not job_dispatched:
                try:
                    queue_status = self._get_json(f"/admin/monitor/job_queue/{job_id}")
                    job_info = queue_status.get('job', {})
                    queue_status_val = job_info.get('status', 'unknown')

                    if queue_status_val == 'pending':
                        # Still in queue - derive the position from the pending list.
                        try:
                            all_jobs = self._get_json("/admin/monitor/job_queue?status=pending")
                            pending_jobs = all_jobs.get('jobs', [])
                            position = next(
                                (i for i, j in enumerate(pending_jobs, start=1)
                                 if j.get('job_id') == job_id),
                                0,
                            )
                        except Exception:
                            # The position is informational only. Catch Exception,
                            # not everything, so Ctrl-C still interrupts the wait.
                            position = 0

                        if position > 0:
                            _say(f"⏳ Queue position: {position} (waiting for available node)")
                        else:
                            _say("⏳ Job still queued (waiting for available node)")

                    elif queue_status_val == 'dispatched':
                        # Job dispatched - switch to session polling
                        assigned_node = job_info.get('assigned_node', 'unknown')
                        _say(f"🚀 Job dispatched to {assigned_node}! Training starting...")
                        job_dispatched = True

                    elif queue_status_val in ('completed', 'failed'):
                        # Job already finished (fast completion or error)
                        if queue_status_val == 'failed':
                            error_msg = job_info.get('error_message', 'Unknown error')
                            _say(f"❌ Job failed: {error_msg}")
                            return _result("failed", f"Training job failed: {error_msg}")
                        # If completed, continue to session status check below
                        job_dispatched = True

                except Exception as queue_error:
                    # Queue endpoint might not be available - try session status
                    _say(f"⚠️ Could not check queue status: {queue_error}")
                    job_dispatched = True  # Fall back to session polling

            # Once dispatched (or we can't check queue), poll session status
            if job_dispatched:
                session_status = self.get_session_status(session_id)
                # Treat a missing jobs mapping as "nothing to report yet".
                jobs = session_status.jobs or {}

                sp_jobs = {k: v for k, v in jobs.items()
                           if v.get('job_type') == 'train_single_predictor'}

                if sp_jobs:
                    buckets = {'running': [], 'done': [], 'failed': []}
                    for jid, job in sp_jobs.items():
                        bucket = buckets.get(job.get('status', 'unknown'))
                        if bucket is not None:
                            bucket.append(jid)
                    running_jobs = buckets['running']
                    completed_jobs = buckets['done']
                    failed_jobs = buckets['failed']

                    current_status = f"Running: {len(running_jobs)}, Done: {len(completed_jobs)}, Failed: {len(failed_jobs)}"
                    if current_status != last_status:
                        # Only report when the tallies change, to avoid repeated lines.
                        _say(f"📊 Status: {current_status}")
                        last_status = current_status

                    # Check if training is complete
                    if not running_jobs and (completed_jobs or failed_jobs):
                        if completed_jobs:
                            _say("✅ Single predictor training completed successfully!")
                            try:
                                metrics = self.get_training_metrics(session_id)
                            except Exception as metrics_error:
                                # Metrics are optional; training itself succeeded.
                                _say(f"⚠️ Training completed but couldn't fetch metrics: {metrics_error}")
                                return _result(
                                    "completed",
                                    "Single predictor training completed successfully",
                                )
                            return _result(
                                "completed",
                                "Single predictor training completed successfully",
                                training_metrics=metrics,
                            )

                        _say("❌ Single predictor training failed")
                        return _result(
                            "failed",
                            "Single predictor training failed",
                            failed_jobs=failed_jobs,
                        )

            _pause()

        except Exception as poll_error:
            # Best-effort polling: report the problem and retry on the next cycle.
            _say(f"⚠️ Error during polling: {poll_error}")
            _pause()

    # Timeout reached
    _say(f"⏰ Polling timeout reached ({max_poll_time}s). Job may still be queued or training.")

    return _result(
        "timeout",
        "Polling timeout reached. Job may still be queued or training.",
        poll_time=max_poll_time,
    )
|
|
5913
|
+
|
|
5704
5914
|
def extend_embedding_space_data(self, session_id: str, new_data_df=None, new_data_file: str = None,
|
|
5705
5915
|
data_passes: int = 50, name: str = None, session_name_prefix: str = None,
|
|
5706
5916
|
poll_interval: int = 30, max_poll_time: int = 3600,
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
featrixsphere/__init__.py,sha256=tnkl4O62quDkp8n4aXzKQu1ELKyF5yfqpgmcjA9-rnY,2190
|
|
2
|
+
featrixsphere/client.py,sha256=mxUcqkhMkAsYpwJjdamQdXB_wRlnV-TUCU71xA289tA,451235
|
|
3
|
+
featrixsphere/api/__init__.py,sha256=quyvuPphVj9wb6v8Dio0SMG9iHgJAmY3asHk3f_zF10,1269
|
|
4
|
+
featrixsphere/api/api_endpoint.py,sha256=i3eCWuaUXftnH1Ai6MFZ7md7pC2FcRAIRO87CBZhyEQ,9000
|
|
5
|
+
featrixsphere/api/client.py,sha256=TdpujNsJxO4GfPMI_KoemQWV89go3KuK6OPAo9jX6Bs,12574
|
|
6
|
+
featrixsphere/api/foundational_model.py,sha256=ZF5wKMs6SfsNC3XYYXgbRMhnrtmLe6NeckjCCiH0fK0,21628
|
|
7
|
+
featrixsphere/api/http_client.py,sha256=TsOQHHNTDFGAR3mdHevj-0wy1-hPtgHXKe8Egiz5FVo,7269
|
|
8
|
+
featrixsphere/api/notebook_helper.py,sha256=xY9jsao26eaNiFh2s0_TlRZnR8xZ4P_e0EOKr2PtoVs,20060
|
|
9
|
+
featrixsphere/api/prediction_result.py,sha256=Tx7LXzF4XT-U3VqAN_IFc5DvxPnygc78M2usrD-yMu4,7521
|
|
10
|
+
featrixsphere/api/predictor.py,sha256=-vwCKpCfTgZKqzpDnzy1iYZQ-1-MGW8aErvxM9trktw,17652
|
|
11
|
+
featrixsphere/api/reference_record.py,sha256=-XOTF6ynznB3ouz06w3AF8X9SVId0g_dO20VvGNesUQ,7095
|
|
12
|
+
featrixsphere/api/vector_database.py,sha256=BplxKkPnAbcBX1A4KxFBJVb3qkQ-FH9zi9v2dWG5CgY,7976
|
|
13
|
+
featrixsphere-0.2.6127.dist-info/METADATA,sha256=Ew2yMa6rOSrsv1bSq38HXJogU0O5bliojWkbyQYGWV0,16232
|
|
14
|
+
featrixsphere-0.2.6127.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
15
|
+
featrixsphere-0.2.6127.dist-info/entry_points.txt,sha256=QreJeYfD_VWvbEqPmMXZ3pqqlFlJ1qZb-NtqnyhEldc,51
|
|
16
|
+
featrixsphere-0.2.6127.dist-info/top_level.txt,sha256=AyN4wjfzlD0hWnDieuEHX0KckphIk_aC73XCG4df5uU,14
|
|
17
|
+
featrixsphere-0.2.6127.dist-info/RECORD,,
|
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
featrixsphere/__init__.py,sha256=0xTcC19HBfAM1o3XtHqchORTD-0fFBscBomU1jP2xYc,1888
|
|
2
|
-
featrixsphere/client.py,sha256=GsGCWSvW9PhL57cgPfZZ-mkiHzXxSCaeQRZKs1kfKqY,440159
|
|
3
|
-
featrixsphere-0.2.5566.dist-info/METADATA,sha256=TEf5XbD_CjoJUq_M7NST61oo1IftZKKth43BI21hXcE,16232
|
|
4
|
-
featrixsphere-0.2.5566.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
5
|
-
featrixsphere-0.2.5566.dist-info/entry_points.txt,sha256=QreJeYfD_VWvbEqPmMXZ3pqqlFlJ1qZb-NtqnyhEldc,51
|
|
6
|
-
featrixsphere-0.2.5566.dist-info/top_level.txt,sha256=AyN4wjfzlD0hWnDieuEHX0KckphIk_aC73XCG4df5uU,14
|
|
7
|
-
featrixsphere-0.2.5566.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|