PyPI - featrixsphere - Versions diffs - 0.2.5566__py3-none-any.whl → 0.2.6127__py3-none-any.whl - Mend

featrixsphere 0.2.5566__py3-none-any.whl → 0.2.6127__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

featrixsphere/__init__.py +37 -18
featrixsphere/api/__init__.py +50 -0
featrixsphere/api/api_endpoint.py +280 -0
featrixsphere/api/client.py +396 -0
featrixsphere/api/foundational_model.py +658 -0
featrixsphere/api/http_client.py +209 -0
featrixsphere/api/notebook_helper.py +584 -0
featrixsphere/api/prediction_result.py +231 -0
featrixsphere/api/predictor.py +537 -0
featrixsphere/api/reference_record.py +227 -0
featrixsphere/api/vector_database.py +269 -0
featrixsphere/client.py +218 -8
{featrixsphere-0.2.5566.dist-info → featrixsphere-0.2.6127.dist-info}/METADATA +1 -1
featrixsphere-0.2.6127.dist-info/RECORD +17 -0
featrixsphere-0.2.5566.dist-info/RECORD +0 -7
{featrixsphere-0.2.5566.dist-info → featrixsphere-0.2.6127.dist-info}/WHEEL +0 -0
{featrixsphere-0.2.5566.dist-info → featrixsphere-0.2.6127.dist-info}/entry_points.txt +0 -0
{featrixsphere-0.2.5566.dist-info → featrixsphere-0.2.6127.dist-info}/top_level.txt +0 -0

featrixsphere/client.py CHANGED Viewed

@@ -1958,20 +1958,24 @@ class FeatrixSphereClient:
             ax.text(0.5, 0.5, f'Error plotting embedding: {e}',
                    transform=ax.transAxes, ha='center', va='center')
-    def create_embedding_space(self, name: str, s3_training_dataset: str, s3_validation_dataset: str, webhooks: Dict[str, str] = None, user_metadata: Dict[str, Any] = None) -> SessionInfo:
+    def create_embedding_space(self, name: str, s3_training_dataset: str, s3_validation_dataset: str, s3_visualization_dataset: str = None, webhooks: Dict[str, str] = None, user_metadata: Dict[str, Any] = None, foundation_mode: bool = None) -> SessionInfo:
         """
         Create a new embedding space from S3 training and validation datasets.
         Args:
             name: Name for the embedding space
             s3_training_dataset: S3 URL for training dataset (must start with 's3://')
             s3_validation_dataset: S3 URL for validation dataset (must start with 's3://')
+            s3_visualization_dataset: Optional S3 URL for visualization dataset for epoch projection animations (must start with 's3://')
             webhooks: Optional dict with webhook configuration keys (webhook_callback_secret, s3_backup_url, model_id_update_url)
             user_metadata: Optional user metadata for ES/SP identification (max 32KB)
+            foundation_mode: Force foundation training mode for large datasets. If True, uses foundation
+                           training (chunked iteration, SQLite-backed splits). If False, uses standard
+                           training. If None (default), auto-detects based on dataset size (>=100k rows).
         Returns:
             SessionInfo for the newly created embedding space session
         Raises:
             ValueError: If S3 URLs are invalid
         """
@@ -1980,22 +1984,32 @@ class FeatrixSphereClient:
             raise ValueError("s3_training_dataset must be a valid S3 URL (s3://...)")
         if not s3_validation_dataset.startswith('s3://'):
             raise ValueError("s3_validation_dataset must be a valid S3 URL (s3://...)")
+        if s3_visualization_dataset and not s3_visualization_dataset.startswith('s3://'):
+            raise ValueError("s3_visualization_dataset must be a valid S3 URL (s3://...)")
         print(f"Creating embedding space '{name}' from S3 datasets...")
         print(f"  Training: {s3_training_dataset}")
         print(f"  Validation: {s3_validation_dataset}")
+        if s3_visualization_dataset:
+            print(f"  Visualization: {s3_visualization_dataset}")
+        if foundation_mode is not None:
+            print(f"  Foundation mode: {'enabled' if foundation_mode else 'disabled'}")
         data = {
             "name": name,
             "s3_file_data_set_training": s3_training_dataset,
             "s3_file_data_set_validation": s3_validation_dataset
         }
+        if s3_visualization_dataset:
+            data["s3_file_data_set_visualization"] = s3_visualization_dataset
         if webhooks:
             data['webhooks'] = webhooks
         if user_metadata:
             import json
             data['user_metadata'] = json.dumps(user_metadata)
             print(f"User metadata: {user_metadata}")
+        if foundation_mode is not None:
+            data['foundation_mode'] = foundation_mode
         response_data = self._post_json("/compute/create-embedding-space", data)
@@ -5578,11 +5592,33 @@ class FeatrixSphereClient:
             # Try to start training
             response_data = self._post_json(f"/compute/session/{session_id}/train_predictor", data)
+            # Check if job was queued (all nodes busy)
+            if response_data.get('queued'):
+                job_id = response_data.get('job_id')
+                queue_position = response_data.get('queue_position', 0)
+                if verbose:
+                    print(f"📥 Job queued (all compute nodes busy)")
+                    print(f"   Job ID: {job_id}")
+                    print(f"   Queue position: {queue_position}")
+                    print(f"   Waiting for job to be dispatched...")
+                # Poll for job dispatch and completion
+                return self._poll_queued_job(
+                    session_id=session_id,
+                    job_id=job_id,
+                    target_column=target_column,
+                    target_column_type=target_column_type,
+                    poll_interval=poll_interval,
+                    max_poll_time=max_poll_time,
+                    verbose=verbose
+                )
             return response_data
         except Exception as e:
             error_str = str(e).lower()
             # Check if this is a "job already running" error
             if "already running" in error_str or "job plan error" in error_str:
                 if verbose:
@@ -5701,6 +5737,180 @@ class FeatrixSphereClient:
                     if verbose:
                         print(f"⚠️  Could not clean up temporary file: {cleanup_error}")
+    def _poll_queued_job(self, session_id: str, job_id: str, target_column: str, target_column_type: str,
+                         poll_interval: int = 30, max_poll_time: int = 3600, verbose: bool = True) -> Dict[str, Any]:
+        """
+        Poll for a queued job to be dispatched and completed.
+        This is called when train_predictor returns a 'queued' response because
+        all compute nodes were busy. We poll until:
+        1. Job is dispatched to a node and training starts
+        2. Training completes
+        3. Timeout is reached
+        Args:
+            session_id: Session ID for the training job
+            job_id: Queue job ID returned from the queued response
+            target_column: Target column name (for return value)
+            target_column_type: Target column type (for return value)
+            poll_interval: Seconds between status checks
+            max_poll_time: Maximum time to poll in seconds
+            verbose: Whether to print status updates
+        Returns:
+            Dict with training result or status
+        """
+        import time
+        start_time = time.time()
+        last_status = None
+        job_dispatched = False
+        while time.time() - start_time < max_poll_time:
+            try:
+                # Check queue status first (while job is still in queue)
+                if not job_dispatched:
+                    try:
+                        queue_status = self._get_json(f"/admin/monitor/job_queue/{job_id}")
+                        job_info = queue_status.get('job', {})
+                        queue_status_val = job_info.get('status', 'unknown')
+                        if queue_status_val == 'pending':
+                            # Still in queue - show position
+                            # Get queue position by counting pending jobs
+                            try:
+                                all_jobs = self._get_json("/admin/monitor/job_queue?status=pending")
+                                pending_jobs = all_jobs.get('jobs', [])
+                                position = 0
+                                for i, j in enumerate(pending_jobs):
+                                    if j.get('job_id') == job_id:
+                                        position = i + 1
+                                        break
+                                if verbose and position > 0:
+                                    print(f"⏳ Queue position: {position} (waiting for available node)")
+                            except:
+                                if verbose:
+                                    print(f"⏳ Job still queued (waiting for available node)")
+                        elif queue_status_val == 'dispatched':
+                            # Job dispatched - switch to session polling
+                            assigned_node = job_info.get('assigned_node', 'unknown')
+                            if verbose:
+                                print(f"🚀 Job dispatched to {assigned_node}! Training starting...")
+                            job_dispatched = True
+                        elif queue_status_val in ['completed', 'failed']:
+                            # Job already finished (fast completion or error)
+                            if queue_status_val == 'failed':
+                                error_msg = job_info.get('error_message', 'Unknown error')
+                                if verbose:
+                                    print(f"❌ Job failed: {error_msg}")
+                                return {
+                                    "message": f"Training job failed: {error_msg}",
+                                    "session_id": session_id,
+                                    "job_id": job_id,
+                                    "target_column": target_column,
+                                    "target_column_type": target_column_type,
+                                    "status": "failed"
+                                }
+                            # If completed, continue to session status check below
+                            job_dispatched = True
+                    except Exception as queue_error:
+                        # Queue endpoint might not be available - try session status
+                        if verbose:
+                            print(f"⚠️ Could not check queue status: {queue_error}")
+                        job_dispatched = True  # Fall back to session polling
+                # Once dispatched (or we can't check queue), poll session status
+                if job_dispatched:
+                    session_status = self.get_session_status(session_id)
+                    jobs = session_status.jobs
+                    # Check for single predictor jobs
+                    sp_jobs = {k: v for k, v in jobs.items()
+                              if v.get('job_type') == 'train_single_predictor'}
+                    if sp_jobs:
+                        running_jobs = []
+                        completed_jobs = []
+                        failed_jobs = []
+                        for jid, job in sp_jobs.items():
+                            status = job.get('status', 'unknown')
+                            if status == 'running':
+                                running_jobs.append(jid)
+                            elif status == 'done':
+                                completed_jobs.append(jid)
+                            elif status == 'failed':
+                                failed_jobs.append(jid)
+                        current_status = f"Running: {len(running_jobs)}, Done: {len(completed_jobs)}, Failed: {len(failed_jobs)}"
+                        if current_status != last_status and verbose:
+                            print(f"📊 Status: {current_status}")
+                            last_status = current_status
+                        # Check if training is complete
+                        if not running_jobs and (completed_jobs or failed_jobs):
+                            if completed_jobs:
+                                if verbose:
+                                    print(f"✅ Single predictor training completed successfully!")
+                                try:
+                                    metrics = self.get_training_metrics(session_id)
+                                    return {
+                                        "message": "Single predictor training completed successfully",
+                                        "session_id": session_id,
+                                        "job_id": job_id,
+                                        "target_column": target_column,
+                                        "target_column_type": target_column_type,
+                                        "status": "completed",
+                                        "training_metrics": metrics
+                                    }
+                                except Exception as metrics_error:
+                                    if verbose:
+                                        print(f"⚠️ Training completed but couldn't fetch metrics: {metrics_error}")
+                                    return {
+                                        "message": "Single predictor training completed successfully",
+                                        "session_id": session_id,
+                                        "job_id": job_id,
+                                        "target_column": target_column,
+                                        "target_column_type": target_column_type,
+                                        "status": "completed"
+                                    }
+                            else:
+                                if verbose:
+                                    print(f"❌ Single predictor training failed")
+                                return {
+                                    "message": "Single predictor training failed",
+                                    "session_id": session_id,
+                                    "job_id": job_id,
+                                    "target_column": target_column,
+                                    "target_column_type": target_column_type,
+                                    "status": "failed",
+                                    "failed_jobs": failed_jobs
+                                }
+                time.sleep(poll_interval)
+            except Exception as poll_error:
+                if verbose:
+                    print(f"⚠️ Error during polling: {poll_error}")
+                time.sleep(poll_interval)
+        # Timeout reached
+        if verbose:
+            print(f"⏰ Polling timeout reached ({max_poll_time}s). Job may still be queued or training.")
+        return {
+            "message": f"Polling timeout reached. Job may still be queued or training.",
+            "session_id": session_id,
+            "job_id": job_id,
+            "target_column": target_column,
+            "target_column_type": target_column_type,
+            "status": "timeout",
+            "poll_time": max_poll_time
+        }
     def extend_embedding_space_data(self, session_id: str, new_data_df=None, new_data_file: str = None,
                                     data_passes: int = 50, name: str = None, session_name_prefix: str = None,
                                     poll_interval: int = 30, max_poll_time: int = 3600,

{featrixsphere-0.2.5566.dist-info → featrixsphere-0.2.6127.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: featrixsphere
-Version: 0.2.5566
+Version: 0.2.6127
 Summary: Transform any CSV into a production-ready ML model in minutes, not months.
 Home-page: https://github.com/Featrix/sphere
 Author: Featrix

featrixsphere-0.2.6127.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,17 @@
+featrixsphere/__init__.py,sha256=tnkl4O62quDkp8n4aXzKQu1ELKyF5yfqpgmcjA9-rnY,2190
+featrixsphere/client.py,sha256=mxUcqkhMkAsYpwJjdamQdXB_wRlnV-TUCU71xA289tA,451235
+featrixsphere/api/__init__.py,sha256=quyvuPphVj9wb6v8Dio0SMG9iHgJAmY3asHk3f_zF10,1269
+featrixsphere/api/api_endpoint.py,sha256=i3eCWuaUXftnH1Ai6MFZ7md7pC2FcRAIRO87CBZhyEQ,9000
+featrixsphere/api/client.py,sha256=TdpujNsJxO4GfPMI_KoemQWV89go3KuK6OPAo9jX6Bs,12574
+featrixsphere/api/foundational_model.py,sha256=ZF5wKMs6SfsNC3XYYXgbRMhnrtmLe6NeckjCCiH0fK0,21628
+featrixsphere/api/http_client.py,sha256=TsOQHHNTDFGAR3mdHevj-0wy1-hPtgHXKe8Egiz5FVo,7269
+featrixsphere/api/notebook_helper.py,sha256=xY9jsao26eaNiFh2s0_TlRZnR8xZ4P_e0EOKr2PtoVs,20060
+featrixsphere/api/prediction_result.py,sha256=Tx7LXzF4XT-U3VqAN_IFc5DvxPnygc78M2usrD-yMu4,7521
+featrixsphere/api/predictor.py,sha256=-vwCKpCfTgZKqzpDnzy1iYZQ-1-MGW8aErvxM9trktw,17652
+featrixsphere/api/reference_record.py,sha256=-XOTF6ynznB3ouz06w3AF8X9SVId0g_dO20VvGNesUQ,7095
+featrixsphere/api/vector_database.py,sha256=BplxKkPnAbcBX1A4KxFBJVb3qkQ-FH9zi9v2dWG5CgY,7976
+featrixsphere-0.2.6127.dist-info/METADATA,sha256=Ew2yMa6rOSrsv1bSq38HXJogU0O5bliojWkbyQYGWV0,16232
+featrixsphere-0.2.6127.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+featrixsphere-0.2.6127.dist-info/entry_points.txt,sha256=QreJeYfD_VWvbEqPmMXZ3pqqlFlJ1qZb-NtqnyhEldc,51
+featrixsphere-0.2.6127.dist-info/top_level.txt,sha256=AyN4wjfzlD0hWnDieuEHX0KckphIk_aC73XCG4df5uU,14
+featrixsphere-0.2.6127.dist-info/RECORD,,

featrixsphere-0.2.5566.dist-info/RECORD DELETED Viewed

@@ -1,7 +0,0 @@
-featrixsphere/__init__.py,sha256=0xTcC19HBfAM1o3XtHqchORTD-0fFBscBomU1jP2xYc,1888
-featrixsphere/client.py,sha256=GsGCWSvW9PhL57cgPfZZ-mkiHzXxSCaeQRZKs1kfKqY,440159
-featrixsphere-0.2.5566.dist-info/METADATA,sha256=TEf5XbD_CjoJUq_M7NST61oo1IftZKKth43BI21hXcE,16232
-featrixsphere-0.2.5566.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-featrixsphere-0.2.5566.dist-info/entry_points.txt,sha256=QreJeYfD_VWvbEqPmMXZ3pqqlFlJ1qZb-NtqnyhEldc,51
-featrixsphere-0.2.5566.dist-info/top_level.txt,sha256=AyN4wjfzlD0hWnDieuEHX0KckphIk_aC73XCG4df5uU,14
-featrixsphere-0.2.5566.dist-info/RECORD,,

{featrixsphere-0.2.5566.dist-info → featrixsphere-0.2.6127.dist-info}/WHEEL RENAMED Viewed

File without changes

{featrixsphere-0.2.5566.dist-info → featrixsphere-0.2.6127.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{featrixsphere-0.2.5566.dist-info → featrixsphere-0.2.6127.dist-info}/top_level.txt RENAMED Viewed

File without changes

featrixsphere 0.2.5566__py3-none-any.whl → 0.2.6127__py3-none-any.whl

featrixsphere 0.2.5566__py3-none-any.whl → 0.2.6127__py3-none-any.whl