PyPI - featrixsphere - Versions diffs - 0.2.6379__py3-none-any.whl → 0.2.6710__py3-none-any.whl - Mend

featrixsphere-0.2.6379-py3-none-any.whl → featrixsphere-0.2.6710-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

featrixsphere/__init__.py +1 -1
featrixsphere/api/client.py +28 -0
featrixsphere/api/foundational_model.py +394 -20
featrixsphere/api/http_client.py +37 -4
featrixsphere/api/prediction_result.py +98 -9
featrixsphere/api/predictor.py +77 -3
featrixsphere/client.py +27 -16
{featrixsphere-0.2.6379.dist-info → featrixsphere-0.2.6710.dist-info}/METADATA +1 -1
featrixsphere-0.2.6710.dist-info/RECORD +17 -0
{featrixsphere-0.2.6379.dist-info → featrixsphere-0.2.6710.dist-info}/WHEEL +1 -1
featrixsphere-0.2.6379.dist-info/RECORD +0 -17
{featrixsphere-0.2.6379.dist-info → featrixsphere-0.2.6710.dist-info}/entry_points.txt +0 -0
{featrixsphere-0.2.6379.dist-info → featrixsphere-0.2.6710.dist-info}/top_level.txt +0 -0

featrixsphere/__init__.py CHANGED Viewed

@@ -57,7 +57,7 @@ TWO API OPTIONS:
     >>> print(result['prediction'])
 """
-__version__ = "0.2.6379"
+__version__ = "0.2.6710"
 __author__ = "Featrix"
 __email__ = "support@featrix.com"
 __license__ = "MIT"

featrixsphere/api/client.py CHANGED Viewed

@@ -342,6 +342,34 @@ class FeatrixSphere(HTTPClientMixin):
             ground_truth=ground_truth
         )
+    def list_sessions(
+        self,
+        name_prefix: str = "",
+    ) -> List[str]:
+        """
+        Return the session IDs whose names match a search term.
+        Issues GET /compute/sessions-for-org. Despite the parameter name,
+        the server is described as matching the term anywhere in the
+        session directory name, not only at the start.
+        Args:
+            name_prefix: Search term; when empty, no filter parameter is
+                sent with the request.
+        Returns:
+            The 'sessions' list from the response, or an empty list when
+            the response has no such key.
+        Example:
+            sessions = featrix.list_sessions(name_prefix="customer")
+            for sid in sessions:
+                fm = featrix.foundational_model(sid)
+                print(f"{sid}: {fm.status}")
+        """
+        # Only send the filter when the caller actually supplied one.
+        query = {'name_prefix': name_prefix} if name_prefix else {}
+        payload = self._get_json("/compute/sessions-for-org", params=query)
+        return payload.get('sessions', [])
     def health_check(self) -> Dict[str, Any]:
         """
         Check if the API server is healthy.

featrixsphere/api/foundational_model.py CHANGED Viewed

@@ -22,6 +22,21 @@ from .reference_record import ReferenceRecord
 logger = logging.getLogger(__name__)
+def _parse_datetime(value) -> Optional[datetime]:
+    """Coerce an API timestamp into a datetime, or None when that is not possible.
+    datetime instances are returned unchanged, strings are parsed as ISO 8601
+    (a 'Z' is rewritten to '+00:00' first), and every other input, including
+    None and unparseable strings, yields None.
+    """
+    if isinstance(value, datetime):
+        return value
+    if not isinstance(value, str):
+        # Covers None as well as any unsupported type (numbers, dicts, ...).
+        return None
+    # Spell the UTC designator as an explicit offset before parsing.
+    normalized = value.replace('Z', '+00:00')
+    try:
+        return datetime.fromisoformat(normalized)
+    except (ValueError, AttributeError):
+        return None
 @dataclass
 class FoundationalModel:
     """
@@ -68,6 +83,11 @@ class FoundationalModel:
     epochs: Optional[int] = None
     final_loss: Optional[float] = None
     created_at: Optional[datetime] = None
+    updated_at: Optional[datetime] = None
+    session_type: Optional[str] = None
+    compute_cluster: Optional[str] = None
+    error_message: Optional[str] = None
+    training_progress: Optional[Dict[str, Any]] = None
     # Internal
     _ctx: Optional['ClientContext'] = field(default=None, repr=False)
@@ -88,7 +108,9 @@ class FoundationalModel:
             dimensions=response.get('d_model') or response.get('dimensions'),
             epochs=response.get('epochs') or response.get('final_epoch'),
             final_loss=response.get('final_loss'),
-            created_at=datetime.now(),
+            created_at=_parse_datetime(response.get('created_at')),
+            session_type=response.get('session_type'),
+            compute_cluster=response.get('compute_cluster'),
             _ctx=ctx,
         )
@@ -99,19 +121,22 @@ class FoundationalModel:
         ctx: 'ClientContext'
     ) -> 'FoundationalModel':
         """Load FoundationalModel from session ID."""
-        # Get session info
-        session_data = ctx.get_json(f"/compute/session/{session_id}")
+        # Get session info - response has {"session": {...}, "jobs": {...}}
+        response_data = ctx.get_json(f"/compute/session/{session_id}")
+        session = response_data.get('session', response_data)
         fm = cls(
             id=session_id,
-            name=session_data.get('name'),
-            status=session_data.get('status'),
-            created_at=datetime.now(),
+            name=session.get('name'),
+            status=session.get('status'),
+            created_at=_parse_datetime(session.get('created_at')),
+            session_type=session.get('session_type'),
+            compute_cluster=session.get('compute_cluster'),
             _ctx=ctx,
         )
-        # Try to get model info
-        fm._update_from_session(session_data)
+        # Extract model info, training stats, jobs, error_message
+        fm._update_from_session(response_data)
         return fm
@@ -439,10 +464,11 @@ class FoundationalModel:
         last_status = None
         while time.time() - start_time < max_wait_time:
-            # Get session status
-            session_data = self._ctx.get_json(f"/compute/session/{self.id}")
+            # Get session status - response has {"session": {...}, "jobs": {...}}
+            response_data = self._ctx.get_json(f"/compute/session/{self.id}")
+            session_data = response_data.get('session', response_data)
             status = session_data.get('status', 'unknown')
-            jobs = session_data.get('jobs', {})
+            jobs = response_data.get('jobs', {})
             # Look for ES training job
             es_job = None
@@ -475,7 +501,7 @@ class FoundationalModel:
             # Check completion
             if job_status == 'done' or status == 'done':
                 self.status = 'done'
-                self._update_from_session(session_data)
+                self._update_from_session(response_data)
                 if show_progress:
                     print(f"Training complete!")
                     if self.dimensions:
@@ -566,6 +592,28 @@ class FoundationalModel:
         return self._ctx.get_json(f"/session/{self.id}/projections")
+    def get_sphere_preview(self, save_path: Optional[str] = None) -> bytes:
+        """
+        Fetch the 2D sphere projection preview image (PNG) for this session.
+        Args:
+            save_path: Optional filesystem path. When given (and non-empty), the
+                      downloaded bytes are also written to this path in binary mode.
+        Returns:
+            The raw bytes returned by GET /session/{id}/preview.
+        Raises:
+            ValueError: If this model has no client context attached.
+        """
+        if not self._ctx:
+            raise ValueError("FoundationalModel not connected to client")
+        png_bytes = self._ctx.get_bytes(f"/session/{self.id}/preview")
+        if save_path:
+            with open(save_path, 'wb') as f:
+                f.write(png_bytes)
+        return png_bytes
     def get_training_metrics(self) -> Dict[str, Any]:
         """Get training metrics and history."""
         if not self._ctx:
@@ -605,27 +653,73 @@ class FoundationalModel:
         return predictors
-    def _update_from_session(self, session_data: Dict[str, Any]) -> None:
-        """Update fields from session data."""
-        # Try to get model info from various places
-        model_info = session_data.get('model_info', {})
-        training_stats = session_data.get('training_stats', {})
+    def _update_from_session(self, response_data: Dict[str, Any]) -> None:
+        """Update fields from session API response.
+        The response from GET /compute/session/{id} has structure:
+            {"session": {...}, "jobs": {...}, ...}
+        A flat response (no "session" key) is also accepted; it is then
+        treated as the session object itself.
+        """
+        # Handle both nested and flat response formats
+        session = response_data.get('session', response_data)
+        jobs = response_data.get('jobs', {})
+        # Core session fields
+        if session.get('name') and not self.name:
+            self.name = session['name']
+        if session.get('status'):
+            self.status = session['status']
+        if session.get('session_type'):
+            self.session_type = session['session_type']
+        if session.get('compute_cluster'):
+            self.compute_cluster = session['compute_cluster']
+        if session.get('created_at') and not self.created_at:
+            self.created_at = _parse_datetime(session['created_at'])
+        # updated_at prefers finished_at and falls back to started_at
+        if session.get('finished_at'):
+            self.updated_at = _parse_datetime(session['finished_at'])
+        elif session.get('started_at'):
+            self.updated_at = _parse_datetime(session['started_at'])
+        # Model info from session
+        model_info = session.get('model_info', {})
+        training_stats = session.get('training_stats', {})
+        # NOTE(review): dimensions, epochs and final_loss are reassigned
+        # unconditionally, so a response lacking them resets previously set
+        # values to None. The `or` chains also skip falsy values such as a
+        # final_loss of 0.0.
         self.dimensions = (
             model_info.get('d_model') or
             model_info.get('embedding_dim') or
-            session_data.get('d_model')
+            session.get('d_model')
         )
         self.epochs = (
             training_stats.get('final_epoch') or
             training_stats.get('epochs_trained') or
-            session_data.get('epochs')
+            session.get('epochs')
         )
         self.final_loss = (
             training_stats.get('final_loss') or
-            session_data.get('final_loss')
+            session.get('final_loss')
         )
+        # Extract error_message and training_progress from jobs
+        # (when several jobs match, the last one iterated wins)
+        for job_id, job in jobs.items():
+            job_type = job.get('job_type', '')
+            job_status = job.get('status', '')
+            # Training progress from ES training job
+            if job_type in ('train_embedding_space', 'train_es', 'training'):
+                current_epoch = job.get('current_epoch') or job.get('epoch')
+                total_epochs = job.get('total_epochs') or job.get('epochs')
+                if current_epoch or total_epochs:
+                    self.training_progress = {
+                        'current_epoch': current_epoch,
+                        'total_epochs': total_epochs,
+                        'job_status': job_status,
+                    }
+            # Error message from any failed job
+            if job_status in ('failed', 'error'):
+                err = job.get('error') or job.get('error_message')
+                if err:
+                    self.error_message = err
     def _clean_record(self, record: Dict[str, Any]) -> Dict[str, Any]:
         """Clean a record for API submission."""
         import math
@@ -640,6 +734,281 @@ class FoundationalModel:
             cleaned[key] = value
         return cleaned
+    def get_columns(self) -> List[str]:
+        """
+        Get the column names in this foundational model's embedding space.
+        Fetches GET /compute/session/{id}/columns and reads the column list
+        from the 'columns' key, falling back to 'column_names'.
+        Returns:
+            List of column name strings (empty when neither key is present)
+        Raises:
+            ValueError: If this model has no client context attached.
+        Example:
+            columns = fm.get_columns()
+            print(columns)  # ['age', 'income', 'city', ...]
+        """
+        if not self._ctx:
+            raise ValueError("FoundationalModel not connected to client")
+        response = self._ctx.get_json(f"/compute/session/{self.id}/columns")
+        # This endpoint is documented elsewhere in this class as returning
+        # 'column_names'; accept either key so a key mismatch does not
+        # silently produce an empty list, and never return None.
+        return response.get('columns') or response.get('column_names') or []
+    @property
+    def columns(self) -> List[str]:
+        """Column names in this model's embedding space.
+        Delegates to get_columns(), so every attribute access issues a request.
+        """
+        names = self.get_columns()
+        return names
+    @property
+    def schema_metadata(self) -> Dict[str, Any]:
+        """Return the raw response of GET /compute/session/{id}/columns.
+        Returns:
+            The response dict, described as containing 'column_names',
+            'column_types', and 'num_columns'.
+        Raises:
+            ValueError: If this model has no client context attached.
+        """
+        ctx = self._ctx
+        if not ctx:
+            raise ValueError("FoundationalModel not connected to client")
+        return ctx.get_json(f"/compute/session/{self.id}/columns")
+    def clone(
+        self,
+        target_compute_cluster: Optional[str] = None,
+        new_name: Optional[str] = None,
+        source_compute_cluster: Optional[str] = None,
+    ) -> 'FoundationalModel':
+        """
+        Clone this embedding space, optionally to a different compute node.
+        Args:
+            target_compute_cluster: Target compute cluster (None = same node)
+            new_name: Name for the cloned session
+            source_compute_cluster: Source compute cluster (if routing needed).
+                NOTE(review): this value is accepted but is not included in
+                the request built below - confirm how the server expects it.
+        Returns:
+            New FoundationalModel instance for the cloned embedding space
+        Raises:
+            ValueError: If the model is not connected to a client, or the
+                server response does not contain a 'new_session_id'.
+        Example:
+            cloned = fm.clone(
+                target_compute_cluster="churro",
+                new_name="my-model-clone"
+            )
+        """
+        if not self._ctx:
+            raise ValueError("FoundationalModel not connected to client")
+        data = {
+            "to_compute": target_compute_cluster,
+            "new_session_name": new_name,
+        }
+        response = self._ctx.post_json(
+            f"/compute/session/{self.id}/clone_embedding_space",
+            data=data
+        )
+        new_session_id = response.get('new_session_id')
+        if not new_session_id:
+            # Without an ID every later call on the clone would target
+            # "/compute/session//..."; fail here instead of returning it.
+            raise ValueError("Clone response did not include 'new_session_id'")
+        return FoundationalModel(
+            id=new_session_id,
+            name=new_name,
+            # Status and creation time are set locally, not read from the response.
+            status="done",
+            created_at=datetime.now(),
+            _ctx=self._ctx,
+        )
+    def refresh(self) -> Dict[str, Any]:
+        """
+        Re-fetch this model's session from the server and update local state.
+        Calls GET /compute/session/{id}, applies the payload through
+        _update_from_session (status, epochs, dimensions, etc.), and hands
+        the payload back to the caller.
+        Returns:
+            Full model info dictionary from the server
+        Raises:
+            ValueError: If this model has no client context attached.
+        Example:
+            info = fm.refresh()
+            print(fm.status)   # Updated from server
+            print(fm.epochs)   # Updated from server
+        """
+        ctx = self._ctx
+        if not ctx:
+            raise ValueError("FoundationalModel not connected to client")
+        payload = ctx.get_json(f"/compute/session/{self.id}")
+        self._update_from_session(payload)
+        return payload
+    def is_ready(self) -> bool:
+        """
+        Report whether this foundational model has finished training.
+        Refreshes local state from the server first, then compares the
+        updated status against 'done'.
+        Returns:
+            True if training is complete, False otherwise
+        Raises:
+            ValueError: If this model has no client context attached.
+        Example:
+            if fm.is_ready():
+                predictor = fm.create_classifier(target_column="target")
+        """
+        # refresh() performs the same connectivity check, fetch and
+        # _update_from_session call this method needs.
+        self.refresh()
+        finished = self.status == 'done'
+        return finished
+    def publish(
+        self,
+        org_id: str,
+        name: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """
+        Publish this foundational model to the production directory.
+        Published models are described as protected from garbage collection
+        and available across all compute nodes via the shared backplane.
+        Args:
+            org_id: Organization ID for directory organization
+            name: Name for the published model (defaults to self.name)
+        Returns:
+            The server response to POST /compute/session/{id}/publish,
+            described as a dict with published_path, output_path, and status
+        Raises:
+            ValueError: If the model is not connected to a client, or no
+                name is available from either the argument or the model.
+        Example:
+            fm = featrix.create_foundational_model(name="my_model", csv_file="data.csv")
+            fm.wait_for_training()
+            fm.publish(org_id="my_org", name="my_model_v1")
+        """
+        if not self._ctx:
+            raise ValueError("FoundationalModel not connected to client")
+        # An explicit name wins; otherwise fall back to the model's own name.
+        publish_name = name if name else self.name
+        if not publish_name:
+            raise ValueError("name is required (either pass it or set it on the model)")
+        endpoint = f"/compute/session/{self.id}/publish"
+        return self._ctx.post_json(
+            endpoint,
+            data={"org_id": org_id, "name": publish_name},
+        )
+    def deprecate(
+        self,
+        warning_message: str,
+        expiration_date: str,
+    ) -> Dict[str, Any]:
+        """
+        Deprecate this published model with a warning and expiration date.
+        The model is described as remaining available until the expiration
+        date, with prediction responses carrying a model_expiration field
+        that warns consumers.
+        Args:
+            warning_message: Warning message to display
+            expiration_date: ISO format date string (e.g., "2026-06-01T00:00:00Z").
+                The string is sent to the server unchanged.
+        Returns:
+            The server response to POST /compute/session/{id}/deprecate,
+            described as a dict with deprecation status
+        Raises:
+            ValueError: If this model has no client context attached.
+        Example:
+            from datetime import datetime, timedelta, timezone
+            # Start from an aware UTC clock so the trailing "Z" is truthful;
+            # appending "Z" to a naive datetime.now() would label local time as UTC.
+            expiration = (datetime.now(timezone.utc) + timedelta(days=90)).strftime("%Y-%m-%dT%H:%M:%SZ")
+            fm.deprecate(
+                warning_message="Replaced by v2. Migrate by expiration.",
+                expiration_date=expiration
+            )
+        """
+        if not self._ctx:
+            raise ValueError("FoundationalModel not connected to client")
+        data = {
+            "warning_message": warning_message,
+            "expiration_date": expiration_date,
+        }
+        return self._ctx.post_json(f"/compute/session/{self.id}/deprecate", data=data)
+    def unpublish(self) -> Dict[str, Any]:
+        """
+        Unpublish this model via POST /compute/session/{id}/unpublish.
+        WARNING: After unpublishing, the model is described as subject to
+        garbage collection and may be deleted when disk space is low.
+        Returns:
+            The server response, described as a dict with unpublish status
+        Raises:
+            ValueError: If this model has no client context attached.
+        Example:
+            fm.unpublish()
+        """
+        ctx = self._ctx
+        if not ctx:
+            raise ValueError("FoundationalModel not connected to client")
+        # The endpoint takes no parameters; an empty JSON body is posted.
+        endpoint = f"/compute/session/{self.id}/unpublish"
+        return ctx.post_json(endpoint, data={})
+    def publish_checkpoint(
+        self,
+        name: str,
+        org_id: Optional[str] = None,
+        checkpoint_epoch: Optional[int] = None,
+        session_name_prefix: Optional[str] = None,
+        publish: bool = True,
+    ) -> 'FoundationalModel':
+        """
+        Publish a checkpoint from this model's training as a new foundation model.
+        Posts to /compute/session/{id}/publish_partial_foundation and wraps the
+        returned session ID in a NEW FoundationalModel. Described as useful for
+        snapshotting good intermediate models while training continues.
+        Args:
+            name: Name for the new foundation model (required)
+            org_id: Organization ID (required if publish=True)
+            checkpoint_epoch: Which epoch checkpoint to use (None = best/latest)
+            session_name_prefix: Optional prefix for the new session ID
+            publish: Move to published directory (default: True)
+        Returns:
+            New FoundationalModel instance for the published checkpoint
+        Raises:
+            ValueError: If the model is not connected to a client, if
+                publish=True without an org_id, or if the server response
+                does not contain a 'foundation_session_id'.
+        Example:
+            # Snapshot epoch 50 while training continues
+            checkpoint_fm = fm.publish_checkpoint(
+                name="My Model v0.5",
+                org_id="my_org",
+                checkpoint_epoch=50
+            )
+            # Use immediately
+            predictor = checkpoint_fm.create_classifier(target_column="target")
+        """
+        if not self._ctx:
+            raise ValueError("FoundationalModel not connected to client")
+        if publish and not org_id:
+            raise ValueError("org_id is required when publish=True")
+        data = {
+            "name": name,
+            "publish": publish,
+        }
+        # Optional fields are only sent when the caller supplied them.
+        # checkpoint_epoch uses an explicit None check so that epoch 0 is kept.
+        if checkpoint_epoch is not None:
+            data["checkpoint_epoch"] = checkpoint_epoch
+        if session_name_prefix:
+            data["session_name_prefix"] = session_name_prefix
+        if org_id:
+            data["org_id"] = org_id
+        response = self._ctx.post_json(
+            f"/compute/session/{self.id}/publish_partial_foundation",
+            data=data
+        )
+        foundation_session_id = response.get("foundation_session_id")
+        if not foundation_session_id:
+            # Without an ID every later call on the new model would target
+            # "/compute/session//..."; fail here instead of returning it.
+            raise ValueError("Publish response did not include 'foundation_session_id'")
+        new_fm = FoundationalModel(
+            id=foundation_session_id,
+            name=name,
+            # Status and creation time are set locally, not read from the response.
+            status="done",
+            epochs=response.get("checkpoint_epoch"),
+            created_at=datetime.now(),
+            _ctx=self._ctx,
+        )
+        return new_fm
     def to_dict(self) -> Dict[str, Any]:
         """Convert to dictionary representation."""
         return {
@@ -650,6 +1019,11 @@ class FoundationalModel:
             'epochs': self.epochs,
             'final_loss': self.final_loss,
             'created_at': self.created_at.isoformat() if self.created_at else None,
+            'updated_at': self.updated_at.isoformat() if self.updated_at else None,
+            'session_type': self.session_type,
+            'compute_cluster': self.compute_cluster,
+            'error_message': self.error_message,
+            'training_progress': self.training_progress,
         }
     def __repr__(self) -> str:

featrixsphere/api/http_client.py CHANGED Viewed

@@ -121,14 +121,34 @@ class HTTPClientMixin:
     def _unwrap_response(self, response_json: Dict[str, Any]) -> Dict[str, Any]:
         """
-        Unwrap server response, handling the 'response' wrapper if present.
+        Unwrap server response, handling wrapper formats.
-        The server sometimes wraps responses in {"response": {...}}.
+        The server may wrap responses as:
+        - {"response": {...}}
+        - {"_meta": {...}, "data": {...}}
+        Captures server metadata when present.
+        The stored metadata is only replaced when a _meta/data wrapper is seen,
+        so it can describe an earlier response than the one just unwrapped.
+        Anything that matches neither wrapper is returned unchanged.
         """
-        if isinstance(response_json, dict) and 'response' in response_json and len(response_json) == 1:
-            return response_json['response']
+        if isinstance(response_json, dict):
+            # Handle _meta/data wrapper (captures server metadata)
+            # NOTE(review): unlike the 'response' branch there is no key-count
+            # check here, so any other top-level keys are dropped.
+            if '_meta' in response_json and 'data' in response_json:
+                self._last_server_metadata = response_json['_meta']
+                return response_json['data']
+            # Handle response wrapper
+            # (only when 'response' is the sole top-level key)
+            if 'response' in response_json and len(response_json) == 1:
+                return response_json['response']
         return response_json
+    @property
+    def last_server_metadata(self) -> Optional[Dict[str, Any]]:
+        """
+        Metadata captured from the most recent _meta/data-wrapped response.
+        Described as containing server info like compute_cluster_time,
+        compute_cluster, compute_cluster_version, etc. Returns None when no
+        metadata has been captured on this instance yet.
+        """
+        try:
+            return self._last_server_metadata
+        except AttributeError:
+            # The attribute is only created once a wrapped response is unwrapped.
+            return None
     def _get_json(
         self,
         endpoint: str,
@@ -139,6 +159,16 @@ class HTTPClientMixin:
         response = self._make_request("GET", endpoint, max_retries=max_retries, **kwargs)
         return self._unwrap_response(response.json())
+    def _get_bytes(
+        self,
+        endpoint: str,
+        max_retries: Optional[int] = None,
+        **kwargs
+    ) -> bytes:
+        """Issue a GET request and return the response body as raw bytes.
+        Intended for binary content such as images; the body is returned
+        without JSON decoding or wrapper handling.
+        """
+        return self._make_request(
+            "GET", endpoint, max_retries=max_retries, **kwargs
+        ).content
     def _post_json(
         self,
         endpoint: str,
@@ -207,3 +237,6 @@ class ClientContext:
     def post_multipart(self, endpoint: str, data: Dict[str, Any] = None,
                        files: Dict[str, Any] = None, **kwargs) -> Dict[str, Any]:
         return self._client._post_multipart(endpoint, data, files, **kwargs)
+    def get_bytes(self, endpoint: str, **kwargs) -> bytes:
+        """Fetch raw bytes from `endpoint` by delegating to the wrapped client's _get_bytes."""
+        fetch = self._client._get_bytes
+        return fetch(endpoint, **kwargs)

featrixsphere/api/prediction_result.py CHANGED Viewed

@@ -23,11 +23,16 @@ class PredictionResult:
         prediction: Raw prediction result (class probabilities or numeric value)
         predicted_class: Predicted class name (for classification)
         confidence: Confidence score (for classification)
+        probabilities: Full probability distribution (for classification)
+        threshold: Classification threshold (for binary classification)
         query_record: Original input record
         predictor_id: ID of predictor that made this prediction
         session_id: Session ID (internal)
         timestamp: When prediction was made
         target_column: Target column name
+        guardrails: Per-column guardrail warnings (if any)
+        ignored_query_columns: Columns in input that were not used (not in training data)
+        available_query_columns: All columns the model knows about
     Usage:
         result = predictor.predict({"age": 35, "income": 50000})
@@ -35,6 +40,14 @@ class PredictionResult:
         print(result.confidence)       # 0.87
         print(result.prediction_uuid)  # UUID for feedback
+        # Check for guardrail warnings
+        if result.guardrails:
+            print(f"Warnings: {len(result.guardrails)} columns with issues")
+        # Check for ignored columns
+        if result.ignored_query_columns:
+            print(f"Ignored: {result.ignored_query_columns}")
         # Send feedback if prediction was wrong
         if result.predicted_class != actual_label:
             feedback = result.send_feedback(ground_truth=actual_label)
@@ -44,14 +57,52 @@ class PredictionResult:
     prediction_uuid: Optional[str] = None
     prediction: Optional[Union[Dict[str, float], float]] = None
     predicted_class: Optional[str] = None
+    probability: Optional[float] = None
     confidence: Optional[float] = None
+    probabilities: Optional[Dict[str, float]] = None
+    threshold: Optional[float] = None
     query_record: Optional[Dict[str, Any]] = None
+    # Documentation fields - explain what prediction/probability/confidence mean
+    readme_prediction: str = field(default="The predicted class label (for classification) or value (for regression).")
+    readme_probability: str = field(default="Raw probability of the predicted class from the model's softmax output.")
+    readme_confidence: str = field(default=(
+        "For binary classification: normalized margin from threshold. "
+        "confidence = (prob - threshold) / (1 - threshold) if predicting positive, "
+        "or (threshold - prob) / threshold if predicting negative. "
+        "Ranges from 0 (at decision boundary) to 1 (maximally certain). "
+        "For multi-class: same as probability."
+    ))
+    readme_threshold: str = field(default=(
+        "Decision boundary for binary classification. "
+        "If P(positive_class) >= threshold, predict positive; otherwise predict negative. "
+        "Calibrated to optimize F1 score on validation data."
+    ))
+    readme_probabilities: str = field(default=(
+        "Full probability distribution across all classes from the model's softmax output. "
+        "Dictionary mapping class labels to their probabilities (sum to 1.0)."
+    ))
+    readme_pos_label: str = field(default=(
+        "The class label considered 'positive' for binary classification metrics. "
+        "Threshold and confidence calculations are relative to this class."
+    ))
     predictor_id: Optional[str] = None
     session_id: Optional[str] = None
     target_column: Optional[str] = None
     timestamp: Optional[datetime] = None
     model_version: Optional[str] = None
+    # Checkpoint info from the model (epoch, metric_type, metric_value)
+    checkpoint_info: Optional[Dict[str, Any]] = None
+    # Guardrails and warnings
+    guardrails: Optional[Dict[str, Any]] = None
+    ignored_query_columns: Optional[list] = None
+    available_query_columns: Optional[list] = None
+    # Feature importance (from leave-one-out ablation)
+    feature_importance: Optional[Dict[str, float]] = None
     # Internal: client context for sending feedback
     _ctx: Optional['ClientContext'] = field(default=None, repr=False)
@@ -73,29 +124,44 @@ class PredictionResult:
         Returns:
             PredictionResult instance
         """
-        # Extract prediction data
+        # Extract prediction data - handle both formats
+        # New format: prediction is the class label, probabilities is separate
+        # Old format: prediction is the probabilities dict
         prediction = response.get('prediction')
-        predicted_class = None
-        confidence = None
-        # For classification, extract class and confidence
-        if isinstance(prediction, dict):
-            # Find the class with highest probability
+        probabilities = response.get('probabilities')
+        predicted_class = response.get('predicted_class')
+        probability = response.get('probability')
+        confidence = response.get('confidence')
+        # For old format where prediction is the probabilities dict
+        if isinstance(prediction, dict) and not probabilities:
+            probabilities = prediction
             if prediction:
                 predicted_class = max(prediction.keys(), key=lambda k: prediction[k])
-                confidence = prediction[predicted_class]
+                probability = prediction[predicted_class]
+                confidence = probability  # Old format: confidence = probability
+        elif isinstance(prediction, str) and not predicted_class:
+            # New format: prediction is already the class label
+            predicted_class = prediction
         return cls(
             prediction_uuid=response.get('prediction_uuid') or response.get('prediction_id'),
             prediction=prediction,
             predicted_class=predicted_class,
+            probability=probability,
             confidence=confidence,
+            probabilities=probabilities,
+            threshold=response.get('threshold'),
             query_record=query_record,
             predictor_id=response.get('predictor_id'),
             session_id=response.get('session_id'),
             target_column=response.get('target_column'),
             timestamp=datetime.now(),
             model_version=response.get('model_version'),
+            checkpoint_info=response.get('checkpoint_info'),
+            guardrails=response.get('guardrails'),
+            ignored_query_columns=response.get('ignored_query_columns'),
+            available_query_columns=response.get('available_query_columns'),
             _ctx=ctx,
         )
@@ -126,18 +192,41 @@ class PredictionResult:
     def to_dict(self) -> Dict[str, Any]:
         """Convert to dictionary representation."""
-        return {
+        # Keys in the literal below are always present (possibly None); the
+        # optional sections after it are added only when their value is truthy.
+        result = {
             'prediction_uuid': self.prediction_uuid,
             'prediction': self.prediction,
             'predicted_class': self.predicted_class,
+            'probability': self.probability,
             'confidence': self.confidence,
+            'probabilities': self.probabilities,
+            'threshold': self.threshold,
             'query_record': self.query_record,
             'predictor_id': self.predictor_id,
             'session_id': self.session_id,
             'target_column': self.target_column,
             'timestamp': self.timestamp.isoformat() if self.timestamp else None,
             'model_version': self.model_version,
+            # Documentation
+            'readme_prediction': self.readme_prediction,
+            'readme_probability': self.readme_probability,
+            'readme_confidence': self.readme_confidence,
+            'readme_threshold': self.readme_threshold,
+            'readme_probabilities': self.readme_probabilities,
+            'readme_pos_label': self.readme_pos_label,
         }
+        # Include checkpoint_info if present
+        # (truthiness checks: empty dicts/lists are omitted as well as None)
+        if self.checkpoint_info:
+            result['checkpoint_info'] = self.checkpoint_info
+        # Include guardrails if present
+        if self.guardrails:
+            result['guardrails'] = self.guardrails
+        if self.ignored_query_columns:
+            result['ignored_query_columns'] = self.ignored_query_columns
+        if self.available_query_columns:
+            result['available_query_columns'] = self.available_query_columns
+        if self.feature_importance:
+            result['feature_importance'] = self.feature_importance
+        return result
 @dataclass

featrixsphere/api/predictor.py CHANGED Viewed

@@ -105,7 +105,8 @@ class Predictor:
     def predict(
         self,
         record: Dict[str, Any],
-        best_metric_preference: Optional[str] = None
+        best_metric_preference: Optional[str] = None,
+        feature_importance: bool = False
     ) -> PredictionResult:
         """
         Make a single prediction.
@@ -113,15 +114,21 @@ class Predictor:
         Args:
             record: Input record dictionary
             best_metric_preference: Metric checkpoint to use ("roc_auc", "pr_auc", or None)
+            feature_importance: If True, compute feature importance via leave-one-out ablation
         Returns:
-            PredictionResult with prediction, confidence, and prediction_uuid
+            PredictionResult with prediction, confidence, and prediction_uuid.
+            If feature_importance=True, also includes feature_importance dict.
         Example:
             result = predictor.predict({"age": 35, "income": 50000})
             print(result.predicted_class)  # "churned"
             print(result.confidence)       # 0.87
             print(result.prediction_uuid)  # UUID for feedback
+            # With feature importance
+            result = predictor.predict(record, feature_importance=True)
+            print(result.feature_importance)  # {"income": 0.15, "age": 0.08, ...}
         """
         if not self._ctx:
             raise ValueError("Predictor not connected to client")
@@ -129,7 +136,44 @@ class Predictor:
         # Clean the record
         cleaned_record = self._clean_record(record)
-        # Build request
+        if feature_importance:
+            # Build N+1 records: original + each feature nulled out
+            columns = list(cleaned_record.keys())
+            batch = [cleaned_record]  # Original first
+            for col in columns:
+                ablated = cleaned_record.copy()
+                ablated[col] = None
+                batch.append(ablated)
+            # Single batch call
+            results = self.batch_predict(
+                batch,
+                show_progress=False,
+                best_metric_preference=best_metric_preference
+            )
+            # Compare: importance = |original_confidence - ablated_confidence|
+            original = results[0]
+            importance = {}
+            original_conf = original.confidence or 0.0
+            for i, col in enumerate(columns):
+                ablated_result = results[i + 1]
+                ablated_conf = ablated_result.confidence or 0.0
+                # Higher delta = more important
+                delta = abs(original_conf - ablated_conf)
+                importance[col] = round(delta, 4)
+            # Sort by importance (highest first)
+            original.feature_importance = dict(sorted(
+                importance.items(),
+                key=lambda x: x[1],
+                reverse=True
+            ))
+            return original
+        # Standard single prediction
         request_payload = {
             "query_record": cleaned_record,
             "predictor_id": self.id,
@@ -196,6 +240,36 @@ class Predictor:
         return results
+    def predict_csv_file(
+        self,
+        csv_path: str,
+        show_progress: bool = True,
+        best_metric_preference: Optional[str] = None
+    ) -> List[PredictionResult]:
+        """
+        Load a CSV file and run batch predictions on all rows.
+        Args:
+            csv_path: Path to the CSV file
+            show_progress: Show progress bar
+            best_metric_preference: Metric checkpoint to use
+        Returns:
+            List of PredictionResult objects
+        Example:
+            results = predictor.predict_csv_file("test_data.csv")
+            for r in results:
+                print(r.predicted_class, r.confidence)
+        """
+        import pandas as pd
+        df = pd.read_csv(csv_path)
+        return self.batch_predict(
+            df,
+            show_progress=show_progress,
+            best_metric_preference=best_metric_preference
+        )
     def explain(
         self,
         record: Dict[str, Any],

featrixsphere/client.py CHANGED Viewed

@@ -2930,24 +2930,26 @@ class FeatrixSphereClient:
     # Single Predictor Functionality
     # =========================================================================
-    def predict(self, session_id: str, record: Dict[str, Any], target_column: str = None,
+    def predict(self, session_id: str, record: Dict[str, Any], target_column: str = None,
                predictor_id: str = None, best_metric_preference: str = None,
-               max_retries: int = None, queue_batches: bool = False) -> Dict[str, Any]:
+               checkpoint_epoch: int = None, max_retries: int = None,
+               queue_batches: bool = False) -> Dict[str, Any]:
         """
         Make a single prediction for a record.
         Args:
             session_id: ID of session with trained predictor
             record: Record dictionary (without target column)
             target_column: Specific target column predictor to use (required if multiple predictors exist and predictor_id not specified)
             predictor_id: Specific predictor ID to use (recommended - more precise than target_column)
             best_metric_preference: Which metric checkpoint to use: "roc_auc", "pr_auc", or None (use default checkpoint) (default: None)
+            checkpoint_epoch: Specific epoch checkpoint to use (e.g., 65 for epoch 65). Overrides best_metric_preference.
             max_retries: Number of retries for errors (default: uses client default)
             queue_batches: If True, queue this prediction for batch processing instead of immediate API call
         Returns:
             Prediction result dictionary if queue_batches=False, or queue ID if queue_batches=True
         Note:
             predictor_id is recommended over target_column for precision. If both are provided, predictor_id takes precedence.
             Use client.list_predictors(session_id) to see available predictor IDs.
@@ -2958,29 +2960,31 @@ class FeatrixSphereClient:
             if should_warn:
                 call_count = len(self._prediction_call_times.get(session_id, []))
                 self._show_batching_warning(session_id, call_count)
         # If queueing is enabled, add to queue and return queue ID
         if queue_batches:
             queue_id = self._add_to_prediction_queue(session_id, record, target_column, predictor_id)
             return {"queued": True, "queue_id": queue_id}
-        # Clean NaN/Inf values
+        # Clean NaN/Inf values
         cleaned_record = self._clean_numpy_values(record)
         cleaned_record = self.replace_nans_with_nulls(cleaned_record)
         # Build request payload - let the server handle predictor resolution
         request_payload = {
             "query_record": cleaned_record,
         }
         # Include whatever the caller provided - server will figure it out
         if target_column:
             request_payload["target_column"] = target_column
         if predictor_id:
             request_payload["predictor_id"] = predictor_id
-        if best_metric_preference:
+        if checkpoint_epoch is not None:
+            request_payload["checkpoint_epoch"] = checkpoint_epoch
+        elif best_metric_preference:
             request_payload["best_metric_preference"] = best_metric_preference
         # Just send it to the server - it has all the smart fallback logic
         response_data = self._post_json(f"/session/{session_id}/predict", request_payload, max_retries=max_retries)
         return response_data
@@ -6651,8 +6655,8 @@ class FeatrixSphereClient:
     def predict_table(self, session_id: str, table_data: Dict[str, Any],
                      target_column: str = None, predictor_id: str = None,
-                     best_metric_preference: str = None, max_retries: int = None,
-                     trace: bool = False) -> Dict[str, Any]:
+                     best_metric_preference: str = None, checkpoint_epoch: int = None,
+                     max_retries: int = None, trace: bool = False) -> Dict[str, Any]:
         """
         Make batch predictions using JSON Tables format.
@@ -6661,6 +6665,8 @@ class FeatrixSphereClient:
             table_data: Data in JSON Tables format, or list of records, or dict with 'table'/'records'
             target_column: Specific target column predictor to use (required if multiple predictors exist)
             predictor_id: Specific predictor ID to use (recommended - more precise than target_column)
+            best_metric_preference: Which metric checkpoint to use: "roc_auc", "pr_auc", or None (default)
+            checkpoint_epoch: Specific epoch checkpoint to use (e.g., 65). Overrides best_metric_preference.
             max_retries: Number of retries for errors (default: uses client default, recommend higher for batch)
             trace: Enable detailed debug logging (default: False)
@@ -6713,7 +6719,9 @@ class FeatrixSphereClient:
                 table_data['target_column'] = target_column
             if predictor_id:
                 table_data['predictor_id'] = predictor_id
-            if best_metric_preference:
+            if checkpoint_epoch is not None:
+                table_data['checkpoint_epoch'] = checkpoint_epoch
+            elif best_metric_preference:
                 table_data['best_metric_preference'] = best_metric_preference
         if trace:
@@ -6747,7 +6755,8 @@ class FeatrixSphereClient:
                 raise
     def predict_records(self, session_id: str, records: List[Dict[str, Any]],
-                       target_column: str = None, predictor_id: str = None, best_metric_preference: str = None,
+                       target_column: str = None, predictor_id: str = None,
+                       best_metric_preference: str = None, checkpoint_epoch: int = None,
                        batch_size: int = 2500, use_async: bool = False,
                        show_progress_bar: bool = True, print_target_column_warning: bool = True,
                        trace: bool = False) -> Dict[str, Any]:
@@ -6759,6 +6768,8 @@ class FeatrixSphereClient:
             records: List of record dictionaries
             target_column: Specific target column predictor to use (required if multiple predictors exist and predictor_id not specified)
             predictor_id: Specific predictor ID to use (recommended - more precise than target_column)
+            best_metric_preference: Which metric checkpoint to use: "roc_auc", "pr_auc", or None (default)
+            checkpoint_epoch: Specific epoch checkpoint to use (e.g., 65). Overrides best_metric_preference.
             batch_size: Number of records to send per API call (default: 2500)
             use_async: Force async processing for large datasets (default: False - async disabled due to pickle issues)
             show_progress_bar: Whether to show progress bar for async jobs (default: True)

{featrixsphere-0.2.6379.dist-info → featrixsphere-0.2.6710.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: featrixsphere
-Version: 0.2.6379
+Version: 0.2.6710
 Summary: Transform any CSV into a production-ready ML model in minutes, not months.
 Home-page: https://github.com/Featrix/sphere
 Author: Featrix

featrixsphere-0.2.6710.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,17 @@
+featrixsphere/__init__.py,sha256=QQglKOYv0bjonuO0-wOkeyyXMWhv3yK0_s5Uaap-GVk,2190
+featrixsphere/client.py,sha256=JNYAHDFtxmhQVuclO3dWEphJuxNFLi-PfvWylZWKF_4,452929
+featrixsphere/api/__init__.py,sha256=quyvuPphVj9wb6v8Dio0SMG9iHgJAmY3asHk3f_zF10,1269
+featrixsphere/api/api_endpoint.py,sha256=i3eCWuaUXftnH1Ai6MFZ7md7pC2FcRAIRO87CBZhyEQ,9000
+featrixsphere/api/client.py,sha256=TvNqrzSPQdw0A4kW48M0S3SDrBRmkc6kTY8UkzO4eRs,13426
+featrixsphere/api/foundational_model.py,sha256=0ZFO-mJs66nVRXQbM0o1fB4HmhzLBXUqbTCF46LVH1k,34925
+featrixsphere/api/http_client.py,sha256=q59-41fHua_7AwtPFCvshlSUKJ-fS0X337L9Ooyn0DI,8440
+featrixsphere/api/notebook_helper.py,sha256=xY9jsao26eaNiFh2s0_TlRZnR8xZ4P_e0EOKr2PtoVs,20060
+featrixsphere/api/prediction_result.py,sha256=HQsJdr89zWxdRx395nevN3aP7ZXZuZxB4UGX5Ykhkfk,12235
+featrixsphere/api/predictor.py,sha256=1v0ffkEjmrO3BP0PNWAXtiAU-AlOQJSiDICmW1bQbGU,20300
+featrixsphere/api/reference_record.py,sha256=-XOTF6ynznB3ouz06w3AF8X9SVId0g_dO20VvGNesUQ,7095
+featrixsphere/api/vector_database.py,sha256=BplxKkPnAbcBX1A4KxFBJVb3qkQ-FH9zi9v2dWG5CgY,7976
+featrixsphere-0.2.6710.dist-info/METADATA,sha256=_gDwyRsSfEa0thWd6IjhMeCEZF5dMc8BfLBL4J2b5HQ,16232
+featrixsphere-0.2.6710.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+featrixsphere-0.2.6710.dist-info/entry_points.txt,sha256=QreJeYfD_VWvbEqPmMXZ3pqqlFlJ1qZb-NtqnyhEldc,51
+featrixsphere-0.2.6710.dist-info/top_level.txt,sha256=AyN4wjfzlD0hWnDieuEHX0KckphIk_aC73XCG4df5uU,14
+featrixsphere-0.2.6710.dist-info/RECORD,,

{featrixsphere-0.2.6379.dist-info → featrixsphere-0.2.6710.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.10.1)
+Generator: setuptools (80.10.2)
 Root-Is-Purelib: true
 Tag: py3-none-any

featrixsphere-0.2.6379.dist-info/RECORD DELETED Viewed

@@ -1,17 +0,0 @@
-featrixsphere/__init__.py,sha256=m4FTeSot2GaITV5l_kD5WrSPZKdKmVbcmRwXZE_nYJk,2190
-featrixsphere/client.py,sha256=Nj6C_Th4jyK7JQIXUJ_URok9AA0OND6DOAjoFbKhs2Q,452098
-featrixsphere/api/__init__.py,sha256=quyvuPphVj9wb6v8Dio0SMG9iHgJAmY3asHk3f_zF10,1269
-featrixsphere/api/api_endpoint.py,sha256=i3eCWuaUXftnH1Ai6MFZ7md7pC2FcRAIRO87CBZhyEQ,9000
-featrixsphere/api/client.py,sha256=TdpujNsJxO4GfPMI_KoemQWV89go3KuK6OPAo9jX6Bs,12574
-featrixsphere/api/foundational_model.py,sha256=ZF5wKMs6SfsNC3XYYXgbRMhnrtmLe6NeckjCCiH0fK0,21628
-featrixsphere/api/http_client.py,sha256=TsOQHHNTDFGAR3mdHevj-0wy1-hPtgHXKe8Egiz5FVo,7269
-featrixsphere/api/notebook_helper.py,sha256=xY9jsao26eaNiFh2s0_TlRZnR8xZ4P_e0EOKr2PtoVs,20060
-featrixsphere/api/prediction_result.py,sha256=Tx7LXzF4XT-U3VqAN_IFc5DvxPnygc78M2usrD-yMu4,7521
-featrixsphere/api/predictor.py,sha256=-vwCKpCfTgZKqzpDnzy1iYZQ-1-MGW8aErvxM9trktw,17652
-featrixsphere/api/reference_record.py,sha256=-XOTF6ynznB3ouz06w3AF8X9SVId0g_dO20VvGNesUQ,7095
-featrixsphere/api/vector_database.py,sha256=BplxKkPnAbcBX1A4KxFBJVb3qkQ-FH9zi9v2dWG5CgY,7976
-featrixsphere-0.2.6379.dist-info/METADATA,sha256=EdpmIuyoX1hr1eelFuZbN-zOwrsIsN9TupOeehDJxys,16232
-featrixsphere-0.2.6379.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
-featrixsphere-0.2.6379.dist-info/entry_points.txt,sha256=QreJeYfD_VWvbEqPmMXZ3pqqlFlJ1qZb-NtqnyhEldc,51
-featrixsphere-0.2.6379.dist-info/top_level.txt,sha256=AyN4wjfzlD0hWnDieuEHX0KckphIk_aC73XCG4df5uU,14
-featrixsphere-0.2.6379.dist-info/RECORD,,

{featrixsphere-0.2.6379.dist-info → featrixsphere-0.2.6710.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{featrixsphere-0.2.6379.dist-info → featrixsphere-0.2.6710.dist-info}/top_level.txt RENAMED Viewed

File without changes

featrixsphere 0.2.6379__py3-none-any.whl → 0.2.6710__py3-none-any.whl

featrixsphere 0.2.6379__py3-none-any.whl → 0.2.6710__py3-none-any.whl