featrixsphere 0.2.1141__py3-none-any.whl → 0.2.1235__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
featrixsphere/__init__.py CHANGED
@@ -38,7 +38,7 @@ Example:
     ... labels=['Experiment A', 'Experiment B'])
 """
 
-__version__ = "0.2.1141"
+__version__ = "0.2.1235"
 __author__ = "Featrix"
 __email__ = "support@featrix.com"
 __license__ = "MIT"
featrixsphere/client.py CHANGED
@@ -633,13 +633,97 @@ class FeatrixSphereClient:
             _client=self
         )
 
-    def get_model_card(self, session_id: str, max_retries: int = None) -> Dict[str, Any]:
+    def update_user_metadata(self, session_id: str, metadata: Dict[str, Any], write_mode: str = "merge") -> Dict[str, Any]:
+        """
+        Update user metadata for a session.
+
+        Args:
+            session_id: The session ID to update metadata for
+            metadata: Dictionary of metadata to update (max 32KB total)
+            write_mode: How to update metadata:
+                - "merge" (default): Merge new metadata with existing (existing keys are updated, new keys are added)
+                - "overwrite": Replace all user_metadata with the new dictionary
+
+        Returns:
+            Dictionary containing the updated session information
+
+        Raises:
+            requests.exceptions.HTTPError: If the request fails
+            ValueError: If write_mode is not "merge" or "overwrite"
+
+        Example:
+            >>> # Merge new metadata with existing
+            >>> client.update_user_metadata(
+            ...     session_id="abc123",
+            ...     metadata={"new_key": "value", "existing_key": "updated_value"},
+            ...     write_mode="merge"
+            ... )
+
+            >>> # Replace all metadata
+            >>> client.update_user_metadata(
+            ...     session_id="abc123",
+            ...     metadata={"only_key": "only_value"},
+            ...     write_mode="overwrite"
+            ... )
+        """
+        if write_mode not in ["merge", "overwrite"]:
+            raise ValueError(f"write_mode must be 'merge' or 'overwrite', got '{write_mode}'")
+
+        request_data = {
+            "user_metadata": metadata,
+            "write_mode": write_mode
+        }
+
+        response_data = self._post_json(f"/session/{session_id}/update_user_metadata", request_data)
+        return response_data
+
+    def is_foundation_model_ready(self, session_id: str, max_retries: int = None) -> Tuple[bool, str]:
+        """
+        Check if a foundation model session is ready to use (training completed).
+
+        Args:
+            session_id: The session ID to check
+            max_retries: Maximum number of retries (defaults to client default)
+
+        Returns:
+            Tuple of (is_ready: bool, status_message: str)
+            - is_ready: True if session is done and model card is available
+            - status_message: Human-readable status message
+
+        Example:
+            >>> is_ready, message = client.is_foundation_model_ready("session_123")
+            >>> if not is_ready:
+            ...     print(f"Foundation model not ready: {message}")
+        """
+        try:
+            session_status = self.get_session_status(session_id, max_retries=max_retries)
+
+            if session_status.status in ["done", "DONE"]:
+                # Check if model card exists
+                try:
+                    self.get_model_card(session_id, max_retries=max_retries, check_status_first=False)
+                    return True, "Foundation model is ready"
+                except (requests.exceptions.HTTPError, FileNotFoundError):
+                    return False, "Session is done but model card is not available yet"
+            else:
+                return False, f"Session is still {session_status.status}. Training may still be in progress."
+
+        except requests.exceptions.HTTPError as e:
+            if e.response.status_code == 404:
+                return False, f"Session {session_id} not found"
+            return False, f"Error checking session status: {e}"
+        except Exception as e:
+            return False, f"Error checking foundation model: {e}"
+
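The `Example` in the docstring above shows a single readiness check; in practice the helper lends itself to a polling loop. A minimal sketch, assuming a configured `FeatrixSphereClient` named `client`; the `wait_until_foundation_model_ready` helper and the session ID are illustrative, not part of the package:

```python
import time

def wait_until_foundation_model_ready(client, session_id, poll_seconds=30, timeout=3600):
    """Illustrative helper (not part of featrixsphere): poll until ready or timeout."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        is_ready, message = client.is_foundation_model_ready(session_id)
        if is_ready:
            return True
        print(f"Not ready yet: {message}")
        time.sleep(poll_seconds)
    return False

# wait_until_foundation_model_ready(client, "session_123")
```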
+    def get_model_card(self, session_id: str, max_retries: int = None, check_status_first: bool = True) -> Dict[str, Any]:
         """
         Get the model card JSON for a given session.
 
         Args:
             session_id: The session ID to get the model card for
             max_retries: Maximum number of retries (defaults to client default)
+            check_status_first: If True, check session status before fetching the model card.
+                Provides better error messages if the session is still training.
 
         Returns:
             Dictionary containing the model card JSON data
@@ -647,12 +731,31 @@ class FeatrixSphereClient:
         Raises:
             requests.exceptions.HTTPError: If the request fails
             FileNotFoundError: If the model card doesn't exist (404)
+            ValueError: If the session is not ready and check_status_first is True
 
         Example:
             >>> client = FeatrixSphereClient()
             >>> model_card = client.get_model_card("session_123")
             >>> print(model_card["model_details"]["name"])
         """
+        # Check session status first to provide better error messages
+        if check_status_first:
+            try:
+                session_status = self.get_session_status(session_id, max_retries=max_retries)
+                if session_status.status not in ["done", "DONE"]:
+                    raise ValueError(
+                        f"Session {session_id} is not ready (status: {session_status.status}). "
+                        f"Model card is only available after training completes. "
+                        f"Use wait_for_session_completion() to wait for training to finish."
+                    )
+            except requests.exceptions.HTTPError as e:
+                # If we can't get status, continue and let the model_card request fail
+                # This handles cases where the session doesn't exist
+                if e.response.status_code == 404:
+                    raise FileNotFoundError(f"Session {session_id} not found") from e
+                # For other HTTP errors, continue to try the model_card request
+                pass
+
         response = self._make_request(
             "GET",
             f"/session/{session_id}/model_card",
@@ -660,6 +763,77 @@ class FeatrixSphereClient:
         )
         return response.json()
 
+    def publish_session(self, session_id: str) -> Dict[str, Any]:
+        """
+        Publish a session by moving it to /sphere/published/<sessionId>.
+        Moves both the session file and the output directory.
+
+        Args:
+            session_id: Session ID to publish
+
+        Returns:
+            Response with published_path, output_path, and status
+
+        Example:
+            ```python
+            result = client.publish_session("abc123")
+            print(f"Published to: {result['published_path']}")
+            ```
+        """
+        response_data = self._post_json(f"/compute/session/{session_id}/publish", {})
+        return response_data
+
+    def deprecate_session(self, session_id: str, warning_message: str, expiration_date: str) -> Dict[str, Any]:
+        """
+        Deprecate a published session with a warning message and expiration date.
+        The session remains available until the expiration date.
+
+        Args:
+            session_id: Session ID to deprecate
+            warning_message: Warning message to display about the deprecation
+            expiration_date: ISO format date string for when the session will be removed (e.g., "2025-12-31T23:59:59Z")
+
+        Returns:
+            Response with deprecation status
+
+        Example:
+            ```python
+            from datetime import datetime, timedelta, timezone
+
+            expiration = (datetime.now(timezone.utc) + timedelta(days=90)).isoformat()
+            result = client.deprecate_session(
+                session_id="abc123",
+                warning_message="This session will be removed on 2025-12-31",
+                expiration_date=expiration
+            )
+            ```
+        """
+        data = {
+            "warning_message": warning_message,
+            "expiration_date": expiration_date
+        }
+        response_data = self._post_json(f"/compute/session/{session_id}/deprecate", data)
+        return response_data
+
+    def unpublish_session(self, session_id: str) -> Dict[str, Any]:
+        """
+        Unpublish a session by moving it back out of /sphere/published/<sessionId>.
+
+        Args:
+            session_id: Session ID to unpublish
+
+        Returns:
+            Response with unpublish status
+
+        Example:
+            ```python
+            result = client.unpublish_session("abc123")
+            print(f"Status: {result['status']}")
+            ```
+        """
+        response_data = self._post_json(f"/compute/session/{session_id}/unpublish", {})
+        return response_data
+
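Taken together, the three methods above form a publish lifecycle. A minimal end-to-end sketch, assuming a configured `client`; the session ID and warning text are hypothetical:

```python
from datetime import datetime, timedelta, timezone

session_id = "abc123"  # hypothetical

# Move the session to /sphere/published/<sessionId>.
result = client.publish_session(session_id)
print(f"Published to: {result['published_path']}")

# Later: mark it deprecated but keep it available for 90 more days.
expiration = (datetime.now(timezone.utc) + timedelta(days=90)).isoformat()
client.deprecate_session(
    session_id,
    warning_message="This session is deprecated and will be removed.",
    expiration_date=expiration,
)

# Or withdraw it from the published area entirely.
client.unpublish_session(session_id)
```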
     def get_sessions_for_org(self, name_prefix: str, max_retries: int = None) -> Dict[str, Any]:
         """
         Get all sessions matching a name prefix across all compute nodes.
@@ -703,8 +877,8 @@ class FeatrixSphereClient:
             >>> print(f"Model card recreated: {model_card['model_info']['name']}")
         """
         response = self._make_request(
-            "POST",
-            f"/session/{session_id}/model_card",
+            "GET",
+            f"/compute/session/{session_id}/model_card",
             max_retries=max_retries
         )
         return response.json()
@@ -1553,12 +1727,54 @@ class FeatrixSphereClient:
     # File Upload
     # =========================================================================
 
+    def upload_file(self, file_path: str) -> str:
+        """
+        Upload a file to the server without creating a session.
+        Returns the filename that can be used in training requests.
+
+        Args:
+            file_path: Path to the file to upload
+
+        Returns:
+            Filename (relative path) that can be used in training requests
+        """
+        from pathlib import Path as PathLib
+        file_path_obj = PathLib(file_path)
+        if not file_path_obj.exists():
+            raise FileNotFoundError(f"File not found: {file_path}")
+
+        with open(file_path_obj, 'rb') as f:
+            files = {'file': (file_path_obj.name, f, 'text/csv' if file_path_obj.suffix == '.csv' else 'application/gzip')}
+            response = self._make_request("POST", "/compute/upload_file", files=files)
+
+        response_data = response.json()
+
+        # Handle S3 upload response (returns s3_url and filename)
+        if 's3_url' in response_data:
+            # S3 upload - extract filename from key or use returned filename
+            filename = response_data.get('filename')
+            if not filename:
+                # Extract from S3 key if filename not provided
+                s3_key = response_data.get('key', '')
+                if s3_key:
+                    filename = PathLib(s3_key).name
+            if not filename:
+                raise ValueError("Server did not return filename in S3 upload response")
+            return filename
+
+        # Handle local file upload response (returns filename)
+        filename = response_data.get('filename')
+        if not filename:
+            raise ValueError("Server did not return filename in upload response")
+
+        return filename
+
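`upload_file()` ships without a docstring example, so here is a minimal usage sketch, assuming a configured `client`; the path, foundation model ID, and column names are hypothetical:

```python
# Upload once; the server returns a relative filename to reference later.
filename = client.upload_file("/data/training_rows.csv")

# Reuse the returned filename in a training request, e.g. when training a
# predictor on a foundation model (see train_on_foundational_model below).
result = client.train_on_foundational_model(
    foundation_model_id="fm_abc123",
    target_column="label",
    target_column_type="set",
    input_filename=filename,
)
```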
     def upload_file_and_create_session(self, file_path: Path, session_name_prefix: str = None, name: str = None, webhooks: Dict[str, str] = None) -> SessionInfo:
         """
-        Upload a CSV file and create a new session.
+        Upload a CSV, Parquet, JSON, or JSONL file and create a new session.
 
         Args:
-            file_path: Path to the CSV file to upload
+            file_path: Path to the CSV, Parquet, JSON, or JSONL file to upload
             session_name_prefix: Optional prefix for the session ID. Session will be named <prefix>-<full-uuid>
             name: Optional name for the embedding space/model (for identification and metadata)
             webhooks: Optional dict with webhook configuration keys (webhook_callback_secret, s3_backup_url, model_id_update_url)
@@ -1622,7 +1838,7 @@
                                      webhooks: Dict[str, str] = None,
                                      epochs: int = None) -> SessionInfo:
         """
-        Upload a pandas DataFrame or CSV file and create a new session.
+        Upload a pandas DataFrame, CSV file, Parquet file, JSON file, or JSONL file and create a new session.
 
         Special Column: __featrix_train_predictor
         ------------------------------------------
@@ -1630,7 +1846,7 @@
         which rows are used for single predictor training.
 
         How it works:
-        - Add a boolean column "__featrix_train_predictor" to your DataFrame/CSV before upload
+        - Add a boolean column "__featrix_train_predictor" to your DataFrame/CSV/Parquet/JSON/JSONL before upload
         - Set it to True for rows you want to use for predictor training
        - Set it to False (or any other value) for rows to exclude from predictor training
        - Embedding space training uses ALL rows (ignores this column)
@@ -1664,7 +1880,7 @@
         Args:
             df: pandas DataFrame to upload (optional if file_path is provided)
             filename: Name to give the uploaded file (default: "data.csv")
-            file_path: Path to CSV file to upload (optional if df is provided)
+            file_path: Path to CSV, Parquet, JSON, or JSONL file to upload (optional if df is provided)
             column_overrides: Dict mapping column names to types ("scalar", "set", "free_string", "free_string_list")
             column_types: Alias for column_overrides (for backward compatibility)
             string_list_delimiter: Delimiter for free_string_list columns (default: "|")
@@ -1705,21 +1921,90 @@
             if not os.path.exists(file_path):
                 raise FileNotFoundError(f"File not found: {file_path}")
 
-            # Check if it's a CSV file
-            if not file_path.lower().endswith(('.csv', '.csv.gz')):
-                raise ValueError("File must be a CSV file (with .csv or .csv.gz extension)")
+            # Check if it's a supported file type
+            file_ext = file_path.lower()
+            if not file_ext.endswith(('.csv', '.csv.gz', '.parquet', '.json', '.jsonl')):
+                raise ValueError("File must be a CSV, Parquet, JSON, or JSONL file (with .csv, .csv.gz, .parquet, .json, or .jsonl extension)")
 
             print(f"Uploading file: {file_path}")
 
             # Read the file content
             if file_path.endswith('.gz'):
-                # Already gzipped
+                # Already gzipped CSV
                 with gzip.open(file_path, 'rb') as f:
                     file_content = f.read()
                 upload_filename = os.path.basename(file_path)
                 content_type = 'application/gzip'
+            elif file_path.lower().endswith(('.json', '.jsonl')):
+                # JSON/JSONL file - read as DataFrame, convert to CSV, then compress
+                print(f"Reading {'JSONL' if file_path.lower().endswith('.jsonl') else 'JSON'} file...")
+                try:
+                    from featrix.neural.input_data_file import featrix_wrap_read_json_file
+                    json_df = featrix_wrap_read_json_file(file_path)
+                    if json_df is None:
+                        raise ValueError(f"Failed to parse {'JSONL' if file_path.lower().endswith('.jsonl') else 'JSON'} file")
+                except ImportError:
+                    # Fallback to pandas if featrix wrapper not available
+                    if file_path.lower().endswith('.jsonl'):
+                        # JSONL: one JSON object per line
+                        import json
+                        records = []
+                        with open(file_path, 'r', encoding='utf-8') as f:
+                            for line in f:
+                                if line.strip():
+                                    records.append(json.loads(line))
+                        json_df = pd.DataFrame(records)
+                    else:
+                        # Regular JSON
+                        json_df = pd.read_json(file_path)
+
+                # Clean NaN values before CSV conversion
+                cleaned_df = json_df.where(pd.notna(json_df), None)
+
+                # Convert to CSV and compress
+                csv_buffer = io.StringIO()
+                cleaned_df.to_csv(csv_buffer, index=False)
+                csv_data = csv_buffer.getvalue().encode('utf-8')
+
+                print(f"Compressing {'JSONL' if file_path.lower().endswith('.jsonl') else 'JSON'} (converted to CSV)...")
+                compressed_buffer = io.BytesIO()
+                with gzip.GzipFile(fileobj=compressed_buffer, mode='wb') as gz:
+                    gz.write(csv_data)
+                file_content = compressed_buffer.getvalue()
+                upload_filename = os.path.basename(file_path).replace('.jsonl', '.csv.gz').replace('.json', '.csv.gz')
+                content_type = 'application/gzip'
+
+                original_size = len(csv_data)
+                compressed_size = len(file_content)
+                compression_ratio = (1 - compressed_size / original_size) * 100
+                print(f"Converted {'JSONL' if file_path.lower().endswith('.jsonl') else 'JSON'} to CSV and compressed from {original_size:,} to {compressed_size:,} bytes ({compression_ratio:.1f}% reduction)")
+            elif file_path.lower().endswith('.parquet'):
+                # Parquet file - read as DataFrame, convert to CSV, then compress
+                print("Reading Parquet file...")
+                parquet_df = pd.read_parquet(file_path)
+
+                # Clean NaN values before CSV conversion
+                cleaned_df = parquet_df.where(pd.notna(parquet_df), None)
+
+                # Convert to CSV and compress
+                csv_buffer = io.StringIO()
+                cleaned_df.to_csv(csv_buffer, index=False)
+                csv_data = csv_buffer.getvalue().encode('utf-8')
+
+                print("Compressing Parquet (converted to CSV)...")
+                compressed_buffer = io.BytesIO()
+                with gzip.GzipFile(fileobj=compressed_buffer, mode='wb') as gz:
+                    gz.write(csv_data)
+                file_content = compressed_buffer.getvalue()
+                upload_filename = os.path.basename(file_path).replace('.parquet', '.csv.gz')
+                content_type = 'application/gzip'
+
+                original_size = len(csv_data)
+                compressed_size = len(file_content)
+                compression_ratio = (1 - compressed_size / original_size) * 100
+                print(f"Converted Parquet to CSV and compressed from {original_size:,} to {compressed_size:,} bytes ({compression_ratio:.1f}% reduction)")
             else:
-                # Read CSV and compress it
+                # Regular CSV file - read and compress it
                 with open(file_path, 'rb') as f:
                     csv_content = f.read()
 
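With this change the single-file upload path accepts Parquet/JSON/JSONL and converts them to gzipped CSV on the client before upload. A short usage sketch, assuming a configured `client`; the path and name are hypothetical:

```python
from pathlib import Path

# .parquet, .json, and .jsonl now work alongside .csv / .csv.gz; non-CSV
# inputs are read into a DataFrame, converted to CSV, and gzip-compressed
# locally before being sent to the server.
session = client.upload_file_and_create_session(
    file_path=Path("/data/events.parquet"),
    name="events-embedding-space",
)
```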
@@ -3868,9 +4153,10 @@
 
     def train_on_foundational_model(self, foundation_model_id: str, target_column: str, target_column_type: str,
                                     input_filename: str = None,
+                                    df = None,
                                     name: str = None,
                                     session_name_prefix: str = None,
-                                    epochs: int = 0, batch_size: int = 0, learning_rate: float = 0.001,
+                                    epochs: int = 0,
                                     rare_label_value: str = None,
                                     class_imbalance: dict = None,
                                     optimize_for: str = "balanced",
@@ -3889,11 +4175,11 @@
             target_column: Name of the target column to predict
             target_column_type: Type of target column ("set" or "scalar")
             input_filename: Optional input data file (uses foundation model's data if not provided)
+            df: Optional pandas DataFrame with training data (uses foundation model's data if not provided).
+                Use input_filename OR df (not both) to train the predictor on different data than the foundation model.
             name: Optional name for the new session
             session_name_prefix: Optional prefix for session ID. Session will be named <prefix>-<uuid>
             epochs: Number of training epochs (default: 0; automatic)
-            batch_size: Training batch size (default: 0; automatic)
-            learning_rate: Learning rate for training (default: 0.001)
             rare_label_value: For binary classification, which class is the rare/minority class for metrics (default: None)
             class_imbalance: Expected class ratios/counts from real world for sampled data (default: None)
             optimize_for: Optimization target - "balanced" (F1 score), "precision", or "recall" (default: "balanced")
@@ -3908,44 +4194,122 @@
         print(f"Training predictor on foundation model {foundation_model_id}...")
         print(f" Target: {target_column} ({target_column_type})")
 
-        data = {
-            "foundation_model_id": foundation_model_id,
-            "target_column": target_column,
-            "target_column_type": target_column_type,
-            "epochs": epochs,
-            "batch_size": batch_size,
-            "learning_rate": learning_rate,
-            "optimize_for": optimize_for,
-        }
-
-        if input_filename:
-            # Clean up input_filename: extract just the filename if it's an absolute path
-            # The file should be uploaded first or already exist on the server
-            from pathlib import Path
-            input_path = Path(input_filename)
-            if input_path.is_absolute():
-                # Extract just the filename - client should upload file first
-                cleaned_filename = input_path.name
-                print(f"⚠️ Note: Extracted filename '{cleaned_filename}' from absolute path '{input_filename}'")
-                print(f" Make sure the file has been uploaded to the server first")
-                data["input_filename"] = cleaned_filename
-            else:
-                data["input_filename"] = input_filename
-        if name:
-            data["name"] = name
-        if session_name_prefix:
-            data["session_name_prefix"] = session_name_prefix
-        if rare_label_value:
-            data["rare_label_value"] = rare_label_value
-        if class_imbalance:
-            data["class_imbalance"] = class_imbalance
-        if webhooks:
-            data["webhooks"] = webhooks
+        # Get the compute cluster from the foundation model session
+        # This ensures we upload files to the same node where the foundation model lives
+        foundation_session = self.get_session_status(foundation_model_id)
+        foundation_compute_cluster = self.get_last_server_metadata()
+        foundation_compute_cluster = foundation_compute_cluster.get('compute_cluster') if foundation_compute_cluster else None
+
+        # Temporarily set compute cluster for file uploads if we found one
+        original_compute_cluster = self.compute_cluster
+        original_headers = self.session.headers.copy()
+        if foundation_compute_cluster:
+            self.set_compute_cluster(foundation_compute_cluster)
+            if verbose:
+                print(f" Using compute cluster: {foundation_compute_cluster}")
 
-        response_data = self._post_json("/compute/train_on_foundational_model", data)
+        try:
+            # Validate that only one data source is provided
+            if input_filename and df is not None:
+                raise ValueError("Provide either input_filename or df, not both")
+
+            # If DataFrame provided, save to temp file and upload it
+            temp_file = None
+            if df is not None:
+                import pandas as pd
+                import tempfile
+                import os
+
+                if not isinstance(df, pd.DataFrame):
+                    raise ValueError("df must be a pandas DataFrame")
+
+                if verbose:
+                    print(f"📊 Using provided DataFrame ({len(df)} rows, {len(df.columns)} columns)")
+
+                # Create temporary CSV file
+                temp_file = tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False)
+                temp_file_path = temp_file.name
+                temp_file.close()
+
+                # Save DataFrame to temp file
+                df.to_csv(temp_file_path, index=False)
+
+                if verbose:
+                    print(f"📁 Saved to temporary file: {os.path.basename(temp_file_path)}")
+                    print(f"📤 Uploading file to server...")
+
+                # Upload the file
+                uploaded_filename = self.upload_file(temp_file_path)
+                input_filename = uploaded_filename
+
+                if verbose:
+                    print(f"✅ File uploaded: {input_filename}")
+
+                # Clean up temp file
+                try:
+                    os.unlink(temp_file_path)
+                except Exception:
+                    pass  # Ignore cleanup errors
 
-        new_session_id = response_data.get('session_id')
-        print(f"✅ Predictor training session created: {new_session_id}")
+            data = {
+                "foundation_model_id": foundation_model_id,
+                "target_column": target_column,
+                "target_column_type": target_column_type,
+                "epochs": epochs,
+                "optimize_for": optimize_for,
+            }
+
+            if input_filename:
+                # If absolute path provided, upload the file first
+                from pathlib import Path
+                input_path = Path(input_filename)
+                if input_path.is_absolute():
+                    # Upload the file first, then use the uploaded filename
+                    if not input_path.exists():
+                        raise FileNotFoundError(f"Input file not found: {input_filename}")
+
+                    if verbose:
+                        print(f"📤 Uploading file from absolute path: {input_filename}")
+
+                    # Upload the file
+                    uploaded_filename = self.upload_file(str(input_path))
+
+                    if verbose:
+                        print(f"✅ File uploaded as: {uploaded_filename}")
+
+                    data["input_filename"] = uploaded_filename
+                else:
+                    # Relative filename - assume it's already on the server
+                    data["input_filename"] = input_filename
+            if name:
+                data["name"] = name
+            if session_name_prefix:
+                data["session_name_prefix"] = session_name_prefix
+            if rare_label_value:
+                data["rare_label_value"] = rare_label_value
+            if class_imbalance:
+                data["class_imbalance"] = class_imbalance
+            if webhooks:
+                data["webhooks"] = webhooks
+
+            response_data = self._post_json("/compute/train_on_foundational_model", data)
+
+            new_session_id = response_data.get('session_id')
+            print(f"✅ Predictor training session created: {new_session_id}")
+
+            # Restore original compute cluster setting
+            if original_compute_cluster != self.compute_cluster:
+                if original_compute_cluster:
+                    self.set_compute_cluster(original_compute_cluster)
+                else:
+                    self.session.headers = original_headers
+        finally:
+            # Ensure we restore headers even if there's an error
+            if original_compute_cluster != self.compute_cluster:
+                if original_compute_cluster:
+                    self.set_compute_cluster(original_compute_cluster)
+                else:
+                    self.session.headers = original_headers
 
         if verbose:
             print(f"⏳ Waiting for training to complete...")
@@ -4751,9 +5115,8 @@
 
     def train_predictor_more(self, session_id: str, epochs: int = 50,
                              predictor_id: str = None, target_column: str = None,
-                             batch_size: int = 0, learning_rate: float = None,
                              poll_interval: int = 30, max_poll_time: int = 3600,
-                             verbose: bool = True) -> Dict[str, Any]:
+                             verbose: bool = True, webhooks: Dict[str, str] = None) -> Dict[str, Any]:
         """
         Continue training an existing single predictor for more epochs.
         Loads the existing predictor and resumes training from where it left off.
@@ -4763,11 +5126,10 @@
             epochs: Additional epochs to train (required)
             predictor_id: Predictor ID to continue training (optional, highest priority)
             target_column: Target column name to find predictor (optional, alternative to predictor_id)
-            batch_size: Batch size for continuation (0 = use existing from predictor)
-            learning_rate: Learning rate for continuation (None = use existing from predictor)
             poll_interval: Seconds between status checks (default: 30)
             max_poll_time: Maximum time to poll in seconds (default: 3600 = 1 hour)
             verbose: Whether to print status updates (default: True)
+            webhooks: Optional dict with webhook configuration keys (webhook_callback_secret, s3_backup_url, model_id_update_url)
 
         Returns:
             Response with continuation start confirmation or completion status
@@ -4790,15 +5152,14 @@
 
         data = {
             "epochs": epochs,
-            "batch_size": batch_size,
         }
 
         if predictor_id:
             data["predictor_id"] = predictor_id
         if target_column:
             data["target_column"] = target_column
-        if learning_rate is not None:
-            data["learning_rate"] = learning_rate
+        if webhooks:
+            data["webhooks"] = webhooks
 
         if verbose:
             print(f"🔄 Continuing training for predictor on session {session_id}")
@@ -4888,6 +5249,139 @@
             print(f"❌ Error starting predictor continuation: {e}")
             raise
 
+    def foundation_model_train_more(self, session_id: str, es_id: str = None, data_passes: int = None,
+                                    epochs: int = None, poll_interval: int = 30, max_poll_time: int = 3600,
+                                    verbose: bool = True, webhooks: Dict[str, str] = None) -> Dict[str, Any]:
+        """
+        Continue training an existing foundation model (embedding space) for more epochs.
+        Loads the existing embedding space and resumes training from where it left off.
+
+        Args:
+            session_id: Session ID containing the trained foundation model
+            es_id: Embedding space ID (optional, uses the session's ES if not provided)
+            data_passes: Additional epochs to train (preferred, default: 50)
+            epochs: Deprecated alias for data_passes (kept for backward compatibility)
+            poll_interval: Seconds between status checks (default: 30)
+            max_poll_time: Maximum time to poll in seconds (default: 3600 = 1 hour)
+            verbose: Whether to print status updates (default: True)
+            webhooks: Optional dict with webhook configuration keys (webhook_callback_secret, s3_backup_url, model_id_update_url)
+
+        Returns:
+            Response with continuation start confirmation or completion status
+
+        Example:
+            ```python
+            # Continue training for 50 more epochs
+            result = client.foundation_model_train_more(
+                session_id="abc123",
+                data_passes=50
+            )
+            ```
+        """
+        # Support both data_passes and epochs for compatibility
+        if data_passes is None and epochs is None:
+            data_passes = 50  # Default
+        elif data_passes is None:
+            data_passes = epochs  # Use epochs if data_passes not provided
+        # If both are provided, data_passes takes precedence
+
+        if data_passes <= 0:
+            raise ValueError("data_passes (or epochs) must be > 0 (specify additional epochs to train)")
+
+        data = {
+            "data_passes": data_passes,
+        }
+
+        if es_id:
+            data["es_id"] = es_id
+        if webhooks:
+            data["webhooks"] = webhooks
+
+        if verbose:
+            print(f"🔄 Continuing training for foundation model on session {session_id}")
+            print(f" Additional epochs: {data_passes}")
+            if es_id:
+                print(f" ES ID: {es_id}")
+
+        try:
+            response_data = self._post_json(f"/compute/session/{session_id}/train_foundation_model_more", data)
+
+            if verbose:
+                print(f"✅ Foundation model continuation started: {response_data.get('message')}")
+
+            # Poll for completion if requested
+            if poll_interval > 0 and max_poll_time > 0:
+                import time
+                start_time = time.time()
+                last_status = ""
+
+                while time.time() - start_time < max_poll_time:
+                    try:
+                        session_info = self.get_session_status(session_id)
+                        jobs = session_info.jobs if hasattr(session_info, 'jobs') else {}
+
+                        # Find continuation jobs
+                        es_jobs = {j_id: j for j_id, j in jobs.items()
+                                   if j.get('type') == 'train_es'}
+
+                        if not es_jobs:
+                            if verbose:
+                                print("✅ No continuation jobs found - training may have completed")
+                            break
+
+                        # Check job statuses
+                        running_jobs = [j_id for j_id, j in es_jobs.items() if j.get('status') == 'running']
+                        completed_jobs = [j_id for j_id, j in es_jobs.items() if j.get('status') == 'done']
+                        failed_jobs = [j_id for j_id, j in es_jobs.items() if j.get('status') == 'failed']
+
+                        current_status = f"Running: {len(running_jobs)}, Done: {len(completed_jobs)}, Failed: {len(failed_jobs)}"
+                        if current_status != last_status and verbose:
+                            print(f"📊 Status: {current_status}")
+                        last_status = current_status
+
+                        if not running_jobs and (completed_jobs or failed_jobs):
+                            if completed_jobs:
+                                if verbose:
+                                    print(f"✅ Foundation model continuation completed successfully!")
+                                return {
+                                    "message": "Foundation model continuation completed successfully",
+                                    "session_id": session_id,
+                                    "status": "completed",
+                                    "additional_epochs": data_passes
+                                }
+                            else:
+                                if verbose:
+                                    print(f"❌ Foundation model continuation failed")
+                                return {
+                                    "message": "Foundation model continuation failed",
+                                    "session_id": session_id,
+                                    "status": "failed",
+                                    "failed_jobs": failed_jobs
+                                }
+
+                        time.sleep(poll_interval)
+                    except Exception as poll_error:
+                        if verbose:
+                            print(f"⚠️ Error during polling: {poll_error}")
+                        time.sleep(poll_interval)
+
+                # Timeout
+                if verbose:
+                    print(f"⏱️ Polling timeout reached ({max_poll_time}s)")
+                return {
+                    "message": "Polling timeout",
+                    "session_id": session_id,
+                    "status": "timeout",
+                    "additional_epochs": data_passes
+                }
+
+            return response_data
+
+        except Exception as e:
+            if verbose:
+                print(f"❌ Error starting foundation model continuation: {e}")
+            raise
+
     def _train_single_predictor_with_file(
         self,
         session_id: str,
@@ -5965,7 +6459,24 @@
         if not file_path.exists():
             raise FileNotFoundError(f"File not found: {file_path}")
 
-        df = pd.read_csv(file_path)
+        # Support CSV, Parquet, JSON, and JSONL files
+        file_path_str = str(file_path).lower()
+        if file_path_str.endswith('.parquet'):
+            df = pd.read_parquet(file_path)
+        elif file_path_str.endswith('.jsonl'):
+            # JSONL: one JSON object per line
+            import json
+            records = []
+            with open(file_path, 'r', encoding='utf-8') as f:
+                for line in f:
+                    if line.strip():
+                        records.append(json.loads(line))
+            df = pd.DataFrame(records)
+        elif file_path_str.endswith('.json'):
+            # Regular JSON
+            df = pd.read_json(file_path)
+        else:
+            df = pd.read_csv(file_path)
 
         # Convert to JSON Tables format and clean NaNs
         table_data = JSONTablesEncoder.from_dataframe(df)
@@ -6119,11 +6630,11 @@
     def run_csv_predictions(self, session_id: str, csv_file: str, target_column: str = None,
                             sample_size: int = None, remove_target: bool = True) -> Dict[str, Any]:
         """
-        Run predictions on a CSV file with automatic accuracy calculation.
+        Run predictions on a CSV, Parquet, JSON, or JSONL file with automatic accuracy calculation.
 
         Args:
             session_id: ID of session with trained predictor
-            csv_file: Path to CSV file
+            csv_file: Path to CSV, Parquet, JSON, or JSONL file
             target_column: Name of target column (for accuracy calculation)
             sample_size: Number of records to test (None = all records)
             remove_target: Whether to remove target column from prediction input
@@ -6133,8 +6644,24 @@
         """
         import pandas as pd
 
-        # Load CSV
-        df = pd.read_csv(csv_file)
+        # Load CSV, Parquet, JSON, or JSONL
+        csv_file_lower = csv_file.lower()
+        if csv_file_lower.endswith('.parquet'):
+            df = pd.read_parquet(csv_file)
+        elif csv_file_lower.endswith('.jsonl'):
+            # JSONL: one JSON object per line
+            import json
+            records = []
+            with open(csv_file, 'r', encoding='utf-8') as f:
+                for line in f:
+                    if line.strip():
+                        records.append(json.loads(line))
+            df = pd.DataFrame(records)
+        elif csv_file_lower.endswith('.json'):
+            # Regular JSON
+            df = pd.read_json(csv_file)
+        else:
+            df = pd.read_csv(csv_file)
 
         # Handle target column
         actual_values = None
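The same multi-format loading now applies to batch predictions. A minimal sketch, assuming a configured `client` with a trained predictor; the session ID, path, and column name are hypothetical:

```python
results = client.run_csv_predictions(
    session_id="abc123",
    csv_file="/data/holdout.parquet",  # CSV, Parquet, JSON, or JSONL
    target_column="label",             # enables the automatic accuracy calculation
    sample_size=500,
)
```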
featrixsphere-0.2.1235.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: featrixsphere
-Version: 0.2.1141
+Version: 0.2.1235
 Summary: Transform any CSV into a production-ready ML model in minutes, not months.
 Home-page: https://github.com/Featrix/sphere
 Author: Featrix
featrixsphere-0.2.1235.dist-info/RECORD ADDED
@@ -0,0 +1,9 @@
+featrixsphere/__init__.py,sha256=Md3n-bIkA4f-Jdk1zoINUXqmnhGnM0osazYM1lk_1hY,1888
+featrixsphere/cli.py,sha256=AW9O3vCvCNJ2UxVGN66eRmeN7XLSiHJlvK6JLZ9UJXc,13358
+featrixsphere/client.py,sha256=8OKx-pUcZjStdR7Fy4yGMMcWTuqaF1DeSPnxXPcoKtQ,384045
+featrixsphere/test_client.py,sha256=4SiRbib0ms3poK0UpnUv4G0HFQSzidF3Iswo_J2cjLk,11981
+featrixsphere-0.2.1235.dist-info/METADATA,sha256=xKOgY6aH86jvurdKvPbV-_n1YnZi5ZxGtmxH9aQuGTs,16232
+featrixsphere-0.2.1235.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+featrixsphere-0.2.1235.dist-info/entry_points.txt,sha256=QreJeYfD_VWvbEqPmMXZ3pqqlFlJ1qZb-NtqnyhEldc,51
+featrixsphere-0.2.1235.dist-info/top_level.txt,sha256=AyN4wjfzlD0hWnDieuEHX0KckphIk_aC73XCG4df5uU,14
+featrixsphere-0.2.1235.dist-info/RECORD,,
featrixsphere-0.2.1141.dist-info/RECORD DELETED
@@ -1,9 +0,0 @@
-featrixsphere/__init__.py,sha256=FMxe64cn4iu9Ce5UDkOAtWZQMeWSijwX-tsiTDvblkM,1888
-featrixsphere/cli.py,sha256=AW9O3vCvCNJ2UxVGN66eRmeN7XLSiHJlvK6JLZ9UJXc,13358
-featrixsphere/client.py,sha256=TsiV-nr0VbBS1jJfidk5zrhOx6StolKsSn_txH0wmmg,358958
-featrixsphere/test_client.py,sha256=4SiRbib0ms3poK0UpnUv4G0HFQSzidF3Iswo_J2cjLk,11981
-featrixsphere-0.2.1141.dist-info/METADATA,sha256=27KEfgeXQqUNAlO3HIFhYkJU43YN3RdCjTJ_-viNJow,16232
-featrixsphere-0.2.1141.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-featrixsphere-0.2.1141.dist-info/entry_points.txt,sha256=QreJeYfD_VWvbEqPmMXZ3pqqlFlJ1qZb-NtqnyhEldc,51
-featrixsphere-0.2.1141.dist-info/top_level.txt,sha256=AyN4wjfzlD0hWnDieuEHX0KckphIk_aC73XCG4df5uU,14
-featrixsphere-0.2.1141.dist-info/RECORD,,