featrixsphere 0.2.5183__py3-none-any.whl → 0.2.5566__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
featrixsphere/__init__.py CHANGED
@@ -38,7 +38,7 @@ Example:
  ... labels=['Experiment A', 'Experiment B'])
  """

- __version__ = "0.2.5183"
+ __version__ = "0.2.5566"
  __author__ = "Featrix"
  __email__ = "support@featrix.com"
  __license__ = "MIT"
featrixsphere/client.py CHANGED
@@ -8,6 +8,7 @@ with a focus on the new single predictor functionality.

  import json
  import time
+ import logging
  import requests
  from pathlib import Path
  from typing import Dict, Any, Optional, List, Tuple, Union
@@ -22,6 +23,8 @@ import hashlib
  import numpy as np
  from datetime import datetime

+ logger = logging.getLogger(__name__)
+
  # Optional imports for plotting functionality
  try:
  import matplotlib.pyplot as plt
@@ -66,6 +69,7 @@ class SessionInfo:
  jobs: Dict[str, Any]
  job_queue_positions: Dict[str, Any]
  job_plan: List[Dict[str, Any]] = field(default_factory=list)
+ compute_cluster: Optional[str] = None
  _client: Optional['FeatrixSphereClient'] = None

  def predictors(self) -> List[Dict[str, Any]]:
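The hunk above gives SessionInfo an optional compute_cluster field that defaults to None, so sessions reported by older servers still load. A minimal sketch of reading it; the get_session_status accessor is assumed here purely for illustration and may differ from the client's actual method name:

    # Hypothetical accessor, shown only to illustrate the new field.
    session = client.get_session_status(session_id)
    if session.compute_cluster:
        print(f"Session is pinned to cluster: {session.compute_cluster}")
    else:
        print("No compute cluster reported (older session or server)")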
@@ -211,18 +215,18 @@ class PredictionBatch:
  )

  # Populate cache with results
- predictions = batch_results.get('results', {})
+ predictions = batch_results.get('predictions', [])
  successful = 0
  failed = 0
-
- for queue_id, prediction in predictions.items():
+
+ for prediction in predictions:
  if isinstance(prediction, dict):
  row_index = prediction.get('row_index', 0)
  if row_index < len(records):
  record = records[row_index]
  record_hash = self._hash_record(record)
  self._cache[record_hash] = prediction
-
+
  if prediction.get('prediction') is not None:
  successful += 1
  else:
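The loop above reflects a change in the batch-results payload: instead of a dict keyed by queue ID, the response now carries a flat predictions list whose entries hold row_index and prediction. A minimal sketch of consuming that shape, mirroring the cache-population loop (field names are taken from the diff; the surrounding variables are placeholders):

    predictions = batch_results.get('predictions', [])
    for prediction in predictions:
        if not isinstance(prediction, dict):
            continue
        row_index = prediction.get('row_index', 0)   # position of the corresponding input record
        if prediction.get('prediction') is not None:
            print(f"row {row_index}: {prediction['prediction']}")
        else:
            print(f"row {row_index}: prediction failed")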
@@ -239,15 +243,16 @@ class PredictionBatch:
  class FeatrixSphereClient:
  """Client for interacting with the Featrix Sphere API."""

- def __init__(self, base_url: str = "https://sphere-api.featrix.com",
- default_max_retries: int = 5,
+ def __init__(self, base_url: str = "https://sphere-api.featrix.com",
+ default_max_retries: int = 5,
  default_timeout: int = 30,
  retry_base_delay: float = 2.0,
  retry_max_delay: float = 60.0,
- compute_cluster: str = None):
+ compute_cluster: str = None,
+ trace: bool = False):
  """
  Initialize the API client.
-
+
  Args:
  base_url: Base URL of the API server
  default_max_retries: Default number of retries for failed requests
@@ -255,6 +260,7 @@ class FeatrixSphereClient:
  retry_base_delay: Base delay for exponential backoff in seconds
  retry_max_delay: Maximum delay for exponential backoff in seconds
  compute_cluster: Compute cluster name (e.g., "burrito", "churro") for X-Featrix-Node header
+ trace: Enable detailed debug logging of all API requests (default: False)
  """
  self.base_url = base_url.rstrip('/')
  self.session = requests.Session()
@@ -286,6 +292,17 @@ class FeatrixSphereClient:
  self._prediction_cache = {} # session_id -> {record_hash: prediction_result}
  self._cache_mode = {} # session_id -> 'populate' or 'fetch'
  self._cache_stats = {} # session_id -> {hits: int, misses: int, populated: int}
+
+ # Trace/debug mode
+ self.trace = trace
+ if trace:
+ import logging
+ logging.basicConfig(level=logging.INFO)
+ logger.info(f"[TRACE] FeatrixSphereClient initialized")
+ logger.info(f"[TRACE] base_url: {self.base_url}")
+ logger.info(f"[TRACE] compute_cluster: {compute_cluster}")
+ logger.info(f"[TRACE] default_timeout: {default_timeout}")
+ logger.info(f"[TRACE] headers: {dict(self.session.headers)}")

  def set_compute_cluster(self, cluster: str) -> None:
  """
@@ -349,13 +366,30 @@ class FeatrixSphereClient:
  url = f"{self.base_url}{endpoint}"
  start_time = time.time()
  attempt = 0
-
+
+ if self.trace:
+ logger.info(f"[TRACE] {method} {url}")
+ logger.info(f"[TRACE] headers: {dict(self.session.headers)}")
+ if 'json' in kwargs:
+ json_data = kwargs['json']
+ if isinstance(json_data, dict):
+ logger.info(f"[TRACE] json keys: {list(json_data.keys())}")
+ # Log sample of data without flooding
+ if 'records' in json_data:
+ logger.info(f"[TRACE] num records: {len(json_data.get('records', []))}")
+ if 'table' in json_data:
+ logger.info(f"[TRACE] has table data")
+
  while True:
  attempt += 1
  elapsed = time.time() - start_time
-
+
  try:
  response = self.session.request(method, url, **kwargs)
+ if self.trace:
+ logger.info(f"[TRACE] response: HTTP {response.status_code} ({elapsed:.2f}s)")
+ if response.status_code >= 400:
+ logger.info(f"[TRACE] response body: {response.text[:500]}")
  response.raise_for_status()
  return response

@@ -664,6 +698,7 @@ class FeatrixSphereClient:
  jobs={},
  job_queue_positions={},
  job_plan=[],
+ compute_cluster=response_data.get('compute_cluster'),
  _client=self
  )

@@ -696,6 +731,7 @@ class FeatrixSphereClient:
  jobs=jobs,
  job_queue_positions=positions,
  job_plan=job_plan,
+ compute_cluster=session.get('compute_cluster'),
  _client=self
  )

@@ -4942,11 +4978,14 @@ class FeatrixSphereClient:
  pass

  new_session_id = response_data.get('session_id')
+ compute_cluster = response_data.get('compute_cluster')
  print(f"✅ Predictor training session created: {new_session_id}")
-
+ if compute_cluster:
+ print(f" Compute cluster: {compute_cluster}")
+
  except Exception as e:
  raise
-
+
  if verbose:
  print(f"⏳ Waiting for training to complete...")
  return self.wait_for_session_completion(
@@ -4962,6 +5001,7 @@ class FeatrixSphereClient:
  jobs={},
  job_queue_positions={},
  job_plan=[],
+ compute_cluster=compute_cluster,
  _client=self
  )

@@ -6396,32 +6436,53 @@ class FeatrixSphereClient:
  # JSON Tables Batch Prediction
  # =========================================================================

- def predict_table(self, session_id: str, table_data: Dict[str, Any],
- target_column: str = None, predictor_id: str = None,
- best_metric_preference: str = None, max_retries: int = None) -> Dict[str, Any]:
+ def predict_table(self, session_id: str, table_data: Dict[str, Any],
+ target_column: str = None, predictor_id: str = None,
+ best_metric_preference: str = None, max_retries: int = None,
+ trace: bool = False) -> Dict[str, Any]:
  """
  Make batch predictions using JSON Tables format.
-
+
  Args:
  session_id: ID of session with trained predictor
  table_data: Data in JSON Tables format, or list of records, or dict with 'table'/'records'
  target_column: Specific target column predictor to use (required if multiple predictors exist)
  predictor_id: Specific predictor ID to use (recommended - more precise than target_column)
  max_retries: Number of retries for errors (default: uses client default, recommend higher for batch)
-
+ trace: Enable detailed debug logging (default: False)
+
  Returns:
  Batch prediction results in JSON Tables format
-
+
  Raises:
  ValueError: If multiple predictors exist and neither target_column nor predictor_id is specified
  PredictorNotFoundError: If no single predictor has been trained for this session
  """
+ if trace:
+ print(f"[TRACE] predict_table called")
+ print(f"[TRACE] session_id: {session_id}")
+ print(f"[TRACE] target_column: {target_column}")
+ print(f"[TRACE] predictor_id: {predictor_id}")
+ print(f"[TRACE] table_data type: {type(table_data)}")
+ if isinstance(table_data, dict):
+ print(f"[TRACE] table_data keys: {list(table_data.keys())}")
+ if 'records' in table_data:
+ print(f"[TRACE] num records: {len(table_data.get('records', []))}")
+ if 'table' in table_data:
+ print(f"[TRACE] table keys: {list(table_data['table'].keys()) if isinstance(table_data['table'], dict) else 'not a dict'}")
+
  # Use higher default for batch operations if not specified
  if max_retries is None:
  max_retries = max(5, self.default_max_retries)
-
+
+ if trace:
+ print(f"[TRACE] max_retries: {max_retries}")
+
  # Check if multiple predictors exist and require specification
  predictors = self._get_available_predictors(session_id, debug=False)
+ if trace:
+ print(f"[TRACE] found {len(predictors)} predictors: {list(predictors.keys())}")
+
  if len(predictors) > 1 and not target_column and not predictor_id:
  available_targets = [p.get('target_column') for p in predictors.values() if p.get('target_column')]
  available_ids = list(predictors.keys())
@@ -6432,7 +6493,7 @@ class FeatrixSphereClient:
  f"Available predictor IDs: {available_ids}\n"
  f"Use client.list_predictors('{session_id}') to see details."
  )
-
+
  # Add predictor specification to table_data if provided
  if isinstance(table_data, dict):
  if target_column:
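For callers, the simplest path is the 'records' input shape mentioned in the docstring above, with the predictor pinned explicitly. A minimal sketch; the session ID, predictor ID, and record contents are placeholders:

    table_data = {"records": [{"age": 41, "plan": "pro"}]}   # one of the accepted input shapes
    result = client.predict_table(
        "my-session-id",                 # placeholder session with a trained predictor
        table_data,
        predictor_id="my-predictor-id",  # pins the predictor when several exist
        trace=True,                      # prints [TRACE] lines for this call only
    )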
@@ -6441,24 +6502,45 @@ class FeatrixSphereClient:
  table_data['predictor_id'] = predictor_id
  if best_metric_preference:
  table_data['best_metric_preference'] = best_metric_preference
-
+
+ if trace:
+ print(f"[TRACE] POST /session/{session_id}/predict_table")
+ # Print first record sample if available
+ if isinstance(table_data, dict) and 'records' in table_data and table_data['records']:
+ print(f"[TRACE] first record sample: {table_data['records'][0]}")
+
  try:
  response_data = self._post_json(f"/session/{session_id}/predict_table", table_data, max_retries=max_retries)
+ if trace:
+ print(f"[TRACE] response keys: {list(response_data.keys()) if isinstance(response_data, dict) else 'not a dict'}")
+ if isinstance(response_data, dict):
+ print(f"[TRACE] predictions count: {len(response_data.get('predictions', []))}")
+ if response_data.get('predictions'):
+ print(f"[TRACE] first prediction: {response_data['predictions'][0]}")
+ if response_data.get('error'):
+ print(f"[TRACE] ERROR: {response_data.get('error')}")
+ if response_data.get('errors'):
+ print(f"[TRACE] ERRORS: {response_data.get('errors')}")
  return response_data
  except Exception as e:
+ if trace:
+ print(f"[TRACE] EXCEPTION: {type(e).__name__}: {e}")
+ import traceback
+ print(f"[TRACE] {traceback.format_exc()}")
  # Enhanced error handling for common prediction issues
  if "404" in str(e) and "Single predictor not found" in str(e):
  self._raise_predictor_not_found_error(session_id, "predict_table")
  else:
  raise

- def predict_records(self, session_id: str, records: List[Dict[str, Any]],
+ def predict_records(self, session_id: str, records: List[Dict[str, Any]],
  target_column: str = None, predictor_id: str = None, best_metric_preference: str = None,
- batch_size: int = 2500, use_async: bool = False,
- show_progress_bar: bool = True, print_target_column_warning: bool = True) -> Dict[str, Any]:
+ batch_size: int = 2500, use_async: bool = False,
+ show_progress_bar: bool = True, print_target_column_warning: bool = True,
+ trace: bool = False) -> Dict[str, Any]:
  """
  Make batch predictions on a list of records with automatic client-side batching.
-
+
  Args:
  session_id: ID of session with trained predictor
  records: List of record dictionaries
@@ -6468,24 +6550,42 @@ class FeatrixSphereClient:
  use_async: Force async processing for large datasets (default: False - async disabled due to pickle issues)
  show_progress_bar: Whether to show progress bar for async jobs (default: True)
  print_target_column_warning: Whether to print warning when removing target column (default: True)
-
+ trace: Enable detailed debug logging (default: False)
+
  Returns:
  Batch prediction results (may include job_id for async processing)
-
+
  Note:
  predictor_id is recommended over target_column for precision. If both are provided, predictor_id takes precedence.
-
+
  Raises:
  ValueError: If target_column is invalid or multiple predictors exist without specification
  """
+ if trace:
+ print(f"[TRACE] predict_records called")
+ print(f"[TRACE] session_id: {session_id}")
+ print(f"[TRACE] num records: {len(records)}")
+ print(f"[TRACE] target_column: {target_column}")
+ print(f"[TRACE] predictor_id: {predictor_id}")
+ print(f"[TRACE] batch_size: {batch_size}")
+ if records:
+ print(f"[TRACE] first record keys: {list(records[0].keys())}")
+ print(f"[TRACE] first record sample: {records[0]}")
+
  # Clean NaN/Inf values before sending
  cleaned_records = self._clean_numpy_values(records)
  # Additional NaN cleaning for JSON encoding
  cleaned_records = self.replace_nans_with_nulls(cleaned_records)
-
+
  # Remove target column that would interfere with prediction
  cleaned_records = self._remove_target_columns(session_id, cleaned_records, target_column, print_target_column_warning)
-
+
+ if trace:
+ print(f"[TRACE] after cleaning: {len(cleaned_records)} records")
+ if cleaned_records:
+ print(f"[TRACE] cleaned first record keys: {list(cleaned_records[0].keys())}")
+ print(f"[TRACE] cleaned first record: {cleaned_records[0]}")
+
  # Determine if we should use async processing
  ASYNC_THRESHOLD = 1000
  total_records = len(cleaned_records)
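Taken together, the per-call trace flag can be used without enabling client-wide tracing. A minimal sketch against the signatures above; the session ID, target column, and records are placeholders, and the 'predictions' key mirrors what the trace output logs for the response:

    records = [
        {"age": 41, "plan": "pro"},    # placeholder rows
        {"age": 29, "plan": "free"},
    ]

    result = client.predict_records(
        "my-session-id",               # placeholder session with a trained predictor
        records,
        target_column="churned",       # or predictor_id=... when several predictors exist
        trace=True,                    # prints [TRACE] lines for this call only
    )
    print(result.get("predictions", []))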
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: featrixsphere
- Version: 0.2.5183
+ Version: 0.2.5566
  Summary: Transform any CSV into a production-ready ML model in minutes, not months.
  Home-page: https://github.com/Featrix/sphere
  Author: Featrix
@@ -0,0 +1,7 @@
+ featrixsphere/__init__.py,sha256=0xTcC19HBfAM1o3XtHqchORTD-0fFBscBomU1jP2xYc,1888
+ featrixsphere/client.py,sha256=GsGCWSvW9PhL57cgPfZZ-mkiHzXxSCaeQRZKs1kfKqY,440159
+ featrixsphere-0.2.5566.dist-info/METADATA,sha256=TEf5XbD_CjoJUq_M7NST61oo1IftZKKth43BI21hXcE,16232
+ featrixsphere-0.2.5566.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ featrixsphere-0.2.5566.dist-info/entry_points.txt,sha256=QreJeYfD_VWvbEqPmMXZ3pqqlFlJ1qZb-NtqnyhEldc,51
+ featrixsphere-0.2.5566.dist-info/top_level.txt,sha256=AyN4wjfzlD0hWnDieuEHX0KckphIk_aC73XCG4df5uU,14
+ featrixsphere-0.2.5566.dist-info/RECORD,,
@@ -1,7 +0,0 @@
- featrixsphere/__init__.py,sha256=tPVhFLHujHMC0bnsnjggdMIb2chNkYbbm0wUX-lwWYY,1888
- featrixsphere/client.py,sha256=YvOB2y8zh4iCMccXQ-4ZsQ8dgmUSQlkLh2zsxIiIoYM,435090
- featrixsphere-0.2.5183.dist-info/METADATA,sha256=BEVBhDxyQvjFfDWyzSSKTZLwybnQFyPYU6dk_cxB5CM,16232
- featrixsphere-0.2.5183.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- featrixsphere-0.2.5183.dist-info/entry_points.txt,sha256=QreJeYfD_VWvbEqPmMXZ3pqqlFlJ1qZb-NtqnyhEldc,51
- featrixsphere-0.2.5183.dist-info/top_level.txt,sha256=AyN4wjfzlD0hWnDieuEHX0KckphIk_aC73XCG4df5uU,14
- featrixsphere-0.2.5183.dist-info/RECORD,,