PyPI - featrixsphere - Versions diffs - 0.2.5978__py3-none-any.whl → 0.2.6379__py3-none-any.whl - Mend

featrixsphere 0.2.5978__py3-none-any.whl → 0.2.6379__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

featrixsphere/__init__.py CHANGED Viewed

@@ -57,7 +57,7 @@ TWO API OPTIONS:
     >>> print(result['prediction'])
 """
-__version__ = "0.2.5978"
+__version__ = "0.2.6379"
 __author__ = "Featrix"
 __email__ = "support@featrix.com"
 __license__ = "MIT"

featrixsphere/client.py CHANGED Viewed

@@ -137,22 +137,24 @@ class SessionInfo:
 class PredictionBatch:
     """
     Cached prediction batch that allows instant lookups after initial batch processing.
     Usage:
         # First run - populate cache
         batch = client.predict_batch(session_id, records)
         # Second run - instant cache lookups
         for i in values1:
             for j in values2:
                 record = {"param1": i, "param2": j}
                 result = batch.predict(record)  # Instant!
     """
-    def __init__(self, session_id: str, client: 'FeatrixSphereClient', target_column: str = None):
+    def __init__(self, session_id: str, client: 'FeatrixSphereClient', target_column: str = None,
+                 best_metric_preference: str = None):
         self.session_id = session_id
         self.client = client
         self.target_column = target_column
+        self.best_metric_preference = best_metric_preference
         self._cache = {}  # record_hash -> prediction_result
         self._stats = {'hits': 0, 'misses': 0, 'populated': 0}
@@ -203,14 +205,15 @@ class PredictionBatch:
         """Populate the cache with batch predictions."""
         if not records:
             return {'summary': {'total_records': 0, 'successful': 0, 'failed': 0}}
         print(f"🚀 Creating prediction batch for {len(records)} records...")
         # Use existing batch prediction system
         batch_results = self.client.predict_records(
             session_id=self.session_id,
             records=records,
             target_column=self.target_column,
+            best_metric_preference=self.best_metric_preference,
             show_progress_bar=True
         )
@@ -1958,7 +1961,7 @@ class FeatrixSphereClient:
             ax.text(0.5, 0.5, f'Error plotting embedding: {e}',
                    transform=ax.transAxes, ha='center', va='center')
-    def create_embedding_space(self, name: str, s3_training_dataset: str, s3_validation_dataset: str, s3_visualization_dataset: str = None, webhooks: Dict[str, str] = None, user_metadata: Dict[str, Any] = None) -> SessionInfo:
+    def create_embedding_space(self, name: str, s3_training_dataset: str, s3_validation_dataset: str, s3_visualization_dataset: str = None, webhooks: Dict[str, str] = None, user_metadata: Dict[str, Any] = None, foundation_mode: bool = None) -> SessionInfo:
         """
         Create a new embedding space from S3 training and validation datasets.
@@ -1969,6 +1972,9 @@ class FeatrixSphereClient:
             s3_visualization_dataset: Optional S3 URL for visualization dataset for epoch projection animations (must start with 's3://')
             webhooks: Optional dict with webhook configuration keys (webhook_callback_secret, s3_backup_url, model_id_update_url)
             user_metadata: Optional user metadata for ES/SP identification (max 32KB)
+            foundation_mode: Force foundation training mode for large datasets. If True, uses foundation
+                           training (chunked iteration, SQLite-backed splits). If False, uses standard
+                           training. If None (default), auto-detects based on dataset size (>=100k rows).
         Returns:
             SessionInfo for the newly created embedding space session
@@ -1989,6 +1995,8 @@ class FeatrixSphereClient:
         print(f"  Validation: {s3_validation_dataset}")
         if s3_visualization_dataset:
             print(f"  Visualization: {s3_visualization_dataset}")
+        if foundation_mode is not None:
+            print(f"  Foundation mode: {'enabled' if foundation_mode else 'disabled'}")
         data = {
             "name": name,
@@ -2003,6 +2011,8 @@ class FeatrixSphereClient:
             import json
             data['user_metadata'] = json.dumps(user_metadata)
             print(f"User metadata: {user_metadata}")
+        if foundation_mode is not None:
+            data['foundation_mode'] = foundation_mode
         response_data = self._post_json("/compute/create-embedding-space", data)
@@ -7737,23 +7747,25 @@ class FeatrixSphereClient:
         else:
             return data
-    def predict_csv_file(self, session_id: str, file_path: Path) -> Dict[str, Any]:
+    def predict_csv_file(self, session_id: str, file_path: Path,
+                          best_metric_preference: str = None) -> Dict[str, Any]:
         """
         Make batch predictions on a CSV file.
         Args:
             session_id: ID of session with trained predictor
             file_path: Path to CSV file
+            best_metric_preference: Which metric checkpoint to use: "roc_auc", "pr_auc", or None (default)
         Returns:
             Batch prediction results
         """
         import pandas as pd
         from jsontables import JSONTablesEncoder
         if not file_path.exists():
             raise FileNotFoundError(f"File not found: {file_path}")
         # Support CSV, Parquet, JSON, and JSONL files
         file_path_str = str(file_path).lower()
         if file_path_str.endswith('.parquet'):
@@ -7772,29 +7784,31 @@ class FeatrixSphereClient:
             df = pd.read_json(file_path)
         else:
             df = pd.read_csv(file_path)
         # Convert to JSON Tables format and clean NaNs
         table_data = JSONTablesEncoder.from_dataframe(df)
         cleaned_table_data = self.replace_nans_with_nulls(table_data)
-        return self.predict_table(session_id, cleaned_table_data)
-    def run_predictions(self, session_id: str, records: List[Dict[str, Any]]) -> Dict[str, Any]:
+        return self.predict_table(session_id, cleaned_table_data, best_metric_preference=best_metric_preference)
+    def run_predictions(self, session_id: str, records: List[Dict[str, Any]],
+                         best_metric_preference: str = None) -> Dict[str, Any]:
         """
         Run predictions on provided records. Clean and fast for production use.
         Args:
             session_id: ID of session with trained predictor
             records: List of record dictionaries
+            best_metric_preference: Which metric checkpoint to use: "roc_auc", "pr_auc", or None (default)
         Returns:
             Dictionary with prediction results
         """
         # Clean NaNs for JSON encoding
         cleaned_records = self.replace_nans_with_nulls(records)
         # Make batch predictions
-        batch_results = self.predict_records(session_id, cleaned_records)
+        batch_results = self.predict_records(session_id, cleaned_records, best_metric_preference=best_metric_preference)
         predictions = batch_results['predictions']
         # Process predictions into clean format
@@ -8510,32 +8524,33 @@ class FeatrixSphereClient:
         return cleared_counts
-    def predict_batch(self, session_id: str, records: List[Dict[str, Any]],
-                     target_column: str = None) -> PredictionBatch:
+    def predict_batch(self, session_id: str, records: List[Dict[str, Any]],
+                       target_column: str = None, best_metric_preference: str = None) -> PredictionBatch:
         """
         Create a prediction batch for instant cached lookups.
         Perfect for parameter sweeps, grid searches, and exploring prediction surfaces.
         Run your loops twice with identical code - first populates cache, second gets instant results.
         Args:
             session_id: ID of session with trained predictor
             records: List of all records you'll want to predict on
             target_column: Specific target column predictor to use
+            best_metric_preference: Which metric checkpoint to use: "roc_auc", "pr_auc", or None (default)
         Returns:
             PredictionBatch object with instant predict() method
         Example:
             # Generate all combinations you'll need
             records = []
             for i in range(10):
                 for j in range(10):
                     records.append({"param1": i, "param2": j})
             # First run - populate cache with batch processing
             batch = client.predict_batch(session_id, records)
             # Second run - same loops but instant cache lookups
             results = []
             for i in range(10):
@@ -8545,50 +8560,52 @@ class FeatrixSphereClient:
                     results.append(result)
         """
         # Create batch object
-        batch = PredictionBatch(session_id, self, target_column)
+        batch = PredictionBatch(session_id, self, target_column, best_metric_preference)
         # Populate cache with batch predictions
         batch._populate_cache(records)
         return batch
-    def predict_grid(self, session_id: str, degrees_of_freedom: int,
-                    grid_shape: tuple = None, target_column: str = None) -> 'PredictionGrid':
+    def predict_grid(self, session_id: str, degrees_of_freedom: int,
+                      grid_shape: tuple = None, target_column: str = None,
+                      best_metric_preference: str = None) -> 'PredictionGrid':
         """
         Create a prediction grid for exploring parameter surfaces with automatic visualization.
         Perfect for 1D curves, 2D heatmaps, and 3D surfaces with built-in plotting functions.
         Args:
             session_id: ID of session with trained predictor
             degrees_of_freedom: Number of dimensions (1, 2, or 3)
             grid_shape: Custom grid shape tuple (default: auto-sized)
             target_column: Specific target column predictor to use
+            best_metric_preference: Which metric checkpoint to use: "roc_auc", "pr_auc", or None (default)
         Returns:
             PredictionGrid object with predict() and plotting methods
         Example:
             # 2D parameter sweep with automatic plotting
             grid = client.predict_grid(session_id, degrees_of_freedom=2)
             grid.set_axis_labels(["Spend", "Campaign Type"])
             grid.set_axis_values(0, [100, 250, 500])
             grid.set_axis_values(1, ["search", "display", "social"])
             for i, spend in enumerate([100, 250, 500]):
                 for j, campaign in enumerate(["search", "display", "social"]):
                     record = {"spend": spend, "campaign_type": campaign}
                     grid.predict(record, grid_position=(i, j))
             # Automatic visualization
             grid.plot_heatmap()  # 2D heatmap
             grid.plot_3d()       # 3D surface
             # Find optimal parameters
             optimal_pos = grid.get_optimal_position()
             print(f"Optimal parameters at grid position: {optimal_pos}")
         """
-        return PredictionGrid(session_id, self, degrees_of_freedom, grid_shape, target_column)
+        return PredictionGrid(session_id, self, degrees_of_freedom, grid_shape, target_column, best_metric_preference)
     def get_embedding_space_columns(self, session_id: str) -> Dict[str, Any]:
         """
@@ -8665,12 +8682,13 @@ class PredictionGrid:
         grid.plot_3d()       # 3D surface plot
     """
-    def __init__(self, session_id: str, client: 'FeatrixSphereClient', degrees_of_freedom: int,
-                 grid_shape: tuple = None, target_column: str = None):
+    def __init__(self, session_id: str, client: 'FeatrixSphereClient', degrees_of_freedom: int,
+                 grid_shape: tuple = None, target_column: str = None, best_metric_preference: str = None):
         self.session_id = session_id
         self.client = client
         self.degrees_of_freedom = degrees_of_freedom
         self.target_column = target_column
+        self.best_metric_preference = best_metric_preference
         # Initialize grid matrix based on degrees of freedom
         if grid_shape:
@@ -8762,6 +8780,7 @@ class PredictionGrid:
                 session_id=self.session_id,
                 records=records_list,
                 target_column=self.target_column,
+                best_metric_preference=self.best_metric_preference,
                 show_progress_bar=show_progress
             )

{featrixsphere-0.2.5978.dist-info → featrixsphere-0.2.6379.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: featrixsphere
-Version: 0.2.5978
+Version: 0.2.6379
 Summary: Transform any CSV into a production-ready ML model in minutes, not months.
 Home-page: https://github.com/Featrix/sphere
 Author: Featrix

{featrixsphere-0.2.5978.dist-info → featrixsphere-0.2.6379.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
-featrixsphere/__init__.py,sha256=nf_WC1Cpqfm0VmGonHMGq8ZbGIvgOJIQSIvL8Sedg4M,2190
-featrixsphere/client.py,sha256=W-2nx5iXX1piBCGEhyBjw0ZpuQcfNUi7W6J3y4ZwMqM,450669
+featrixsphere/__init__.py,sha256=m4FTeSot2GaITV5l_kD5WrSPZKdKmVbcmRwXZE_nYJk,2190
+featrixsphere/client.py,sha256=Nj6C_Th4jyK7JQIXUJ_URok9AA0OND6DOAjoFbKhs2Q,452098
 featrixsphere/api/__init__.py,sha256=quyvuPphVj9wb6v8Dio0SMG9iHgJAmY3asHk3f_zF10,1269
 featrixsphere/api/api_endpoint.py,sha256=i3eCWuaUXftnH1Ai6MFZ7md7pC2FcRAIRO87CBZhyEQ,9000
 featrixsphere/api/client.py,sha256=TdpujNsJxO4GfPMI_KoemQWV89go3KuK6OPAo9jX6Bs,12574
@@ -10,8 +10,8 @@ featrixsphere/api/prediction_result.py,sha256=Tx7LXzF4XT-U3VqAN_IFc5DvxPnygc78M2
 featrixsphere/api/predictor.py,sha256=-vwCKpCfTgZKqzpDnzy1iYZQ-1-MGW8aErvxM9trktw,17652
 featrixsphere/api/reference_record.py,sha256=-XOTF6ynznB3ouz06w3AF8X9SVId0g_dO20VvGNesUQ,7095
 featrixsphere/api/vector_database.py,sha256=BplxKkPnAbcBX1A4KxFBJVb3qkQ-FH9zi9v2dWG5CgY,7976
-featrixsphere-0.2.5978.dist-info/METADATA,sha256=AEmhOnYPaOz_Q7ojHzl6TyrxyzBTylNuxNktKPtk4Uo,16232
-featrixsphere-0.2.5978.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-featrixsphere-0.2.5978.dist-info/entry_points.txt,sha256=QreJeYfD_VWvbEqPmMXZ3pqqlFlJ1qZb-NtqnyhEldc,51
-featrixsphere-0.2.5978.dist-info/top_level.txt,sha256=AyN4wjfzlD0hWnDieuEHX0KckphIk_aC73XCG4df5uU,14
-featrixsphere-0.2.5978.dist-info/RECORD,,
+featrixsphere-0.2.6379.dist-info/METADATA,sha256=EdpmIuyoX1hr1eelFuZbN-zOwrsIsN9TupOeehDJxys,16232
+featrixsphere-0.2.6379.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
+featrixsphere-0.2.6379.dist-info/entry_points.txt,sha256=QreJeYfD_VWvbEqPmMXZ3pqqlFlJ1qZb-NtqnyhEldc,51
+featrixsphere-0.2.6379.dist-info/top_level.txt,sha256=AyN4wjfzlD0hWnDieuEHX0KckphIk_aC73XCG4df5uU,14
+featrixsphere-0.2.6379.dist-info/RECORD,,

{featrixsphere-0.2.5978.dist-info → featrixsphere-0.2.6379.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.9.0)
+Generator: setuptools (80.10.1)
 Root-Is-Purelib: true
 Tag: py3-none-any

{featrixsphere-0.2.5978.dist-info → featrixsphere-0.2.6379.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{featrixsphere-0.2.5978.dist-info → featrixsphere-0.2.6379.dist-info}/top_level.txt RENAMED Viewed

File without changes

featrixsphere 0.2.5978__py3-none-any.whl → 0.2.6379__py3-none-any.whl

featrixsphere 0.2.5978__py3-none-any.whl → 0.2.6379__py3-none-any.whl