featrixsphere 0.2.5978__py3-none-any.whl → 0.2.6379__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
featrixsphere/__init__.py CHANGED
@@ -57,7 +57,7 @@ TWO API OPTIONS:
57
57
  >>> print(result['prediction'])
58
58
  """
59
59
 
60
- __version__ = "0.2.5978"
60
+ __version__ = "0.2.6379"
61
61
  __author__ = "Featrix"
62
62
  __email__ = "support@featrix.com"
63
63
  __license__ = "MIT"
featrixsphere/client.py CHANGED
@@ -137,22 +137,24 @@ class SessionInfo:
137
137
  class PredictionBatch:
138
138
  """
139
139
  Cached prediction batch that allows instant lookups after initial batch processing.
140
-
140
+
141
141
  Usage:
142
142
  # First run - populate cache
143
143
  batch = client.predict_batch(session_id, records)
144
-
144
+
145
145
  # Second run - instant cache lookups
146
146
  for i in values1:
147
147
  for j in values2:
148
148
  record = {"param1": i, "param2": j}
149
149
  result = batch.predict(record) # Instant!
150
150
  """
151
-
152
- def __init__(self, session_id: str, client: 'FeatrixSphereClient', target_column: str = None):
151
+
152
+ def __init__(self, session_id: str, client: 'FeatrixSphereClient', target_column: str = None,
153
+ best_metric_preference: str = None):
153
154
  self.session_id = session_id
154
155
  self.client = client
155
156
  self.target_column = target_column
157
+ self.best_metric_preference = best_metric_preference
156
158
  self._cache = {} # record_hash -> prediction_result
157
159
  self._stats = {'hits': 0, 'misses': 0, 'populated': 0}
158
160
 
@@ -203,14 +205,15 @@ class PredictionBatch:
203
205
  """Populate the cache with batch predictions."""
204
206
  if not records:
205
207
  return {'summary': {'total_records': 0, 'successful': 0, 'failed': 0}}
206
-
208
+
207
209
  print(f"🚀 Creating prediction batch for {len(records)} records...")
208
-
210
+
209
211
  # Use existing batch prediction system
210
212
  batch_results = self.client.predict_records(
211
213
  session_id=self.session_id,
212
214
  records=records,
213
215
  target_column=self.target_column,
216
+ best_metric_preference=self.best_metric_preference,
214
217
  show_progress_bar=True
215
218
  )
216
219
 
@@ -1958,7 +1961,7 @@ class FeatrixSphereClient:
1958
1961
  ax.text(0.5, 0.5, f'Error plotting embedding: {e}',
1959
1962
  transform=ax.transAxes, ha='center', va='center')
1960
1963
 
1961
- def create_embedding_space(self, name: str, s3_training_dataset: str, s3_validation_dataset: str, s3_visualization_dataset: str = None, webhooks: Dict[str, str] = None, user_metadata: Dict[str, Any] = None) -> SessionInfo:
1964
+ def create_embedding_space(self, name: str, s3_training_dataset: str, s3_validation_dataset: str, s3_visualization_dataset: str = None, webhooks: Dict[str, str] = None, user_metadata: Dict[str, Any] = None, foundation_mode: bool = None) -> SessionInfo:
1962
1965
  """
1963
1966
  Create a new embedding space from S3 training and validation datasets.
1964
1967
 
@@ -1969,6 +1972,9 @@ class FeatrixSphereClient:
1969
1972
  s3_visualization_dataset: Optional S3 URL for visualization dataset for epoch projection animations (must start with 's3://')
1970
1973
  webhooks: Optional dict with webhook configuration keys (webhook_callback_secret, s3_backup_url, model_id_update_url)
1971
1974
  user_metadata: Optional user metadata for ES/SP identification (max 32KB)
1975
+ foundation_mode: Force foundation training mode for large datasets. If True, uses foundation
1976
+ training (chunked iteration, SQLite-backed splits). If False, uses standard
1977
+ training. If None (default), auto-detects based on dataset size (>=100k rows).
1972
1978
 
1973
1979
  Returns:
1974
1980
  SessionInfo for the newly created embedding space session
@@ -1989,6 +1995,8 @@ class FeatrixSphereClient:
1989
1995
  print(f" Validation: {s3_validation_dataset}")
1990
1996
  if s3_visualization_dataset:
1991
1997
  print(f" Visualization: {s3_visualization_dataset}")
1998
+ if foundation_mode is not None:
1999
+ print(f" Foundation mode: {'enabled' if foundation_mode else 'disabled'}")
1992
2000
 
1993
2001
  data = {
1994
2002
  "name": name,
@@ -2003,6 +2011,8 @@ class FeatrixSphereClient:
2003
2011
  import json
2004
2012
  data['user_metadata'] = json.dumps(user_metadata)
2005
2013
  print(f"User metadata: {user_metadata}")
2014
+ if foundation_mode is not None:
2015
+ data['foundation_mode'] = foundation_mode
2006
2016
 
2007
2017
  response_data = self._post_json("/compute/create-embedding-space", data)
2008
2018
 
@@ -7737,23 +7747,25 @@ class FeatrixSphereClient:
7737
7747
  else:
7738
7748
  return data
7739
7749
 
7740
- def predict_csv_file(self, session_id: str, file_path: Path) -> Dict[str, Any]:
7750
+ def predict_csv_file(self, session_id: str, file_path: Path,
7751
+ best_metric_preference: str = None) -> Dict[str, Any]:
7741
7752
  """
7742
7753
  Make batch predictions on a CSV file.
7743
-
7754
+
7744
7755
  Args:
7745
7756
  session_id: ID of session with trained predictor
7746
7757
  file_path: Path to CSV file
7747
-
7758
+ best_metric_preference: Which metric checkpoint to use: "roc_auc", "pr_auc", or None (default)
7759
+
7748
7760
  Returns:
7749
7761
  Batch prediction results
7750
7762
  """
7751
7763
  import pandas as pd
7752
7764
  from jsontables import JSONTablesEncoder
7753
-
7765
+
7754
7766
  if not file_path.exists():
7755
7767
  raise FileNotFoundError(f"File not found: {file_path}")
7756
-
7768
+
7757
7769
  # Support CSV, Parquet, JSON, and JSONL files
7758
7770
  file_path_str = str(file_path).lower()
7759
7771
  if file_path_str.endswith('.parquet'):
@@ -7772,29 +7784,31 @@ class FeatrixSphereClient:
7772
7784
  df = pd.read_json(file_path)
7773
7785
  else:
7774
7786
  df = pd.read_csv(file_path)
7775
-
7787
+
7776
7788
  # Convert to JSON Tables format and clean NaNs
7777
7789
  table_data = JSONTablesEncoder.from_dataframe(df)
7778
7790
  cleaned_table_data = self.replace_nans_with_nulls(table_data)
7779
-
7780
- return self.predict_table(session_id, cleaned_table_data)
7781
7791
 
7782
- def run_predictions(self, session_id: str, records: List[Dict[str, Any]]) -> Dict[str, Any]:
7792
+ return self.predict_table(session_id, cleaned_table_data, best_metric_preference=best_metric_preference)
7793
+
7794
+ def run_predictions(self, session_id: str, records: List[Dict[str, Any]],
7795
+ best_metric_preference: str = None) -> Dict[str, Any]:
7783
7796
  """
7784
7797
  Run predictions on provided records. Clean and fast for production use.
7785
-
7798
+
7786
7799
  Args:
7787
7800
  session_id: ID of session with trained predictor
7788
7801
  records: List of record dictionaries
7789
-
7802
+ best_metric_preference: Which metric checkpoint to use: "roc_auc", "pr_auc", or None (default)
7803
+
7790
7804
  Returns:
7791
7805
  Dictionary with prediction results
7792
7806
  """
7793
7807
  # Clean NaNs for JSON encoding
7794
7808
  cleaned_records = self.replace_nans_with_nulls(records)
7795
-
7809
+
7796
7810
  # Make batch predictions
7797
- batch_results = self.predict_records(session_id, cleaned_records)
7811
+ batch_results = self.predict_records(session_id, cleaned_records, best_metric_preference=best_metric_preference)
7798
7812
  predictions = batch_results['predictions']
7799
7813
 
7800
7814
  # Process predictions into clean format
@@ -8510,32 +8524,33 @@ class FeatrixSphereClient:
8510
8524
 
8511
8525
  return cleared_counts
8512
8526
 
8513
- def predict_batch(self, session_id: str, records: List[Dict[str, Any]],
8514
- target_column: str = None) -> PredictionBatch:
8527
+ def predict_batch(self, session_id: str, records: List[Dict[str, Any]],
8528
+ target_column: str = None, best_metric_preference: str = None) -> PredictionBatch:
8515
8529
  """
8516
8530
  Create a prediction batch for instant cached lookups.
8517
-
8531
+
8518
8532
  Perfect for parameter sweeps, grid searches, and exploring prediction surfaces.
8519
8533
  Run your loops twice with identical code - first populates cache, second gets instant results.
8520
-
8534
+
8521
8535
  Args:
8522
8536
  session_id: ID of session with trained predictor
8523
8537
  records: List of all records you'll want to predict on
8524
8538
  target_column: Specific target column predictor to use
8525
-
8539
+ best_metric_preference: Which metric checkpoint to use: "roc_auc", "pr_auc", or None (default)
8540
+
8526
8541
  Returns:
8527
8542
  PredictionBatch object with instant predict() method
8528
-
8543
+
8529
8544
  Example:
8530
8545
  # Generate all combinations you'll need
8531
8546
  records = []
8532
8547
  for i in range(10):
8533
8548
  for j in range(10):
8534
8549
  records.append({"param1": i, "param2": j})
8535
-
8550
+
8536
8551
  # First run - populate cache with batch processing
8537
8552
  batch = client.predict_batch(session_id, records)
8538
-
8553
+
8539
8554
  # Second run - same loops but instant cache lookups
8540
8555
  results = []
8541
8556
  for i in range(10):
@@ -8545,50 +8560,52 @@ class FeatrixSphereClient:
8545
8560
  results.append(result)
8546
8561
  """
8547
8562
  # Create batch object
8548
- batch = PredictionBatch(session_id, self, target_column)
8549
-
8563
+ batch = PredictionBatch(session_id, self, target_column, best_metric_preference)
8564
+
8550
8565
  # Populate cache with batch predictions
8551
8566
  batch._populate_cache(records)
8552
-
8567
+
8553
8568
  return batch
8554
8569
 
8555
- def predict_grid(self, session_id: str, degrees_of_freedom: int,
8556
- grid_shape: tuple = None, target_column: str = None) -> 'PredictionGrid':
8570
+ def predict_grid(self, session_id: str, degrees_of_freedom: int,
8571
+ grid_shape: tuple = None, target_column: str = None,
8572
+ best_metric_preference: str = None) -> 'PredictionGrid':
8557
8573
  """
8558
8574
  Create a prediction grid for exploring parameter surfaces with automatic visualization.
8559
-
8575
+
8560
8576
  Perfect for 1D curves, 2D heatmaps, and 3D surfaces with built-in plotting functions.
8561
-
8577
+
8562
8578
  Args:
8563
8579
  session_id: ID of session with trained predictor
8564
8580
  degrees_of_freedom: Number of dimensions (1, 2, or 3)
8565
8581
  grid_shape: Custom grid shape tuple (default: auto-sized)
8566
8582
  target_column: Specific target column predictor to use
8567
-
8583
+ best_metric_preference: Which metric checkpoint to use: "roc_auc", "pr_auc", or None (default)
8584
+
8568
8585
  Returns:
8569
8586
  PredictionGrid object with predict() and plotting methods
8570
-
8587
+
8571
8588
  Example:
8572
8589
  # 2D parameter sweep with automatic plotting
8573
8590
  grid = client.predict_grid(session_id, degrees_of_freedom=2)
8574
8591
  grid.set_axis_labels(["Spend", "Campaign Type"])
8575
8592
  grid.set_axis_values(0, [100, 250, 500])
8576
8593
  grid.set_axis_values(1, ["search", "display", "social"])
8577
-
8594
+
8578
8595
  for i, spend in enumerate([100, 250, 500]):
8579
8596
  for j, campaign in enumerate(["search", "display", "social"]):
8580
8597
  record = {"spend": spend, "campaign_type": campaign}
8581
8598
  grid.predict(record, grid_position=(i, j))
8582
-
8599
+
8583
8600
  # Automatic visualization
8584
8601
  grid.plot_heatmap() # 2D heatmap
8585
8602
  grid.plot_3d() # 3D surface
8586
-
8603
+
8587
8604
  # Find optimal parameters
8588
8605
  optimal_pos = grid.get_optimal_position()
8589
8606
  print(f"Optimal parameters at grid position: {optimal_pos}")
8590
8607
  """
8591
- return PredictionGrid(session_id, self, degrees_of_freedom, grid_shape, target_column)
8608
+ return PredictionGrid(session_id, self, degrees_of_freedom, grid_shape, target_column, best_metric_preference)
8592
8609
 
8593
8610
  def get_embedding_space_columns(self, session_id: str) -> Dict[str, Any]:
8594
8611
  """
@@ -8665,12 +8682,13 @@ class PredictionGrid:
8665
8682
  grid.plot_3d() # 3D surface plot
8666
8683
  """
8667
8684
 
8668
- def __init__(self, session_id: str, client: 'FeatrixSphereClient', degrees_of_freedom: int,
8669
- grid_shape: tuple = None, target_column: str = None):
8685
+ def __init__(self, session_id: str, client: 'FeatrixSphereClient', degrees_of_freedom: int,
8686
+ grid_shape: tuple = None, target_column: str = None, best_metric_preference: str = None):
8670
8687
  self.session_id = session_id
8671
8688
  self.client = client
8672
8689
  self.degrees_of_freedom = degrees_of_freedom
8673
8690
  self.target_column = target_column
8691
+ self.best_metric_preference = best_metric_preference
8674
8692
 
8675
8693
  # Initialize grid matrix based on degrees of freedom
8676
8694
  if grid_shape:
@@ -8762,6 +8780,7 @@ class PredictionGrid:
8762
8780
  session_id=self.session_id,
8763
8781
  records=records_list,
8764
8782
  target_column=self.target_column,
8783
+ best_metric_preference=self.best_metric_preference,
8765
8784
  show_progress_bar=show_progress
8766
8785
  )
8767
8786
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: featrixsphere
3
- Version: 0.2.5978
3
+ Version: 0.2.6379
4
4
  Summary: Transform any CSV into a production-ready ML model in minutes, not months.
5
5
  Home-page: https://github.com/Featrix/sphere
6
6
  Author: Featrix
@@ -1,5 +1,5 @@
1
- featrixsphere/__init__.py,sha256=nf_WC1Cpqfm0VmGonHMGq8ZbGIvgOJIQSIvL8Sedg4M,2190
2
- featrixsphere/client.py,sha256=W-2nx5iXX1piBCGEhyBjw0ZpuQcfNUi7W6J3y4ZwMqM,450669
1
+ featrixsphere/__init__.py,sha256=m4FTeSot2GaITV5l_kD5WrSPZKdKmVbcmRwXZE_nYJk,2190
2
+ featrixsphere/client.py,sha256=Nj6C_Th4jyK7JQIXUJ_URok9AA0OND6DOAjoFbKhs2Q,452098
3
3
  featrixsphere/api/__init__.py,sha256=quyvuPphVj9wb6v8Dio0SMG9iHgJAmY3asHk3f_zF10,1269
4
4
  featrixsphere/api/api_endpoint.py,sha256=i3eCWuaUXftnH1Ai6MFZ7md7pC2FcRAIRO87CBZhyEQ,9000
5
5
  featrixsphere/api/client.py,sha256=TdpujNsJxO4GfPMI_KoemQWV89go3KuK6OPAo9jX6Bs,12574
@@ -10,8 +10,8 @@ featrixsphere/api/prediction_result.py,sha256=Tx7LXzF4XT-U3VqAN_IFc5DvxPnygc78M2
10
10
  featrixsphere/api/predictor.py,sha256=-vwCKpCfTgZKqzpDnzy1iYZQ-1-MGW8aErvxM9trktw,17652
11
11
  featrixsphere/api/reference_record.py,sha256=-XOTF6ynznB3ouz06w3AF8X9SVId0g_dO20VvGNesUQ,7095
12
12
  featrixsphere/api/vector_database.py,sha256=BplxKkPnAbcBX1A4KxFBJVb3qkQ-FH9zi9v2dWG5CgY,7976
13
- featrixsphere-0.2.5978.dist-info/METADATA,sha256=AEmhOnYPaOz_Q7ojHzl6TyrxyzBTylNuxNktKPtk4Uo,16232
14
- featrixsphere-0.2.5978.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
15
- featrixsphere-0.2.5978.dist-info/entry_points.txt,sha256=QreJeYfD_VWvbEqPmMXZ3pqqlFlJ1qZb-NtqnyhEldc,51
16
- featrixsphere-0.2.5978.dist-info/top_level.txt,sha256=AyN4wjfzlD0hWnDieuEHX0KckphIk_aC73XCG4df5uU,14
17
- featrixsphere-0.2.5978.dist-info/RECORD,,
13
+ featrixsphere-0.2.6379.dist-info/METADATA,sha256=EdpmIuyoX1hr1eelFuZbN-zOwrsIsN9TupOeehDJxys,16232
14
+ featrixsphere-0.2.6379.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
15
+ featrixsphere-0.2.6379.dist-info/entry_points.txt,sha256=QreJeYfD_VWvbEqPmMXZ3pqqlFlJ1qZb-NtqnyhEldc,51
16
+ featrixsphere-0.2.6379.dist-info/top_level.txt,sha256=AyN4wjfzlD0hWnDieuEHX0KckphIk_aC73XCG4df5uU,14
17
+ featrixsphere-0.2.6379.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: setuptools (80.10.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5