featrixsphere 0.2.5183__py3-none-any.whl → 0.2.5566__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
featrixsphere/__init__.py CHANGED
@@ -38,7 +38,7 @@ Example:
  ... labels=['Experiment A', 'Experiment B'])
  """

- __version__ = "0.2.5183"
+ __version__ = "0.2.5566"
  __author__ = "Featrix"
  __email__ = "support@featrix.com"
  __license__ = "MIT"
featrixsphere/client.py CHANGED
@@ -8,6 +8,7 @@ with a focus on the new single predictor functionality.

  import json
  import time
+ import logging
  import requests
  from pathlib import Path
  from typing import Dict, Any, Optional, List, Tuple, Union
@@ -22,6 +23,8 @@ import hashlib
  import numpy as np
  from datetime import datetime

+ logger = logging.getLogger(__name__)
+
  # Optional imports for plotting functionality
  try:
  import matplotlib.pyplot as plt
@@ -66,6 +69,7 @@ class SessionInfo:
  jobs: Dict[str, Any]
  job_queue_positions: Dict[str, Any]
  job_plan: List[Dict[str, Any]] = field(default_factory=list)
+ compute_cluster: Optional[str] = None
  _client: Optional['FeatrixSphereClient'] = None

  def predictors(self) -> List[Dict[str, Any]]:
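The hunk above gives SessionInfo an optional compute_cluster field that defaults to None, so sessions reported by older servers still load. A minimal sketch of reading it; the get_session_status accessor is assumed here purely for illustration and may differ from the client's actual method name:

    # Hypothetical accessor, shown only to illustrate the new field.
    session = client.get_session_status(session_id)
    if session.compute_cluster:
        print(f"Session is pinned to cluster: {session.compute_cluster}")
    else:
        print("No compute cluster reported (older session or server)")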
@@ -211,18 +215,18 @@ class PredictionBatch:
  )

  # Populate cache with results
- predictions = batch_results.get('results', {})
+ predictions = batch_results.get('predictions', [])
  successful = 0
  failed = 0
-
- for queue_id, prediction in predictions.items():
+
+ for prediction in predictions:
  if isinstance(prediction, dict):
  row_index = prediction.get('row_index', 0)
  if row_index < len(records):
  record = records[row_index]
  record_hash = self._hash_record(record)
  self._cache[record_hash] = prediction
-
+
  if prediction.get('prediction') is not None:
  successful += 1
  else:
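The loop above reflects a change in the batch-results payload: instead of a dict keyed by queue ID, the response now carries a flat predictions list whose entries hold row_index and prediction. A minimal sketch of consuming that shape, mirroring the cache-population loop (field names are taken from the diff; the surrounding variables are placeholders):

    predictions = batch_results.get('predictions', [])
    for prediction in predictions:
        if not isinstance(prediction, dict):
            continue
        row_index = prediction.get('row_index', 0)   # position of the corresponding input record
        if prediction.get('prediction') is not None:
            print(f"row {row_index}: {prediction['prediction']}")
        else:
            print(f"row {row_index}: prediction failed")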
@@ -239,15 +243,16 @@ class PredictionBatch:
  class FeatrixSphereClient:
  """Client for interacting with the Featrix Sphere API."""

- def __init__(self, base_url: str = "https://sphere-api.featrix.com",
- default_max_retries: int = 5,
+ def __init__(self, base_url: str = "https://sphere-api.featrix.com",
+ default_max_retries: int = 5,
  default_timeout: int = 30,
  retry_base_delay: float = 2.0,
  retry_max_delay: float = 60.0,
- compute_cluster: str = None):
+ compute_cluster: str = None,
+ trace: bool = False):
  """
  Initialize the API client.
-
+
  Args:
  base_url: Base URL of the API server
  default_max_retries: Default number of retries for failed requests
@@ -255,6 +260,7 @@ class FeatrixSphereClient:
  retry_base_delay: Base delay for exponential backoff in seconds
  retry_max_delay: Maximum delay for exponential backoff in seconds
  compute_cluster: Compute cluster name (e.g., "burrito", "churro") for X-Featrix-Node header
+ trace: Enable detailed debug logging of all API requests (default: False)
  """
  self.base_url = base_url.rstrip('/')
  self.session = requests.Session()
@@ -286,6 +292,17 @@ class FeatrixSphereClient:
  self._prediction_cache = {} # session_id -> {record_hash: prediction_result}
  self._cache_mode = {} # session_id -> 'populate' or 'fetch'
  self._cache_stats = {} # session_id -> {hits: int, misses: int, populated: int}
+
+ # Trace/debug mode
+ self.trace = trace
+ if trace:
+ import logging
+ logging.basicConfig(level=logging.INFO)
+ logger.info(f"[TRACE] FeatrixSphereClient initialized")
+ logger.info(f"[TRACE] base_url: {self.base_url}")
+ logger.info(f"[TRACE] compute_cluster: {compute_cluster}")
+ logger.info(f"[TRACE] default_timeout: {default_timeout}")
+ logger.info(f"[TRACE] headers: {dict(self.session.headers)}")

  def set_compute_cluster(self, cluster: str) -> None:
  """
@@ -349,13 +366,30 @@ class FeatrixSphereClient:
  url = f"{self.base_url}{endpoint}"
  start_time = time.time()
  attempt = 0
-
+
+ if self.trace:
+ logger.info(f"[TRACE] {method} {url}")
+ logger.info(f"[TRACE] headers: {dict(self.session.headers)}")
+ if 'json' in kwargs:
+ json_data = kwargs['json']
+ if isinstance(json_data, dict):
+ logger.info(f"[TRACE] json keys: {list(json_data.keys())}")
+ # Log sample of data without flooding
+ if 'records' in json_data:
+ logger.info(f"[TRACE] num records: {len(json_data.get('records', []))}")
+ if 'table' in json_data:
+ logger.info(f"[TRACE] has table data")
+
  while True:
  attempt += 1
  elapsed = time.time() - start_time
-
+
  try:
  response = self.session.request(method, url, **kwargs)
+ if self.trace:
+ logger.info(f"[TRACE] response: HTTP {response.status_code} ({elapsed:.2f}s)")
+ if response.status_code >= 400:
+ logger.info(f"[TRACE] response body: {response.text[:500]}")
  response.raise_for_status()
  return response

@@ -664,6 +698,7 @@ class FeatrixSphereClient:
  jobs={},
  job_queue_positions={},
  job_plan=[],
+ compute_cluster=response_data.get('compute_cluster'),
  _client=self
  )

@@ -696,6 +731,7 @@ class FeatrixSphereClient:
  jobs=jobs,
  job_queue_positions=positions,
  job_plan=job_plan,
+ compute_cluster=session.get('compute_cluster'),
  _client=self
  )

@@ -4942,11 +4978,14 @@ class FeatrixSphereClient:
  pass

  new_session_id = response_data.get('session_id')
+ compute_cluster = response_data.get('compute_cluster')
  print(f"✅ Predictor training session created: {new_session_id}")
-
+ if compute_cluster:
+ print(f" Compute cluster: {compute_cluster}")
+
  except Exception as e:
  raise
-
+
  if verbose:
  print(f"⏳ Waiting for training to complete...")
  return self.wait_for_session_completion(
@@ -4962,6 +5001,7 @@ class FeatrixSphereClient:
  jobs={},
  job_queue_positions={},
  job_plan=[],
+ compute_cluster=compute_cluster,
  _client=self
  )

@@ -6396,32 +6436,53 @@ class FeatrixSphereClient:
  # JSON Tables Batch Prediction
  # =========================================================================

- def predict_table(self, session_id: str, table_data: Dict[str, Any],
- target_column: str = None, predictor_id: str = None,
- best_metric_preference: str = None, max_retries: int = None) -> Dict[str, Any]:
+ def predict_table(self, session_id: str, table_data: Dict[str, Any],
+ target_column: str = None, predictor_id: str = None,
+ best_metric_preference: str = None, max_retries: int = None,
+ trace: bool = False) -> Dict[str, Any]:
  """
  Make batch predictions using JSON Tables format.
-
+
  Args:
  session_id: ID of session with trained predictor
  table_data: Data in JSON Tables format, or list of records, or dict with 'table'/'records'
  target_column: Specific target column predictor to use (required if multiple predictors exist)
  predictor_id: Specific predictor ID to use (recommended - more precise than target_column)
  max_retries: Number of retries for errors (default: uses client default, recommend higher for batch)
-
+ trace: Enable detailed debug logging (default: False)
+
  Returns:
  Batch prediction results in JSON Tables format
-
+
  Raises:
  ValueError: If multiple predictors exist and neither target_column nor predictor_id is specified
  PredictorNotFoundError: If no single predictor has been trained for this session
  """
+ if trace:
+ print(f"[TRACE] predict_table called")
+ print(f"[TRACE] session_id: {session_id}")
+ print(f"[TRACE] target_column: {target_column}")
+ print(f"[TRACE] predictor_id: {predictor_id}")
+ print(f"[TRACE] table_data type: {type(table_data)}")
+ if isinstance(table_data, dict):
+ print(f"[TRACE] table_data keys: {list(table_data.keys())}")
+ if 'records' in table_data:
+ print(f"[TRACE] num records: {len(table_data.get('records', []))}")
+ if 'table' in table_data:
+ print(f"[TRACE] table keys: {list(table_data['table'].keys()) if isinstance(table_data['table'], dict) else 'not a dict'}")
+
  # Use higher default for batch operations if not specified
  if max_retries is None:
  max_retries = max(5, self.default_max_retries)
-
+
+ if trace:
+ print(f"[TRACE] max_retries: {max_retries}")
+
  # Check if multiple predictors exist and require specification
  predictors = self._get_available_predictors(session_id, debug=False)
+ if trace:
+ print(f"[TRACE] found {len(predictors)} predictors: {list(predictors.keys())}")
+
  if len(predictors) > 1 and not target_column and not predictor_id:
  available_targets = [p.get('target_column') for p in predictors.values() if p.get('target_column')]
  available_ids = list(predictors.keys())
@@ -6432,7 +6493,7 @@ class FeatrixSphereClient:
  f"Available predictor IDs: {available_ids}\n"
  f"Use client.list_predictors('{session_id}') to see details."
  )
-
+
  # Add predictor specification to table_data if provided
  if isinstance(table_data, dict):
  if target_column:
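For callers, the simplest path is the 'records' input shape mentioned in the docstring above, with the predictor pinned explicitly. A minimal sketch; the session ID, predictor ID, and record contents are placeholders:

    table_data = {"records": [{"age": 41, "plan": "pro"}]}   # one of the accepted input shapes
    result = client.predict_table(
        "my-session-id",                 # placeholder session with a trained predictor
        table_data,
        predictor_id="my-predictor-id",  # pins the predictor when several exist
        trace=True,                      # prints [TRACE] lines for this call only
    )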
@@ -6441,24 +6502,45 @@ class FeatrixSphereClient:
  table_data['predictor_id'] = predictor_id
  if best_metric_preference:
  table_data['best_metric_preference'] = best_metric_preference
-
+
+ if trace:
+ print(f"[TRACE] POST /session/{session_id}/predict_table")
+ # Print first record sample if available
+ if isinstance(table_data, dict) and 'records' in table_data and table_data['records']:
+ print(f"[TRACE] first record sample: {table_data['records'][0]}")
+
  try:
  response_data = self._post_json(f"/session/{session_id}/predict_table", table_data, max_retries=max_retries)
+ if trace:
+ print(f"[TRACE] response keys: {list(response_data.keys()) if isinstance(response_data, dict) else 'not a dict'}")
+ if isinstance(response_data, dict):
+ print(f"[TRACE] predictions count: {len(response_data.get('predictions', []))}")
+ if response_data.get('predictions'):
+ print(f"[TRACE] first prediction: {response_data['predictions'][0]}")
+ if response_data.get('error'):
+ print(f"[TRACE] ERROR: {response_data.get('error')}")
+ if response_data.get('errors'):
+ print(f"[TRACE] ERRORS: {response_data.get('errors')}")
  return response_data
  except Exception as e:
+ if trace:
+ print(f"[TRACE] EXCEPTION: {type(e).__name__}: {e}")
+ import traceback
+ print(f"[TRACE] {traceback.format_exc()}")
  # Enhanced error handling for common prediction issues
  if "404" in str(e) and "Single predictor not found" in str(e):
  self._raise_predictor_not_found_error(session_id, "predict_table")
  else:
  raise

- def predict_records(self, session_id: str, records: List[Dict[str, Any]],
+ def predict_records(self, session_id: str, records: List[Dict[str, Any]],
  target_column: str = None, predictor_id: str = None, best_metric_preference: str = None,
- batch_size: int = 2500, use_async: bool = False,
- show_progress_bar: bool = True, print_target_column_warning: bool = True) -> Dict[str, Any]:
+ batch_size: int = 2500, use_async: bool = False,
+ show_progress_bar: bool = True, print_target_column_warning: bool = True,
+ trace: bool = False) -> Dict[str, Any]:
  """
  Make batch predictions on a list of records with automatic client-side batching.
-
+
  Args:
  session_id: ID of session with trained predictor
  records: List of record dictionaries
@@ -6468,24 +6550,42 @@ class FeatrixSphereClient:
  use_async: Force async processing for large datasets (default: False - async disabled due to pickle issues)
  show_progress_bar: Whether to show progress bar for async jobs (default: True)
  print_target_column_warning: Whether to print warning when removing target column (default: True)
-
+ trace: Enable detailed debug logging (default: False)
+
  Returns:
  Batch prediction results (may include job_id for async processing)
-
+
  Note:
  predictor_id is recommended over target_column for precision. If both are provided, predictor_id takes precedence.
-
+
  Raises:
  ValueError: If target_column is invalid or multiple predictors exist without specification
  """
+ if trace:
+ print(f"[TRACE] predict_records called")
+ print(f"[TRACE] session_id: {session_id}")
+ print(f"[TRACE] num records: {len(records)}")
+ print(f"[TRACE] target_column: {target_column}")
+ print(f"[TRACE] predictor_id: {predictor_id}")
+ print(f"[TRACE] batch_size: {batch_size}")
+ if records:
+ print(f"[TRACE] first record keys: {list(records[0].keys())}")
+ print(f"[TRACE] first record sample: {records[0]}")
+
  # Clean NaN/Inf values before sending
  cleaned_records = self._clean_numpy_values(records)
  # Additional NaN cleaning for JSON encoding
  cleaned_records = self.replace_nans_with_nulls(cleaned_records)
-
+
  # Remove target column that would interfere with prediction
  cleaned_records = self._remove_target_columns(session_id, cleaned_records, target_column, print_target_column_warning)
-
+
+ if trace:
+ print(f"[TRACE] after cleaning: {len(cleaned_records)} records")
+ if cleaned_records:
+ print(f"[TRACE] cleaned first record keys: {list(cleaned_records[0].keys())}")
+ print(f"[TRACE] cleaned first record: {cleaned_records[0]}")
+
  # Determine if we should use async processing
  ASYNC_THRESHOLD = 1000
  total_records = len(cleaned_records)
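Taken together, the per-call trace flag can be used without enabling client-wide tracing. A minimal sketch against the signatures above; the session ID, target column, and records are placeholders, and the 'predictions' key mirrors what the trace output logs for the response:

    records = [
        {"age": 41, "plan": "pro"},    # placeholder rows
        {"age": 29, "plan": "free"},
    ]

    result = client.predict_records(
        "my-session-id",               # placeholder session with a trained predictor
        records,
        target_column="churned",       # or predictor_id=... when several predictors exist
        trace=True,                    # prints [TRACE] lines for this call only
    )
    print(result.get("predictions", []))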
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: featrixsphere
- Version: 0.2.5183
+ Version: 0.2.5566
  Summary: Transform any CSV into a production-ready ML model in minutes, not months.
  Home-page: https://github.com/Featrix/sphere
  Author: Featrix
@@ -0,0 +1,7 @@
+ featrixsphere/__init__.py,sha256=0xTcC19HBfAM1o3XtHqchORTD-0fFBscBomU1jP2xYc,1888
+ featrixsphere/client.py,sha256=GsGCWSvW9PhL57cgPfZZ-mkiHzXxSCaeQRZKs1kfKqY,440159
+ featrixsphere-0.2.5566.dist-info/METADATA,sha256=TEf5XbD_CjoJUq_M7NST61oo1IftZKKth43BI21hXcE,16232
+ featrixsphere-0.2.5566.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ featrixsphere-0.2.5566.dist-info/entry_points.txt,sha256=QreJeYfD_VWvbEqPmMXZ3pqqlFlJ1qZb-NtqnyhEldc,51
+ featrixsphere-0.2.5566.dist-info/top_level.txt,sha256=AyN4wjfzlD0hWnDieuEHX0KckphIk_aC73XCG4df5uU,14
+ featrixsphere-0.2.5566.dist-info/RECORD,,
@@ -1,7 +0,0 @@
- featrixsphere/__init__.py,sha256=tPVhFLHujHMC0bnsnjggdMIb2chNkYbbm0wUX-lwWYY,1888
- featrixsphere/client.py,sha256=YvOB2y8zh4iCMccXQ-4ZsQ8dgmUSQlkLh2zsxIiIoYM,435090
- featrixsphere-0.2.5183.dist-info/METADATA,sha256=BEVBhDxyQvjFfDWyzSSKTZLwybnQFyPYU6dk_cxB5CM,16232
- featrixsphere-0.2.5183.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- featrixsphere-0.2.5183.dist-info/entry_points.txt,sha256=QreJeYfD_VWvbEqPmMXZ3pqqlFlJ1qZb-NtqnyhEldc,51
- featrixsphere-0.2.5183.dist-info/top_level.txt,sha256=AyN4wjfzlD0hWnDieuEHX0KckphIk_aC73XCG4df5uU,14
- featrixsphere-0.2.5183.dist-info/RECORD,,