featrixsphere 0.2.5566__py3-none-any.whl → 0.2.5978__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,396 @@
1
+ """
2
+ FeatrixSphere main client class.
3
+
4
+ This is the entry point for the new FeatrixSphere API.
5
+ """
6
+
7
+ import io
8
+ import gzip
9
+ import logging
10
+ import requests
11
+ from pathlib import Path
12
+ from typing import Dict, Any, Optional, List, Union, TYPE_CHECKING
13
+
14
+ if TYPE_CHECKING:
15
+ import pandas as pd
16
+
17
+ from .http_client import HTTPClientMixin, ClientContext
18
+ from .foundational_model import FoundationalModel
19
+ from .predictor import Predictor
20
+ from .vector_database import VectorDatabase
21
+ from .prediction_result import PredictionFeedback
22
+ from .api_endpoint import APIEndpoint
23
+ from .notebook_helper import FeatrixNotebookHelper
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
class FeatrixSphere(HTTPClientMixin):
    """
    Main client for interacting with FeatrixSphere.

    This is the entry point for the new object-oriented API.

    Usage:
        from featrixsphere.api import FeatrixSphere

        # Connect to FeatrixSphere
        featrix = FeatrixSphere("https://sphere-api.featrix.com")

        # Create foundational model
        fm = featrix.create_foundational_model(
            name="my_model",
            csv_file="data.csv"
        )
        fm.wait_for_training()

        # Create predictor
        predictor = fm.create_classifier(
            name="my_classifier",
            target_column="target"
        )
        predictor.wait_for_training()

        # Make predictions
        result = predictor.predict({"feature1": "value1"})
        print(result.predicted_class)
        print(result.confidence)

    On-Premises Deployment:
        Featrix offers on-premises data processing with qualified NVIDIA
        hardware configurations. The API works exactly the same - just
        point your client to your on-premises endpoint:

        featrix = FeatrixSphere("https://your-on-premises-server.com")
    """

    def __init__(
        self,
        base_url: str = "https://sphere-api.featrix.com",
        compute_cluster: Optional[str] = None,
        default_max_retries: int = 5,
        default_timeout: int = 30,
        retry_base_delay: float = 2.0,
        retry_max_delay: float = 60.0,
    ):
        """
        Initialize the FeatrixSphere client.

        Args:
            base_url: API server URL
            compute_cluster: Compute cluster name (e.g., "burrito", "churro")
            default_max_retries: Default retry count for failed requests
            default_timeout: Default request timeout in seconds
            retry_base_delay: Base delay for exponential backoff
            retry_max_delay: Maximum delay for exponential backoff
        """
        # Normalize the base URL so path joins never produce "//".
        self._base_url = base_url.rstrip('/')

        # One shared Session gives us connection pooling and sticky headers.
        self._session = requests.Session()
        # NOTE(review): requests.Session has no native `timeout` attribute;
        # requests itself ignores this. Presumably HTTPClientMixin reads it
        # back when issuing requests — confirm against the mixin.
        self._session.timeout = default_timeout

        # Identify the client version in the User-Agent when available.
        try:
            from featrixsphere import __version__
            self._session.headers['User-Agent'] = f'FeatrixSphere {__version__}'
        except ImportError:
            self._session.headers['User-Agent'] = 'FeatrixSphere'

        # Pin all traffic to a specific compute cluster when one was given.
        self._compute_cluster = compute_cluster
        if compute_cluster:
            self._session.headers['X-Featrix-Node'] = compute_cluster

        # Retry/backoff configuration consumed by HTTPClientMixin.
        self._default_max_retries = default_max_retries
        self._retry_base_delay = retry_base_delay
        self._retry_max_delay = retry_max_delay

        # Shared context handed to every resource object created by this client.
        self._ctx = ClientContext(self)
110
+
111
+ def set_compute_cluster(self, cluster: Optional[str]) -> None:
112
+ """
113
+ Set the compute cluster for all subsequent requests.
114
+
115
+ Args:
116
+ cluster: Cluster name or None for default
117
+ """
118
+ self._compute_cluster = cluster
119
+ if cluster:
120
+ self._session.headers.update({'X-Featrix-Node': cluster})
121
+ else:
122
+ self._session.headers.pop('X-Featrix-Node', None)
123
+
124
+ def create_foundational_model(
125
+ self,
126
+ name: Optional[str] = None,
127
+ csv_file: Optional[str] = None,
128
+ df: Optional['pd.DataFrame'] = None,
129
+ ignore_columns: Optional[List[str]] = None,
130
+ epochs: Optional[int] = None,
131
+ webhooks: Optional[Dict[str, str]] = None,
132
+ user_metadata: Optional[Dict[str, Any]] = None,
133
+ **kwargs
134
+ ) -> FoundationalModel:
135
+ """
136
+ Create a foundational model (embedding space).
137
+
138
+ Args:
139
+ name: Model name
140
+ csv_file: Path to CSV file with training data
141
+ df: DataFrame with training data (alternative to csv_file)
142
+ ignore_columns: Columns to ignore during training
143
+ epochs: Number of training epochs (None = auto)
144
+ webhooks: Webhook URLs for events
145
+ user_metadata: Custom metadata (max 32KB)
146
+ **kwargs: Additional parameters
147
+
148
+ Returns:
149
+ FoundationalModel object (training started)
150
+
151
+ Example:
152
+ fm = featrix.create_foundational_model(
153
+ name="customer_embeddings",
154
+ csv_file="customers.csv",
155
+ ignore_columns=["id", "timestamp"]
156
+ )
157
+ fm.wait_for_training()
158
+ """
159
+ # Prepare file content
160
+ if df is not None:
161
+ file_content, filename = self._dataframe_to_file(df)
162
+ elif csv_file:
163
+ file_content, filename = self._read_file(csv_file)
164
+ else:
165
+ raise ValueError("Either csv_file or df must be provided")
166
+
167
+ # Build form data
168
+ form_data = {}
169
+ if name:
170
+ form_data['name'] = name
171
+ if ignore_columns:
172
+ import json
173
+ form_data['ignore_columns'] = json.dumps(ignore_columns)
174
+ if epochs is not None:
175
+ form_data['epochs'] = str(epochs)
176
+ if webhooks:
177
+ import json
178
+ form_data['webhooks'] = json.dumps(webhooks)
179
+ if user_metadata:
180
+ import json
181
+ form_data['user_metadata'] = json.dumps(user_metadata)
182
+
183
+ # Add any extra kwargs
184
+ for key, value in kwargs.items():
185
+ if value is not None:
186
+ if isinstance(value, (dict, list)):
187
+ import json
188
+ form_data[key] = json.dumps(value)
189
+ else:
190
+ form_data[key] = str(value)
191
+
192
+ # Upload file and create session
193
+ files = {'file': (filename, file_content)}
194
+
195
+ response = self._post_multipart(
196
+ "/compute/upload_with_new_session/",
197
+ data=form_data,
198
+ files=files
199
+ )
200
+
201
+ session_id = response.get('session_id', '')
202
+
203
+ # Handle warnings
204
+ warnings = response.get('warnings', [])
205
+ if warnings:
206
+ for warning in warnings:
207
+ logger.warning(f"Upload warning: {warning}")
208
+
209
+ return FoundationalModel(
210
+ id=session_id,
211
+ name=name,
212
+ status="training",
213
+ created_at=None,
214
+ _ctx=self._ctx,
215
+ )
216
+
217
+ def foundational_model(self, fm_id: str) -> FoundationalModel:
218
+ """
219
+ Get an existing foundational model by ID.
220
+
221
+ Args:
222
+ fm_id: Foundational model (session) ID
223
+
224
+ Returns:
225
+ FoundationalModel object
226
+
227
+ Example:
228
+ fm = featrix.foundational_model("abc123")
229
+ print(fm.status)
230
+ """
231
+ return FoundationalModel.from_session_id(fm_id, self._ctx)
232
+
233
+ def predictor(self, predictor_id: str, session_id: Optional[str] = None) -> Predictor:
234
+ """
235
+ Get an existing predictor by ID.
236
+
237
+ Args:
238
+ predictor_id: Predictor ID
239
+ session_id: Session ID (required if predictor_id alone is not unique)
240
+
241
+ Returns:
242
+ Predictor object
243
+
244
+ Note:
245
+ In most cases, you should access predictors through the
246
+ FoundationalModel: fm.list_predictors()
247
+ """
248
+ if not session_id:
249
+ # Try to find session from predictor ID
250
+ # This may not work for all cases
251
+ raise ValueError("session_id is required to load a predictor")
252
+
253
+ # Get predictor info from session
254
+ response = self._get_json(f"/session/{session_id}/predictor")
255
+ predictors_data = response.get('predictors', {})
256
+
257
+ if predictor_id not in predictors_data:
258
+ raise ValueError(f"Predictor {predictor_id} not found in session {session_id}")
259
+
260
+ pred_info = predictors_data[predictor_id]
261
+
262
+ return Predictor(
263
+ id=predictor_id,
264
+ session_id=session_id,
265
+ target_column=pred_info.get('target_column', ''),
266
+ target_type=pred_info.get('target_type', 'set'),
267
+ name=pred_info.get('name'),
268
+ status=pred_info.get('status'),
269
+ accuracy=pred_info.get('accuracy'),
270
+ _ctx=self._ctx,
271
+ )
272
+
273
+ def vector_database(self, vdb_id: str) -> VectorDatabase:
274
+ """
275
+ Get an existing vector database by ID.
276
+
277
+ Args:
278
+ vdb_id: Vector database (session) ID
279
+
280
+ Returns:
281
+ VectorDatabase object
282
+ """
283
+ return VectorDatabase.from_session(vdb_id, ctx=self._ctx)
284
+
285
+ def api_endpoint(self, endpoint_id: str, session_id: str) -> APIEndpoint:
286
+ """
287
+ Get an existing API endpoint by ID.
288
+
289
+ Args:
290
+ endpoint_id: API endpoint ID
291
+ session_id: Session ID
292
+
293
+ Returns:
294
+ APIEndpoint object
295
+ """
296
+ response = self._get_json(f"/session/{session_id}/endpoint/{endpoint_id}")
297
+
298
+ return APIEndpoint.from_response(
299
+ response=response,
300
+ predictor_id=response.get('predictor_id', ''),
301
+ session_id=session_id,
302
+ ctx=self._ctx,
303
+ )
304
+
305
+ def get_notebook(self) -> FeatrixNotebookHelper:
306
+ """
307
+ Get the Jupyter notebook visualization helper.
308
+
309
+ Returns a helper object with methods for visualizing training,
310
+ embedding spaces, and model analysis in Jupyter notebooks.
311
+
312
+ Returns:
313
+ FeatrixNotebookHelper instance
314
+
315
+ Example:
316
+ notebook = featrix.get_notebook()
317
+ fig = notebook.training_loss(fm)
318
+ fig.show()
319
+ """
320
+ return FeatrixNotebookHelper(ctx=self._ctx)
321
+
322
+ def prediction_feedback(
323
+ self,
324
+ prediction_uuid: str,
325
+ ground_truth: Union[str, float]
326
+ ) -> Dict[str, Any]:
327
+ """
328
+ Send feedback for a prediction.
329
+
330
+ Convenience method that creates and sends feedback in one call.
331
+
332
+ Args:
333
+ prediction_uuid: UUID from PredictionResult.prediction_uuid
334
+ ground_truth: The correct label/value
335
+
336
+ Returns:
337
+ Server response
338
+ """
339
+ return PredictionFeedback.create_and_send(
340
+ ctx=self._ctx,
341
+ prediction_uuid=prediction_uuid,
342
+ ground_truth=ground_truth
343
+ )
344
+
345
+ def health_check(self) -> Dict[str, Any]:
346
+ """
347
+ Check if the API server is healthy.
348
+
349
+ Returns:
350
+ Health status dictionary
351
+ """
352
+ return self._get_json("/health")
353
+
354
+ def _dataframe_to_file(self, df: 'pd.DataFrame') -> tuple:
355
+ """Convert DataFrame to file content and filename."""
356
+ # Try parquet first (more efficient)
357
+ try:
358
+ import pyarrow
359
+ buffer = io.BytesIO()
360
+ df.to_parquet(buffer, index=False)
361
+ return buffer.getvalue(), "data.parquet"
362
+ except ImportError:
363
+ pass
364
+
365
+ # Fall back to CSV
366
+ csv_buffer = io.StringIO()
367
+ df.to_csv(csv_buffer, index=False)
368
+ content = csv_buffer.getvalue().encode('utf-8')
369
+
370
+ # Compress if large
371
+ if len(content) > 100_000:
372
+ compressed = gzip.compress(content)
373
+ if len(compressed) < len(content):
374
+ return compressed, "data.csv.gz"
375
+
376
+ return content, "data.csv"
377
+
378
+ def _read_file(self, file_path: str) -> tuple:
379
+ """Read file content and return with filename."""
380
+ path = Path(file_path)
381
+ filename = path.name
382
+
383
+ with open(path, 'rb') as f:
384
+ content = f.read()
385
+
386
+ # Compress if large and not already compressed
387
+ if len(content) > 100_000 and not filename.endswith('.gz'):
388
+ compressed = gzip.compress(content)
389
+ if len(compressed) < len(content):
390
+ return compressed, filename + '.gz'
391
+
392
+ return content, filename
393
+
394
+ def __repr__(self) -> str:
395
+ cluster_str = f", cluster='{self._compute_cluster}'" if self._compute_cluster else ""
396
+ return f"FeatrixSphere(url='{self._base_url}'{cluster_str})"