featrixsphere 0.2.5563__py3-none-any.whl → 0.2.5978__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- featrixsphere/__init__.py +37 -18
- featrixsphere/api/__init__.py +50 -0
- featrixsphere/api/api_endpoint.py +280 -0
- featrixsphere/api/client.py +396 -0
- featrixsphere/api/foundational_model.py +658 -0
- featrixsphere/api/http_client.py +209 -0
- featrixsphere/api/notebook_helper.py +584 -0
- featrixsphere/api/prediction_result.py +231 -0
- featrixsphere/api/predictor.py +537 -0
- featrixsphere/api/reference_record.py +227 -0
- featrixsphere/api/vector_database.py +269 -0
- featrixsphere/client.py +215 -12
- {featrixsphere-0.2.5563.dist-info → featrixsphere-0.2.5978.dist-info}/METADATA +1 -1
- featrixsphere-0.2.5978.dist-info/RECORD +17 -0
- featrixsphere-0.2.5563.dist-info/RECORD +0 -7
- {featrixsphere-0.2.5563.dist-info → featrixsphere-0.2.5978.dist-info}/WHEEL +0 -0
- {featrixsphere-0.2.5563.dist-info → featrixsphere-0.2.5978.dist-info}/entry_points.txt +0 -0
- {featrixsphere-0.2.5563.dist-info → featrixsphere-0.2.5978.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,396 @@
|
|
|
1
|
+
"""
|
|
2
|
+
FeatrixSphere main client class.
|
|
3
|
+
|
|
4
|
+
This is the entry point for the new FeatrixSphere API.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import io
|
|
8
|
+
import gzip
|
|
9
|
+
import logging
|
|
10
|
+
import requests
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Dict, Any, Optional, List, Union, TYPE_CHECKING
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
import pandas as pd
|
|
16
|
+
|
|
17
|
+
from .http_client import HTTPClientMixin, ClientContext
|
|
18
|
+
from .foundational_model import FoundationalModel
|
|
19
|
+
from .predictor import Predictor
|
|
20
|
+
from .vector_database import VectorDatabase
|
|
21
|
+
from .prediction_result import PredictionFeedback
|
|
22
|
+
from .api_endpoint import APIEndpoint
|
|
23
|
+
from .notebook_helper import FeatrixNotebookHelper
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class FeatrixSphere(HTTPClientMixin):
    """
    Main client for interacting with FeatrixSphere.

    This is the entry point for the new object-oriented API.

    Usage:
        from featrixsphere.api import FeatrixSphere

        # Connect to FeatrixSphere
        featrix = FeatrixSphere("https://sphere-api.featrix.com")

        # Create foundational model
        fm = featrix.create_foundational_model(
            name="my_model",
            csv_file="data.csv"
        )
        fm.wait_for_training()

        # Create predictor
        predictor = fm.create_classifier(
            name="my_classifier",
            target_column="target"
        )
        predictor.wait_for_training()

        # Make predictions
        result = predictor.predict({"feature1": "value1"})
        print(result.predicted_class)
        print(result.confidence)

    On-Premises Deployment:
        Featrix offers on-premises data processing with qualified NVIDIA
        hardware configurations. The API works exactly the same - just
        point your client to your on-premises endpoint:

        featrix = FeatrixSphere("https://your-on-premises-server.com")
    """

    def __init__(
        self,
        base_url: str = "https://sphere-api.featrix.com",
        compute_cluster: Optional[str] = None,
        default_max_retries: int = 5,
        default_timeout: int = 30,
        retry_base_delay: float = 2.0,
        retry_max_delay: float = 60.0,
    ):
        """
        Initialize the FeatrixSphere client.

        Args:
            base_url: API server URL
            compute_cluster: Compute cluster name (e.g., "burrito", "churro")
            default_max_retries: Default retry count for failed requests
            default_timeout: Default request timeout in seconds
            retry_base_delay: Base delay for exponential backoff
            retry_max_delay: Maximum delay for exponential backoff
        """
        self._base_url = base_url.rstrip('/')
        self._session = requests.Session()
        # NOTE(review): requests.Session does not natively honor a `timeout`
        # attribute -- timeouts must be passed per request. The attribute is
        # kept for any HTTPClientMixin code that reads it, and the value is
        # also stored on the client so request helpers can apply it
        # explicitly. TODO confirm how HTTPClientMixin picks up the timeout.
        self._session.timeout = default_timeout
        self._default_timeout = default_timeout

        # Identify the client version in the User-Agent when the package
        # metadata is importable; fall back to a bare product name.
        try:
            from featrixsphere import __version__
            self._session.headers.update({'User-Agent': f'FeatrixSphere {__version__}'})
        except ImportError:
            self._session.headers.update({'User-Agent': 'FeatrixSphere'})

        # Route requests to a specific compute cluster via a custom header.
        self._compute_cluster = compute_cluster
        if compute_cluster:
            self._session.headers.update({'X-Featrix-Node': compute_cluster})

        # Retry configuration -- presumably consumed by HTTPClientMixin's
        # request helpers (mixin not visible here).
        self._default_max_retries = default_max_retries
        self._retry_base_delay = retry_base_delay
        self._retry_max_delay = retry_max_delay

        # Shared context handed to resource objects (models, predictors, ...).
        self._ctx = ClientContext(self)
|
|
110
|
+
|
|
111
|
+
def set_compute_cluster(self, cluster: Optional[str]) -> None:
|
|
112
|
+
"""
|
|
113
|
+
Set the compute cluster for all subsequent requests.
|
|
114
|
+
|
|
115
|
+
Args:
|
|
116
|
+
cluster: Cluster name or None for default
|
|
117
|
+
"""
|
|
118
|
+
self._compute_cluster = cluster
|
|
119
|
+
if cluster:
|
|
120
|
+
self._session.headers.update({'X-Featrix-Node': cluster})
|
|
121
|
+
else:
|
|
122
|
+
self._session.headers.pop('X-Featrix-Node', None)
|
|
123
|
+
|
|
124
|
+
def create_foundational_model(
|
|
125
|
+
self,
|
|
126
|
+
name: Optional[str] = None,
|
|
127
|
+
csv_file: Optional[str] = None,
|
|
128
|
+
df: Optional['pd.DataFrame'] = None,
|
|
129
|
+
ignore_columns: Optional[List[str]] = None,
|
|
130
|
+
epochs: Optional[int] = None,
|
|
131
|
+
webhooks: Optional[Dict[str, str]] = None,
|
|
132
|
+
user_metadata: Optional[Dict[str, Any]] = None,
|
|
133
|
+
**kwargs
|
|
134
|
+
) -> FoundationalModel:
|
|
135
|
+
"""
|
|
136
|
+
Create a foundational model (embedding space).
|
|
137
|
+
|
|
138
|
+
Args:
|
|
139
|
+
name: Model name
|
|
140
|
+
csv_file: Path to CSV file with training data
|
|
141
|
+
df: DataFrame with training data (alternative to csv_file)
|
|
142
|
+
ignore_columns: Columns to ignore during training
|
|
143
|
+
epochs: Number of training epochs (None = auto)
|
|
144
|
+
webhooks: Webhook URLs for events
|
|
145
|
+
user_metadata: Custom metadata (max 32KB)
|
|
146
|
+
**kwargs: Additional parameters
|
|
147
|
+
|
|
148
|
+
Returns:
|
|
149
|
+
FoundationalModel object (training started)
|
|
150
|
+
|
|
151
|
+
Example:
|
|
152
|
+
fm = featrix.create_foundational_model(
|
|
153
|
+
name="customer_embeddings",
|
|
154
|
+
csv_file="customers.csv",
|
|
155
|
+
ignore_columns=["id", "timestamp"]
|
|
156
|
+
)
|
|
157
|
+
fm.wait_for_training()
|
|
158
|
+
"""
|
|
159
|
+
# Prepare file content
|
|
160
|
+
if df is not None:
|
|
161
|
+
file_content, filename = self._dataframe_to_file(df)
|
|
162
|
+
elif csv_file:
|
|
163
|
+
file_content, filename = self._read_file(csv_file)
|
|
164
|
+
else:
|
|
165
|
+
raise ValueError("Either csv_file or df must be provided")
|
|
166
|
+
|
|
167
|
+
# Build form data
|
|
168
|
+
form_data = {}
|
|
169
|
+
if name:
|
|
170
|
+
form_data['name'] = name
|
|
171
|
+
if ignore_columns:
|
|
172
|
+
import json
|
|
173
|
+
form_data['ignore_columns'] = json.dumps(ignore_columns)
|
|
174
|
+
if epochs is not None:
|
|
175
|
+
form_data['epochs'] = str(epochs)
|
|
176
|
+
if webhooks:
|
|
177
|
+
import json
|
|
178
|
+
form_data['webhooks'] = json.dumps(webhooks)
|
|
179
|
+
if user_metadata:
|
|
180
|
+
import json
|
|
181
|
+
form_data['user_metadata'] = json.dumps(user_metadata)
|
|
182
|
+
|
|
183
|
+
# Add any extra kwargs
|
|
184
|
+
for key, value in kwargs.items():
|
|
185
|
+
if value is not None:
|
|
186
|
+
if isinstance(value, (dict, list)):
|
|
187
|
+
import json
|
|
188
|
+
form_data[key] = json.dumps(value)
|
|
189
|
+
else:
|
|
190
|
+
form_data[key] = str(value)
|
|
191
|
+
|
|
192
|
+
# Upload file and create session
|
|
193
|
+
files = {'file': (filename, file_content)}
|
|
194
|
+
|
|
195
|
+
response = self._post_multipart(
|
|
196
|
+
"/compute/upload_with_new_session/",
|
|
197
|
+
data=form_data,
|
|
198
|
+
files=files
|
|
199
|
+
)
|
|
200
|
+
|
|
201
|
+
session_id = response.get('session_id', '')
|
|
202
|
+
|
|
203
|
+
# Handle warnings
|
|
204
|
+
warnings = response.get('warnings', [])
|
|
205
|
+
if warnings:
|
|
206
|
+
for warning in warnings:
|
|
207
|
+
logger.warning(f"Upload warning: {warning}")
|
|
208
|
+
|
|
209
|
+
return FoundationalModel(
|
|
210
|
+
id=session_id,
|
|
211
|
+
name=name,
|
|
212
|
+
status="training",
|
|
213
|
+
created_at=None,
|
|
214
|
+
_ctx=self._ctx,
|
|
215
|
+
)
|
|
216
|
+
|
|
217
|
+
def foundational_model(self, fm_id: str) -> FoundationalModel:
|
|
218
|
+
"""
|
|
219
|
+
Get an existing foundational model by ID.
|
|
220
|
+
|
|
221
|
+
Args:
|
|
222
|
+
fm_id: Foundational model (session) ID
|
|
223
|
+
|
|
224
|
+
Returns:
|
|
225
|
+
FoundationalModel object
|
|
226
|
+
|
|
227
|
+
Example:
|
|
228
|
+
fm = featrix.foundational_model("abc123")
|
|
229
|
+
print(fm.status)
|
|
230
|
+
"""
|
|
231
|
+
return FoundationalModel.from_session_id(fm_id, self._ctx)
|
|
232
|
+
|
|
233
|
+
def predictor(self, predictor_id: str, session_id: Optional[str] = None) -> Predictor:
|
|
234
|
+
"""
|
|
235
|
+
Get an existing predictor by ID.
|
|
236
|
+
|
|
237
|
+
Args:
|
|
238
|
+
predictor_id: Predictor ID
|
|
239
|
+
session_id: Session ID (required if predictor_id alone is not unique)
|
|
240
|
+
|
|
241
|
+
Returns:
|
|
242
|
+
Predictor object
|
|
243
|
+
|
|
244
|
+
Note:
|
|
245
|
+
In most cases, you should access predictors through the
|
|
246
|
+
FoundationalModel: fm.list_predictors()
|
|
247
|
+
"""
|
|
248
|
+
if not session_id:
|
|
249
|
+
# Try to find session from predictor ID
|
|
250
|
+
# This may not work for all cases
|
|
251
|
+
raise ValueError("session_id is required to load a predictor")
|
|
252
|
+
|
|
253
|
+
# Get predictor info from session
|
|
254
|
+
response = self._get_json(f"/session/{session_id}/predictor")
|
|
255
|
+
predictors_data = response.get('predictors', {})
|
|
256
|
+
|
|
257
|
+
if predictor_id not in predictors_data:
|
|
258
|
+
raise ValueError(f"Predictor {predictor_id} not found in session {session_id}")
|
|
259
|
+
|
|
260
|
+
pred_info = predictors_data[predictor_id]
|
|
261
|
+
|
|
262
|
+
return Predictor(
|
|
263
|
+
id=predictor_id,
|
|
264
|
+
session_id=session_id,
|
|
265
|
+
target_column=pred_info.get('target_column', ''),
|
|
266
|
+
target_type=pred_info.get('target_type', 'set'),
|
|
267
|
+
name=pred_info.get('name'),
|
|
268
|
+
status=pred_info.get('status'),
|
|
269
|
+
accuracy=pred_info.get('accuracy'),
|
|
270
|
+
_ctx=self._ctx,
|
|
271
|
+
)
|
|
272
|
+
|
|
273
|
+
def vector_database(self, vdb_id: str) -> VectorDatabase:
|
|
274
|
+
"""
|
|
275
|
+
Get an existing vector database by ID.
|
|
276
|
+
|
|
277
|
+
Args:
|
|
278
|
+
vdb_id: Vector database (session) ID
|
|
279
|
+
|
|
280
|
+
Returns:
|
|
281
|
+
VectorDatabase object
|
|
282
|
+
"""
|
|
283
|
+
return VectorDatabase.from_session(vdb_id, ctx=self._ctx)
|
|
284
|
+
|
|
285
|
+
def api_endpoint(self, endpoint_id: str, session_id: str) -> APIEndpoint:
|
|
286
|
+
"""
|
|
287
|
+
Get an existing API endpoint by ID.
|
|
288
|
+
|
|
289
|
+
Args:
|
|
290
|
+
endpoint_id: API endpoint ID
|
|
291
|
+
session_id: Session ID
|
|
292
|
+
|
|
293
|
+
Returns:
|
|
294
|
+
APIEndpoint object
|
|
295
|
+
"""
|
|
296
|
+
response = self._get_json(f"/session/{session_id}/endpoint/{endpoint_id}")
|
|
297
|
+
|
|
298
|
+
return APIEndpoint.from_response(
|
|
299
|
+
response=response,
|
|
300
|
+
predictor_id=response.get('predictor_id', ''),
|
|
301
|
+
session_id=session_id,
|
|
302
|
+
ctx=self._ctx,
|
|
303
|
+
)
|
|
304
|
+
|
|
305
|
+
def get_notebook(self) -> FeatrixNotebookHelper:
|
|
306
|
+
"""
|
|
307
|
+
Get the Jupyter notebook visualization helper.
|
|
308
|
+
|
|
309
|
+
Returns a helper object with methods for visualizing training,
|
|
310
|
+
embedding spaces, and model analysis in Jupyter notebooks.
|
|
311
|
+
|
|
312
|
+
Returns:
|
|
313
|
+
FeatrixNotebookHelper instance
|
|
314
|
+
|
|
315
|
+
Example:
|
|
316
|
+
notebook = featrix.get_notebook()
|
|
317
|
+
fig = notebook.training_loss(fm)
|
|
318
|
+
fig.show()
|
|
319
|
+
"""
|
|
320
|
+
return FeatrixNotebookHelper(ctx=self._ctx)
|
|
321
|
+
|
|
322
|
+
def prediction_feedback(
|
|
323
|
+
self,
|
|
324
|
+
prediction_uuid: str,
|
|
325
|
+
ground_truth: Union[str, float]
|
|
326
|
+
) -> Dict[str, Any]:
|
|
327
|
+
"""
|
|
328
|
+
Send feedback for a prediction.
|
|
329
|
+
|
|
330
|
+
Convenience method that creates and sends feedback in one call.
|
|
331
|
+
|
|
332
|
+
Args:
|
|
333
|
+
prediction_uuid: UUID from PredictionResult.prediction_uuid
|
|
334
|
+
ground_truth: The correct label/value
|
|
335
|
+
|
|
336
|
+
Returns:
|
|
337
|
+
Server response
|
|
338
|
+
"""
|
|
339
|
+
return PredictionFeedback.create_and_send(
|
|
340
|
+
ctx=self._ctx,
|
|
341
|
+
prediction_uuid=prediction_uuid,
|
|
342
|
+
ground_truth=ground_truth
|
|
343
|
+
)
|
|
344
|
+
|
|
345
|
+
def health_check(self) -> Dict[str, Any]:
|
|
346
|
+
"""
|
|
347
|
+
Check if the API server is healthy.
|
|
348
|
+
|
|
349
|
+
Returns:
|
|
350
|
+
Health status dictionary
|
|
351
|
+
"""
|
|
352
|
+
return self._get_json("/health")
|
|
353
|
+
|
|
354
|
+
def _dataframe_to_file(self, df: 'pd.DataFrame') -> tuple:
|
|
355
|
+
"""Convert DataFrame to file content and filename."""
|
|
356
|
+
# Try parquet first (more efficient)
|
|
357
|
+
try:
|
|
358
|
+
import pyarrow
|
|
359
|
+
buffer = io.BytesIO()
|
|
360
|
+
df.to_parquet(buffer, index=False)
|
|
361
|
+
return buffer.getvalue(), "data.parquet"
|
|
362
|
+
except ImportError:
|
|
363
|
+
pass
|
|
364
|
+
|
|
365
|
+
# Fall back to CSV
|
|
366
|
+
csv_buffer = io.StringIO()
|
|
367
|
+
df.to_csv(csv_buffer, index=False)
|
|
368
|
+
content = csv_buffer.getvalue().encode('utf-8')
|
|
369
|
+
|
|
370
|
+
# Compress if large
|
|
371
|
+
if len(content) > 100_000:
|
|
372
|
+
compressed = gzip.compress(content)
|
|
373
|
+
if len(compressed) < len(content):
|
|
374
|
+
return compressed, "data.csv.gz"
|
|
375
|
+
|
|
376
|
+
return content, "data.csv"
|
|
377
|
+
|
|
378
|
+
def _read_file(self, file_path: str) -> tuple:
|
|
379
|
+
"""Read file content and return with filename."""
|
|
380
|
+
path = Path(file_path)
|
|
381
|
+
filename = path.name
|
|
382
|
+
|
|
383
|
+
with open(path, 'rb') as f:
|
|
384
|
+
content = f.read()
|
|
385
|
+
|
|
386
|
+
# Compress if large and not already compressed
|
|
387
|
+
if len(content) > 100_000 and not filename.endswith('.gz'):
|
|
388
|
+
compressed = gzip.compress(content)
|
|
389
|
+
if len(compressed) < len(content):
|
|
390
|
+
return compressed, filename + '.gz'
|
|
391
|
+
|
|
392
|
+
return content, filename
|
|
393
|
+
|
|
394
|
+
def __repr__(self) -> str:
|
|
395
|
+
cluster_str = f", cluster='{self._compute_cluster}'" if self._compute_cluster else ""
|
|
396
|
+
return f"FeatrixSphere(url='{self._base_url}'{cluster_str})"
|