juniper-data-client 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- juniper_data_client/__init__.py +28 -0
- juniper_data_client/client.py +407 -0
- juniper_data_client/exceptions.py +37 -0
- juniper_data_client/py.typed +0 -0
- juniper_data_client-0.3.0.dist-info/METADATA +250 -0
- juniper_data_client-0.3.0.dist-info/RECORD +9 -0
- juniper_data_client-0.3.0.dist-info/WHEEL +5 -0
- juniper_data_client-0.3.0.dist-info/licenses/LICENSE +21 -0
- juniper_data_client-0.3.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""JuniperData Client - Python client library for the JuniperData REST API.
|
|
2
|
+
|
|
3
|
+
This package provides a simple, robust client for interacting with the JuniperData
|
|
4
|
+
dataset generation service, used by both JuniperCascor and JuniperCanopy.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from juniper_data_client.client import JuniperDataClient
|
|
8
|
+
from juniper_data_client.exceptions import (
|
|
9
|
+
JuniperDataClientError,
|
|
10
|
+
JuniperDataConfigurationError,
|
|
11
|
+
JuniperDataConnectionError,
|
|
12
|
+
JuniperDataNotFoundError,
|
|
13
|
+
JuniperDataTimeoutError,
|
|
14
|
+
JuniperDataValidationError,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
# Version string of this package release.
__version__ = "0.3.0"

# Names re-exported at package level: the client class, the exception
# hierarchy imported above, and the version string itself.
__all__ = [
    "JuniperDataClient",
    "JuniperDataClientError",
    "JuniperDataConfigurationError",
    "JuniperDataConnectionError",
    "JuniperDataNotFoundError",
    "JuniperDataTimeoutError",
    "JuniperDataValidationError",
    "__version__",
]
|
|
@@ -0,0 +1,407 @@
|
|
|
1
|
+
"""REST API client for JuniperData service integration.
|
|
2
|
+
|
|
3
|
+
Provides dataset creation, artifact download, and preview functionality
|
|
4
|
+
for JuniperCascor and JuniperCanopy applications.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import io
|
|
8
|
+
import os
|
|
9
|
+
import time
|
|
10
|
+
from typing import Any, Dict, List, Optional
|
|
11
|
+
from urllib.parse import urlparse
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
import requests
|
|
15
|
+
from requests.adapters import HTTPAdapter
|
|
16
|
+
from urllib3.util.retry import Retry
|
|
17
|
+
|
|
18
|
+
from juniper_data_client.exceptions import (
|
|
19
|
+
JuniperDataClientError,
|
|
20
|
+
JuniperDataConnectionError,
|
|
21
|
+
JuniperDataNotFoundError,
|
|
22
|
+
JuniperDataTimeoutError,
|
|
23
|
+
JuniperDataValidationError,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class JuniperDataClient:
|
|
28
|
+
"""Client for interacting with the JuniperData REST API.
|
|
29
|
+
|
|
30
|
+
Provides methods for dataset creation, artifact retrieval, and service health
|
|
31
|
+
checking with automatic retry logic and connection pooling.
|
|
32
|
+
|
|
33
|
+
Example:
|
|
34
|
+
>>> client = JuniperDataClient("http://localhost:8100")
|
|
35
|
+
>>> result = client.create_dataset("spiral", {"n_spirals": 2, "seed": 42})
|
|
36
|
+
>>> arrays = client.download_artifact_npz(result["dataset_id"])
|
|
37
|
+
>>> X_train, y_train = arrays["X_train"], arrays["y_train"]
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
DEFAULT_TIMEOUT = 30
|
|
41
|
+
DEFAULT_RETRIES = 3
|
|
42
|
+
DEFAULT_BACKOFF_FACTOR = 0.5
|
|
43
|
+
|
|
44
|
+
def __init__(
    self,
    base_url: str = "http://localhost:8100",
    timeout: int = DEFAULT_TIMEOUT,
    retries: int = DEFAULT_RETRIES,
    backoff_factor: float = DEFAULT_BACKOFF_FACTOR,
    api_key: Optional[str] = None,
):
    """Store connection settings, build the HTTP session, and set up auth.

    Args:
        base_url: Root URL of the JuniperData service. It is passed through
            ``_normalize_url`` before being stored.
        timeout: Default per-request timeout in seconds.
        retries: Retry budget handed to the session's retry policy.
        backoff_factor: Backoff factor handed to the session's retry policy.
        api_key: Value for the ``X-API-Key`` header. When falsy, the
            ``JUNIPER_DATA_API_KEY`` environment variable is consulted
            instead; if neither yields a value, no header is set.
    """
    self.base_url = self._normalize_url(base_url)
    # The retry settings must be on the instance before the session is
    # built, because _create_session() reads them from self.
    self.timeout, self.retries, self.backoff_factor = timeout, retries, backoff_factor
    self.session = self._create_session()

    key = api_key if api_key else os.environ.get("JUNIPER_DATA_API_KEY")
    if key:
        self.session.headers["X-API-Key"] = key
|
|
71
|
+
|
|
72
|
+
def _normalize_url(self, url: str) -> str:
|
|
73
|
+
"""Normalize the base URL for consistent API calls.
|
|
74
|
+
|
|
75
|
+
Args:
|
|
76
|
+
url: Raw URL string to normalize
|
|
77
|
+
|
|
78
|
+
Returns:
|
|
79
|
+
Normalized URL with scheme, no trailing slash, no /v1 suffix
|
|
80
|
+
"""
|
|
81
|
+
url = url.strip()
|
|
82
|
+
|
|
83
|
+
if not url.startswith(("http://", "https://")):
|
|
84
|
+
url = f"http://{url}"
|
|
85
|
+
|
|
86
|
+
parsed = urlparse(url)
|
|
87
|
+
normalized = f"{parsed.scheme}://{parsed.netloc}{parsed.path}"
|
|
88
|
+
normalized = normalized.rstrip("/")
|
|
89
|
+
|
|
90
|
+
if normalized.endswith("/v1"):
|
|
91
|
+
normalized = normalized[:-3]
|
|
92
|
+
|
|
93
|
+
return normalized
|
|
94
|
+
|
|
95
|
+
def _create_session(self) -> requests.Session:
    """Create a requests session with retry logic and connection pooling.

    Reads ``self.retries`` and ``self.backoff_factor``, so both must be set
    before this is called.

    Returns:
        Configured requests.Session with the retry adapter mounted for both
        ``http://`` and ``https://`` URLs.
    """
    retry_strategy = Retry(
        total=self.retries,
        backoff_factor=self.backoff_factor,
        status_forcelist=[429, 500, 502, 503, 504],
        # POST and DELETE are retried in addition to the read-only methods.
        allowed_methods=["HEAD", "GET", "POST", "DELETE"],
        # Once retries are exhausted, hand back the last response instead of
        # raising RetryError. _request() can then report the real status
        # code and the server's error detail rather than a generic failure.
        raise_on_status=False,
    )

    adapter = HTTPAdapter(
        max_retries=retry_strategy,
        pool_connections=10,
        pool_maxsize=10,
    )

    session = requests.Session()
    for prefix in ("http://", "https://"):
        session.mount(prefix, adapter)
    return session
|
|
120
|
+
|
|
121
|
+
def _request(self, method: str, endpoint: str, **kwargs: Any) -> requests.Response:
    """Make an HTTP request with error handling.

    The URL is ``self.base_url`` followed by ``endpoint``. Unless the caller
    passes ``timeout`` explicitly, ``self.timeout`` is used.

    Args:
        method: HTTP method (GET, POST, etc.)
        endpoint: API endpoint path
        **kwargs: Additional arguments passed to ``session.request``

    Returns:
        Response object (only for responses where ``response.ok`` is true)

    Raises:
        JuniperDataConnectionError: On connection failure
        JuniperDataTimeoutError: On request timeout
        JuniperDataNotFoundError: On 404 response
        JuniperDataValidationError: On 400/422 response
        JuniperDataClientError: On other HTTP errors
    """
    url = f"{self.base_url}{endpoint}"
    # Keep the timeout actually in effect so the error message is accurate
    # even when the caller overrides the client-wide default.
    effective_timeout = kwargs.setdefault("timeout", self.timeout)

    try:
        response = self.session.request(method, url, **kwargs)
    except requests.exceptions.ConnectionError as e:
        raise JuniperDataConnectionError(f"Failed to connect to JuniperData at {self.base_url}: {e}") from e
    except requests.exceptions.Timeout as e:
        raise JuniperDataTimeoutError(f"Request to {url} timed out after {effective_timeout}s: {e}") from e
    except requests.exceptions.RequestException as e:
        raise JuniperDataClientError(f"Request failed: {e}") from e

    if response.ok:
        return response

    # Prefer the "detail" field of a JSON object body; otherwise fall back
    # to the raw response text.
    error_detail = response.text
    try:
        error_json = response.json()
    except ValueError:
        # Body is not valid JSON: keep the raw text.
        pass
    else:
        # Only a JSON object can carry "detail". A bare number or null would
        # make the membership test raise TypeError, so check the type first.
        if isinstance(error_json, dict) and "detail" in error_json:
            error_detail = error_json["detail"]

    if response.status_code == 404:
        raise JuniperDataNotFoundError(f"Resource not found: {error_detail}")
    if response.status_code in (400, 422):
        raise JuniperDataValidationError(f"Validation error: {error_detail}")
    raise JuniperDataClientError(f"Request failed ({response.status_code}): {error_detail}")
|
|
170
|
+
|
|
171
|
+
def health_check(self) -> Dict[str, Any]:
    """Query the service health endpoint.

    Issues ``GET /v1/health`` and returns the decoded JSON body.

    Returns:
        Health status response from the service

    Raises:
        JuniperDataConnectionError: If service is unreachable
    """
    return self._request("GET", "/v1/health").json()
|
|
182
|
+
|
|
183
|
+
def is_ready(self) -> bool:
    """Check if the JuniperData service is ready to accept requests.

    Issues ``GET /v1/health/ready`` and looks for ``{"status": "ready"}``
    in the JSON body. Any client error, an undecodable body, or a body
    that is not a JSON object counts as "not ready" rather than raising,
    so the method is safe to poll.

    Returns:
        True if service is ready, False otherwise
    """
    try:
        payload = self._request("GET", "/v1/health/ready").json()
    except (JuniperDataClientError, ValueError):
        # ValueError covers a 2xx response whose body is not valid JSON.
        return False
    # Guard against JSON arrays/scalars, which have no .get().
    return isinstance(payload, dict) and payload.get("status") == "ready"
|
|
194
|
+
|
|
195
|
+
def wait_for_ready(self, timeout: float = 30.0, poll_interval: float = 0.5) -> bool:
    """Wait for the JuniperData service to become ready.

    Polls ``is_ready()`` until it returns True or ``timeout`` seconds have
    elapsed. With a non-positive ``timeout`` no check is made and False is
    returned.

    Args:
        timeout: Maximum time to wait in seconds (default: 30)
        poll_interval: Time between readiness checks in seconds (default: 0.5)

    Returns:
        True if service became ready within timeout, False otherwise
    """
    # A monotonic clock is immune to wall-clock adjustments (NTP, DST),
    # which could otherwise shorten or lengthen the wait.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if self.is_ready():
            return True
        remaining = deadline - time.monotonic()
        if remaining > 0:
            # Never sleep past the deadline, even with a large poll interval.
            time.sleep(min(poll_interval, remaining))
    return False
|
|
211
|
+
|
|
212
|
+
def list_generators(self) -> List[Dict[str, Any]]:
    """Fetch the available dataset generators.

    Issues ``GET /v1/generators`` and returns the decoded JSON body.

    Returns:
        List of generator information dictionaries
    """
    return self._request("GET", "/v1/generators").json()
|
|
220
|
+
|
|
221
|
+
def get_generator_schema(self, name: str) -> Dict[str, Any]:
    """Fetch the parameter schema of one generator.

    Issues ``GET /v1/generators/{name}/schema`` and returns the decoded
    JSON body.

    Args:
        name: Generator name (e.g., "spiral")

    Returns:
        JSON schema for generator parameters

    Raises:
        JuniperDataNotFoundError: If generator not found
    """
    schema_path = f"/v1/generators/{name}/schema"
    return self._request("GET", schema_path).json()
|
|
235
|
+
|
|
236
|
+
def create_dataset(self, generator: str, params: Dict[str, Any], persist: bool = True) -> Dict[str, Any]:
    """Request a dataset from the JuniperData API.

    Posts ``generator``, ``params`` and ``persist`` as a JSON body to
    ``/v1/datasets``. Per the service contract, a dataset that already
    exists for the same parameters is returned instead of being regenerated
    (caching behavior).

    Args:
        generator: Name of the dataset generator to use (e.g., "spiral")
        params: Parameters to pass to the generator
        persist: Whether to persist the dataset (default: True)

    Returns:
        Parsed JSON response containing dataset_id, generator, meta, and artifact_url

    Raises:
        JuniperDataValidationError: If parameters are invalid
        JuniperDataNotFoundError: If generator not found
    """
    body = dict(generator=generator, params=params, persist=persist)
    return self._request("POST", "/v1/datasets", json=body).json()
|
|
262
|
+
|
|
263
|
+
def list_datasets(self, limit: int = 100, offset: int = 0) -> List[str]:
    """Fetch one page of dataset IDs.

    Issues ``GET /v1/datasets`` with ``limit`` and ``offset`` as query
    parameters and returns the decoded JSON body.

    Args:
        limit: Maximum number of dataset IDs to return (default: 100)
        offset: Number of dataset IDs to skip (default: 0)

    Returns:
        List of dataset ID strings
    """
    paging = {"limit": limit, "offset": offset}
    return self._request("GET", "/v1/datasets", params=paging).json()
|
|
275
|
+
|
|
276
|
+
def get_dataset_metadata(self, dataset_id: str) -> Dict[str, Any]:
    """Fetch the metadata record of one dataset.

    Issues ``GET /v1/datasets/{dataset_id}`` and returns the decoded JSON
    body.

    Args:
        dataset_id: Unique dataset identifier

    Returns:
        Dataset metadata dictionary

    Raises:
        JuniperDataNotFoundError: If dataset not found
    """
    metadata_path = f"/v1/datasets/{dataset_id}"
    return self._request("GET", metadata_path).json()
|
|
290
|
+
|
|
291
|
+
def download_artifact_bytes(self, dataset_id: str) -> bytes:
    """Fetch a dataset's NPZ artifact as raw bytes.

    Issues ``GET /v1/datasets/{dataset_id}/artifact`` and returns the
    response body without decoding it.

    Args:
        dataset_id: ID of the dataset whose artifact to download

    Returns:
        Raw bytes of the NPZ file

    Raises:
        JuniperDataNotFoundError: If dataset not found
    """
    artifact_path = f"/v1/datasets/{dataset_id}/artifact"
    return self._request("GET", artifact_path).content
|
|
305
|
+
|
|
306
|
+
def download_artifact_npz(self, dataset_id: str) -> Dict[str, np.ndarray]:
    """Download and load an NPZ artifact for a dataset.

    The bytes come from ``download_artifact_bytes`` and every array in the
    archive is read into memory before the archive is closed.

    Per the service contract, the returned dictionary contains numpy arrays
    with the standard keys:
    - X_train, y_train: Training features and one-hot labels
    - X_test, y_test: Test features and one-hot labels
    - X_full, y_full: Full dataset features and one-hot labels

    All arrays are float32 dtype.

    Args:
        dataset_id: ID of the dataset whose artifact to download

    Returns:
        Dictionary mapping array names to numpy arrays

    Raises:
        JuniperDataNotFoundError: If dataset not found
    """
    content = self.download_artifact_bytes(dataset_id)
    # The archive comes from the network, so refuse pickled object arrays
    # explicitly. The context manager closes the underlying zip handle once
    # all arrays have been read.
    with np.load(io.BytesIO(content), allow_pickle=False) as npz_file:
        return {key: npz_file[key] for key in npz_file.files}
|
|
328
|
+
|
|
329
|
+
def get_preview(self, dataset_id: str, n: int = 100) -> Dict[str, Any]:
    """Fetch a JSON preview of a dataset's samples.

    Issues ``GET /v1/datasets/{dataset_id}/preview`` with ``n`` as a query
    parameter and returns the decoded JSON body.

    Args:
        dataset_id: ID of the dataset to preview
        n: Number of samples to include in preview (default: 100, max: 1000)

    Returns:
        Dictionary containing n_samples, X_sample, and y_sample

    Raises:
        JuniperDataNotFoundError: If dataset not found
    """
    preview_path = f"/v1/datasets/{dataset_id}/preview"
    return self._request("GET", preview_path, params={"n": n}).json()
|
|
344
|
+
|
|
345
|
+
def delete_dataset(self, dataset_id: str) -> bool:
    """Remove a dataset from the service.

    Issues ``DELETE /v1/datasets/{dataset_id}``. Failures surface as
    exceptions from the request, so reaching the return statement means
    the request succeeded.

    Args:
        dataset_id: Unique dataset identifier

    Returns:
        True if dataset was deleted

    Raises:
        JuniperDataNotFoundError: If dataset not found
    """
    dataset_path = f"/v1/datasets/{dataset_id}"
    self._request("DELETE", dataset_path)
    return True
|
|
359
|
+
|
|
360
|
+
def create_spiral_dataset(
    self,
    n_spirals: int = 2,
    n_points_per_spiral: int = 100,
    noise: float = 0.1,
    seed: Optional[int] = None,
    algorithm: str = "modern",
    train_ratio: float = 0.8,
    **kwargs: Any,
) -> Dict[str, Any]:
    """Create a dataset with the "spiral" generator.

    Collects the named arguments into a parameter dictionary and delegates
    to ``create_dataset("spiral", ...)``. ``seed`` is only sent when it is
    not None.

    Args:
        n_spirals: Number of spiral arms (default: 2)
        n_points_per_spiral: Points per spiral arm (default: 100)
        noise: Noise level (default: 0.1)
        seed: Random seed for reproducibility (optional)
        algorithm: Generation algorithm - "modern" or "legacy_cascor" (default: "modern")
        train_ratio: Fraction of data for training (default: 0.8)
        **kwargs: Additional parameters passed to the generator

    Returns:
        Dataset creation response with dataset_id and metadata
    """
    spiral_params: Dict[str, Any] = dict(
        n_spirals=n_spirals,
        n_points_per_spiral=n_points_per_spiral,
        noise=noise,
        algorithm=algorithm,
        train_ratio=train_ratio,
    )
    if seed is not None:
        spiral_params["seed"] = seed
    # Extra keyword arguments are merged last.
    spiral_params |= kwargs

    return self.create_dataset("spiral", spiral_params)
|
|
396
|
+
|
|
397
|
+
def close(self) -> None:
    """Shut down the underlying HTTP session, releasing its resources."""
    http_session = self.session
    http_session.close()
|
|
400
|
+
|
|
401
|
+
def __enter__(self) -> "JuniperDataClient":
|
|
402
|
+
"""Context manager entry."""
|
|
403
|
+
return self
|
|
404
|
+
|
|
405
|
+
def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
|
|
406
|
+
"""Context manager exit - closes the session."""
|
|
407
|
+
self.close()
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""Custom exceptions for the JuniperData client library."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class JuniperDataClientError(Exception):
    """Root of the JuniperData client exception hierarchy.

    Catching this type catches every error defined by the client library.
    """
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class JuniperDataConnectionError(JuniperDataClientError):
    """Signals that a connection to the JuniperData service could not be made."""
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class JuniperDataTimeoutError(JuniperDataClientError):
    """Signals that a request to JuniperData exceeded its timeout."""
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class JuniperDataNotFoundError(JuniperDataClientError):
    """Signals that the requested resource does not exist (HTTP 404)."""
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class JuniperDataValidationError(JuniperDataClientError):
    """Signals that request parameters were rejected as invalid (HTTP 400/422)."""
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class JuniperDataConfigurationError(JuniperDataClientError):
    """Signals that JuniperData configuration is missing or invalid."""
|
|
File without changes
|
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: juniper-data-client
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: HTTP client for the JuniperData dataset generation service
|
|
5
|
+
Author: Paul Calnon
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/pcalnon/juniper-data-client
|
|
8
|
+
Project-URL: Repository, https://github.com/pcalnon/juniper-data-client
|
|
9
|
+
Project-URL: Documentation, https://github.com/pcalnon/juniper-data-client#readme
|
|
10
|
+
Project-URL: Issues, https://github.com/pcalnon/juniper-data-client/issues
|
|
11
|
+
Keywords: juniper,dataset,machine-learning,api-client
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
22
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
23
|
+
Classifier: Typing :: Typed
|
|
24
|
+
Requires-Python: >=3.11
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
License-File: LICENSE
|
|
27
|
+
Requires-Dist: numpy>=1.24.0
|
|
28
|
+
Requires-Dist: requests>=2.28.0
|
|
29
|
+
Requires-Dist: urllib3>=2.0.0
|
|
30
|
+
Provides-Extra: test
|
|
31
|
+
Requires-Dist: pytest>=7.0.0; extra == "test"
|
|
32
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "test"
|
|
33
|
+
Requires-Dist: pytest-timeout>=2.2.0; extra == "test"
|
|
34
|
+
Requires-Dist: responses>=0.23.0; extra == "test"
|
|
35
|
+
Provides-Extra: dev
|
|
36
|
+
Requires-Dist: juniper-data-client[test]; extra == "dev"
|
|
37
|
+
Requires-Dist: black>=23.0.0; extra == "dev"
|
|
38
|
+
Requires-Dist: isort>=5.12.0; extra == "dev"
|
|
39
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
40
|
+
Requires-Dist: flake8>=7.0.0; extra == "dev"
|
|
41
|
+
Requires-Dist: types-requests>=2.28.0; extra == "dev"
|
|
42
|
+
Dynamic: license-file
|
|
43
|
+
|
|
44
|
+
# juniper-data-client
|
|
45
|
+
|
|
46
|
+
Python client library for the JuniperData REST API.
|
|
47
|
+
|
|
48
|
+
## Overview
|
|
49
|
+
|
|
50
|
+
`juniper-data-client` provides a simple, robust client for interacting with the JuniperData dataset generation service. It is the official client library used by both JuniperCascor (neural network backend) and JuniperCanopy (web dashboard).
|
|
51
|
+
|
|
52
|
+
## Installation
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
pip install juniper-data-client
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Or install from source:
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
cd juniper-data-client
|
|
62
|
+
pip install -e .
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## Quick Start
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
from juniper_data_client import JuniperDataClient
|
|
69
|
+
|
|
70
|
+
# Create client (default: localhost:8100)
|
|
71
|
+
client = JuniperDataClient("http://localhost:8100")
|
|
72
|
+
|
|
73
|
+
# Check service health
|
|
74
|
+
health = client.health_check()
|
|
75
|
+
print(f"Service status: {health['status']}")
|
|
76
|
+
|
|
77
|
+
# Create a spiral dataset
|
|
78
|
+
result = client.create_spiral_dataset(
|
|
79
|
+
n_spirals=2,
|
|
80
|
+
n_points_per_spiral=100,
|
|
81
|
+
noise=0.1,
|
|
82
|
+
seed=42,
|
|
83
|
+
)
|
|
84
|
+
dataset_id = result["dataset_id"]
|
|
85
|
+
print(f"Created dataset: {dataset_id}")
|
|
86
|
+
|
|
87
|
+
# Download as numpy arrays
|
|
88
|
+
arrays = client.download_artifact_npz(dataset_id)
|
|
89
|
+
X_train = arrays["X_train"] # (160, 2) float32
|
|
90
|
+
y_train = arrays["y_train"] # (160, 2) float32 one-hot
|
|
91
|
+
X_test = arrays["X_test"] # (40, 2) float32
|
|
92
|
+
y_test = arrays["y_test"] # (40, 2) float32 one-hot
|
|
93
|
+
|
|
94
|
+
print(f"Training samples: {len(X_train)}")
|
|
95
|
+
print(f"Test samples: {len(X_test)}")
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## Features
|
|
99
|
+
|
|
100
|
+
- **Simple API**: Easy-to-use methods for all JuniperData endpoints
|
|
101
|
+
- **Automatic Retries**: Built-in retry logic with backoff for transient failures (HTTP 429, 500, 502, 503, 504)
|
|
102
|
+
- **Connection Pooling**: Efficient HTTP connection reuse
|
|
103
|
+
- **Type Hints**: Full type annotations for IDE support
|
|
104
|
+
- **Context Manager**: Resource cleanup with `with` statement
|
|
105
|
+
- **Custom Exceptions**: Granular error handling
|
|
106
|
+
|
|
107
|
+
## Usage Examples
|
|
108
|
+
|
|
109
|
+
### Context Manager
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
with JuniperDataClient("http://localhost:8100") as client:
|
|
113
|
+
result = client.create_spiral_dataset(seed=42)
|
|
114
|
+
arrays = client.download_artifact_npz(result["dataset_id"])
|
|
115
|
+
# Session automatically closed
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### Wait for Service
|
|
119
|
+
|
|
120
|
+
```python
|
|
121
|
+
client = JuniperDataClient("http://localhost:8100")
|
|
122
|
+
|
|
123
|
+
# Wait up to 30 seconds for service to be ready
|
|
124
|
+
if client.wait_for_ready(timeout=30):
|
|
125
|
+
result = client.create_spiral_dataset(seed=42)
|
|
126
|
+
else:
|
|
127
|
+
print("Service not available")
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
### Custom Parameters
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
# Using the general create_dataset method
|
|
134
|
+
result = client.create_dataset(
|
|
135
|
+
generator="spiral",
|
|
136
|
+
params={
|
|
137
|
+
"n_spirals": 3,
|
|
138
|
+
"n_points_per_spiral": 200,
|
|
139
|
+
"noise": 0.2,
|
|
140
|
+
"seed": 12345,
|
|
141
|
+
"algorithm": "legacy_cascor",
|
|
142
|
+
"radius": 10.0,
|
|
143
|
+
}
|
|
144
|
+
)
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
### Error Handling
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
from juniper_data_client import (
|
|
151
|
+
JuniperDataClient,
|
|
152
|
+
JuniperDataConnectionError,
|
|
153
|
+
JuniperDataNotFoundError,
|
|
154
|
+
JuniperDataValidationError,
|
|
155
|
+
)
|
|
156
|
+
|
|
157
|
+
client = JuniperDataClient()
|
|
158
|
+
|
|
159
|
+
try:
|
|
160
|
+
result = client.create_dataset("spiral", {"n_spirals": -1})
|
|
161
|
+
except JuniperDataValidationError as e:
|
|
162
|
+
print(f"Invalid parameters: {e}")
|
|
163
|
+
except JuniperDataConnectionError as e:
|
|
164
|
+
print(f"Service unreachable: {e}")
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
### PyTorch Integration
|
|
168
|
+
|
|
169
|
+
```python
|
|
170
|
+
import torch
|
|
171
|
+
from juniper_data_client import JuniperDataClient
|
|
172
|
+
|
|
173
|
+
client = JuniperDataClient()
|
|
174
|
+
result = client.create_spiral_dataset(seed=42)
|
|
175
|
+
arrays = client.download_artifact_npz(result["dataset_id"])
|
|
176
|
+
|
|
177
|
+
# Convert to PyTorch tensors
|
|
178
|
+
X_train = torch.from_numpy(arrays["X_train"]) # torch.float32
|
|
179
|
+
y_train = torch.from_numpy(arrays["y_train"]) # torch.float32
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
## API Reference
|
|
183
|
+
|
|
184
|
+
### JuniperDataClient
|
|
185
|
+
|
|
186
|
+
| Method | Description |
|
|
187
|
+
| ----------------------------------- | -------------------------------------- |
|
|
188
|
+
| `health_check()` | Get service health status |
|
|
189
|
+
| `is_ready()` | Check if service is ready (boolean) |
|
|
190
|
+
| `wait_for_ready(timeout)` | Wait for service to become ready |
|
|
191
|
+
| `list_generators()` | List available generators |
|
|
192
|
+
| `get_generator_schema(name)` | Get parameter schema for generator |
|
|
193
|
+
| `create_dataset(generator, params)` | Create dataset with generator |
|
|
194
|
+
| `create_spiral_dataset(**kwargs)` | Convenience method for spiral datasets |
|
|
195
|
+
| `list_datasets(limit, offset)` | List dataset IDs |
|
|
196
|
+
| `get_dataset_metadata(id)` | Get dataset metadata |
|
|
197
|
+
| `download_artifact_npz(id)` | Download NPZ as dict of arrays |
|
|
198
|
+
| `download_artifact_bytes(id)` | Download raw NPZ bytes |
|
|
199
|
+
| `get_preview(id, n)` | Get JSON preview of samples |
|
|
200
|
+
| `delete_dataset(id)` | Delete a dataset |
|
|
201
|
+
| `close()` | Close the client session |
|
|
202
|
+
|
|
203
|
+
### Exceptions
|
|
204
|
+
|
|
205
|
+
| Exception | Description |
|
|
206
|
+
| ---------------------------- | ----------------------------- |
|
|
207
|
+
| `JuniperDataClientError` | Base exception for all errors |
|
|
208
|
+
| `JuniperDataConnectionError` | Connection to service failed |
|
|
209
|
+
| `JuniperDataTimeoutError` | Request timed out |
|
|
210
|
+
| `JuniperDataNotFoundError` | Resource not found (404) |
|
|
211
|
+
| `JuniperDataValidationError` | Invalid parameters (400/422)  |
| `JuniperDataConfigurationError` | Configuration missing or invalid |
|
|
212
|
+
|
|
213
|
+
## NPZ Artifact Schema
|
|
214
|
+
|
|
215
|
+
Downloaded artifacts contain the following numpy arrays (all `float32`):
|
|
216
|
+
|
|
217
|
+
| Key | Shape | Description |
|
|
218
|
+
| --------- | ---------------------- | ----------------------------- |
|
|
219
|
+
| `X_train` | `(n_train, 2)` | Training features |
|
|
220
|
+
| `y_train` | `(n_train, n_classes)` | Training labels (one-hot) |
|
|
221
|
+
| `X_test` | `(n_test, 2)` | Test features |
|
|
222
|
+
| `y_test` | `(n_test, n_classes)` | Test labels (one-hot) |
|
|
223
|
+
| `X_full` | `(n_total, 2)` | Full dataset features |
|
|
224
|
+
| `y_full` | `(n_total, n_classes)` | Full dataset labels (one-hot) |
|
|
225
|
+
|
|
226
|
+
## Configuration
|
|
227
|
+
|
|
228
|
+
| Parameter | Default | Description |
|
|
229
|
+
| ---------------- | ----------------------- | ---------------------------------- |
|
|
230
|
+
| `base_url` | `http://localhost:8100` | JuniperData service URL |
|
|
231
|
+
| `timeout` | `30` | Request timeout in seconds |
|
|
232
|
+
| `retries` | `3` | Number of retry attempts |
|
|
233
|
+
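| `backoff_factor` | `0.5`                   | Backoff multiplier between retries |
| `api_key`        | `None`                  | API key sent as the `X-API-Key` header; falls back to the `JUNIPER_DATA_API_KEY` environment variable |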
|
|
234
|
+
|
|
235
|
+
## Requirements
|
|
236
|
+
|
|
237
|
+
- Python >=3.11
|
|
238
|
+
- numpy >=1.24.0
|
|
239
|
+
- requests >=2.28.0
|
|
240
|
+
- urllib3 >=2.0.0
|
|
241
|
+
|
|
242
|
+
## License
|
|
243
|
+
|
|
244
|
+
MIT License - Copyright (c) 2024-2026 Paul Calnon
|
|
245
|
+
|
|
246
|
+
## See Also
|
|
247
|
+
|
|
248
|
+
- [JuniperData](https://github.com/pcalnon/Juniper/tree/main/JuniperData)
|
|
249
|
+
- [JuniperCascor](https://github.com/pcalnon/Juniper/tree/main/JuniperCascor)
|
|
250
|
+
- [JuniperCanopy](https://github.com/pcalnon/Juniper/tree/main/JuniperCanopy)
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
juniper_data_client/__init__.py,sha256=qHsZY6awA6GZmcksi7GGGNKBrbsKuYo6ukR4LAs-RgI,809
|
|
2
|
+
juniper_data_client/client.py,sha256=Fkblt8BrMeJ0S9EoVXExlADcYVISXf9JaBsVQE1Wddc,13904
|
|
3
|
+
juniper_data_client/exceptions.py,sha256=PiZynlAebexJl-rm5x9yPzGwc9oSshhxSpgL0tI-Zho,844
|
|
4
|
+
juniper_data_client/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
|
+
juniper_data_client-0.3.0.dist-info/licenses/LICENSE,sha256=dyw35rgU8rmukcWEiETN0fwCwKz7donu9vGNP5LgBW8,1073
|
|
6
|
+
juniper_data_client-0.3.0.dist-info/METADATA,sha256=3jy6AEsSFiSUdE2Gr2k8ii7u1fO7kvWBJaDoZ4n6mdc,8670
|
|
7
|
+
juniper_data_client-0.3.0.dist-info/WHEEL,sha256=YCfwYGOYMi5Jhw2fU4yNgwErybb2IX5PEwBKV4ZbdBo,91
|
|
8
|
+
juniper_data_client-0.3.0.dist-info/top_level.txt,sha256=JKjJnFGsHKzI9pOYlHh3uS4LzBR3lQ9q13mKfHrGdu0,20
|
|
9
|
+
juniper_data_client-0.3.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024-2026 Paul Calnon
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
juniper_data_client
|