datacrunch 1.8.4__py3-none-any.whl → 1.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
__init__.py ADDED
@@ -0,0 +1,3 @@
+ from .inference_client import InferenceClient, InferenceResponse
+
+ __all__ = ['InferenceClient', 'InferenceResponse']
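
The new `__init__.py` simply re-exports the client types, so downstream code can import them from the subpackage rather than from the module. A minimal sketch, assuming a hypothetical subpackage name (the diff does not show the directory that contains these new files):

```python
# Hypothetical subpackage name; only the module name inference_client.py
# is visible in this diff, not the directory it lives in.
from datacrunch.inference import InferenceClient, InferenceResponse
```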
inference_client.py ADDED
@@ -0,0 +1,343 @@
+ from dataclasses import dataclass
+ from dataclasses_json import dataclass_json, Undefined  # type: ignore
+ import requests
+ from requests.structures import CaseInsensitiveDict
+ from typing import Optional, Dict, Any, Union, Generator
+ from urllib.parse import urlparse
+ from enum import Enum
+
+ class InferenceClientError(Exception):
+     """Base exception for InferenceClient errors."""
+     pass
+
+ class AsyncStatus(int, Enum):
+     Initialized = 0
+     Queue = 1
+     Inference = 2
+     Completed = 3
+
+ @dataclass_json(undefined=Undefined.EXCLUDE)
+ @dataclass
+ class InferenceResponse:
+     headers: CaseInsensitiveDict[str]
+     status_code: int
+     status_text: str
+     _original_response: requests.Response
+     _stream: bool = False
+
+     def _is_stream_response(self, headers: CaseInsensitiveDict[str]) -> bool:
+         """Check if the response headers indicate a streaming response.
+
+         Args:
+             headers: The response headers to check
+
+         Returns:
+             bool: True if the response is likely a stream, False otherwise
+         """
+         content_type = headers.get('Content-Type', '').lower()
+         # Standard chunked transfer encoding
+         is_chunked_transfer = headers.get('Transfer-Encoding', '').lower() == 'chunked'
+         # Server-Sent Events content type; startswith tolerates parameters
+         # such as "; charset=utf-8"
+         is_event_stream = content_type.startswith('text/event-stream')
+         # Newline-delimited JSON
+         is_ndjson = content_type.startswith('application/x-ndjson')
+         # Stream JSON
+         is_stream_json = content_type.startswith('application/stream+json')
+         # Keep-alive connection
+         is_keep_alive = headers.get('Connection', '').lower() == 'keep-alive'
+         # No content length
+         has_no_content_length = 'Content-Length' not in headers
+
+         # No Content-Length combined with keep-alive often suggests streaming
+         # (though it is not definitive)
+         is_keep_alive_and_no_content_length = is_keep_alive and has_no_content_length
+
+         return (self._stream or is_chunked_transfer or is_event_stream or is_ndjson or
+                 is_stream_json or is_keep_alive_and_no_content_length)
+
+     def output(self, is_text: bool = False) -> Any:
+         """Return the response body, parsed as JSON by default or as raw text when is_text=True."""
+         try:
+             if is_text:
+                 return self._original_response.text
+             return self._original_response.json()
+         except Exception as e:
+             # If the response looks like a stream (based on its headers), raise a more helpful error
+             if self._is_stream_response(self._original_response.headers):
+                 raise InferenceClientError(
+                     "Response appears to be a stream; use the stream() method instead")
+             raise InferenceClientError(
+                 f"Failed to parse response as JSON: {str(e)}")
+
+     def stream(self, chunk_size: int = 512, as_text: bool = True) -> Generator[Any, None, None]:
+         """Stream the response content.
+
+         Args:
+             chunk_size: Size of chunks to stream, in bytes
+             as_text: If True, stream as text using iter_lines. If False, stream as binary using iter_content.
+
+         Returns:
+             Generator yielding chunks of the response
+         """
+         if as_text:
+             for chunk in self._original_response.iter_lines(chunk_size=chunk_size):
+                 if chunk:
+                     yield chunk
+         else:
+             for chunk in self._original_response.iter_content(chunk_size=chunk_size):
+                 if chunk:
+                     yield chunk
+
+
+ class InferenceClient:
+     def __init__(self, inference_key: str, endpoint_base_url: str, timeout_seconds: int = 60 * 5) -> None:
+         """
+         Initialize the InferenceClient.
+
+         Args:
+             inference_key: The authentication key for the API
+             endpoint_base_url: The base URL for the API
+             timeout_seconds: Request timeout in seconds
+
+         Raises:
+             InferenceClientError: If the parameters are invalid
+         """
+         if not inference_key:
+             raise InferenceClientError("inference_key cannot be empty")
+
+         parsed_url = urlparse(endpoint_base_url)
+         if not parsed_url.scheme or not parsed_url.netloc:
+             raise InferenceClientError("endpoint_base_url must be a valid URL")
+
+         self.inference_key = inference_key
+         self.endpoint_base_url = endpoint_base_url.rstrip('/')
+         # Split the endpoint URL at its last '/' into the base domain and the deployment name
+         self.base_domain, _, self.deployment_name = self.endpoint_base_url.rpartition('/')
+         self.timeout_seconds = timeout_seconds
+         self._session = requests.Session()
+         self._global_headers = {
+             'Authorization': f'Bearer {inference_key}',
+             'Content-Type': 'application/json'
+         }
+
+     def __enter__(self):
+         return self
+
+     def __exit__(self, exc_type, exc_val, exc_tb):
+         self._session.close()
+
+     @property
+     def global_headers(self) -> Dict[str, str]:
+         """
+         Get the current global headers that will be used for all requests.
+
+         Returns:
+             Dictionary of current global headers
+         """
+         return self._global_headers.copy()
+
+     def set_global_header(self, key: str, value: str) -> None:
+         """
+         Set or update a global header that will be used for all requests.
+
+         Args:
+             key: Header name
+             value: Header value
+         """
+         self._global_headers[key] = value
+
+     def set_global_headers(self, headers: Dict[str, str]) -> None:
+         """
+         Set multiple global headers at once that will be used for all requests.
+
+         Args:
+             headers: Dictionary of headers to set globally
+         """
+         self._global_headers.update(headers)
+
+     def remove_global_header(self, key: str) -> None:
+         """
+         Remove a global header.
+
+         Args:
+             key: Header name to remove from global headers
+         """
+         if key in self._global_headers:
+             del self._global_headers[key]
+
+     def _build_url(self, path: str) -> str:
+         """Construct the full URL by joining the base URL with the path."""
+         return f"{self.endpoint_base_url}/{path.lstrip('/')}"
+
+     def _build_request_headers(self, request_headers: Optional[Dict[str, str]] = None) -> Dict[str, str]:
+         """
+         Build the final headers by merging global headers with request-specific headers.
+
+         Args:
+             request_headers: Optional headers specific to this request
+
+         Returns:
+             Merged headers dictionary
+         """
+         headers = self._global_headers.copy()
+         if request_headers:
+             headers.update(request_headers)
+         return headers
+
+     def _make_request(self, method: str, path: str, **kwargs) -> requests.Response:
+         """
+         Make an HTTP request with error handling.
+
+         Args:
+             method: HTTP method to use
+             path: API endpoint path
+             **kwargs: Additional arguments to pass to the request
+
+         Returns:
+             Response object from the request
+
+         Raises:
+             InferenceClientError: If the request fails
+         """
+         # Fall back to the client-level timeout when none is given for this request
+         timeout = kwargs.pop('timeout_seconds', None)
+         if timeout is None:
+             timeout = self.timeout_seconds
+         try:
+             response = self._session.request(
+                 method=method,
+                 url=self._build_url(path),
+                 headers=self._build_request_headers(kwargs.pop('headers', None)),
+                 timeout=timeout,
+                 **kwargs
+             )
+             response.raise_for_status()
+             return response
+         except requests.exceptions.Timeout:
+             raise InferenceClientError(
+                 f"Request to {path} timed out after {timeout} seconds")
+         except requests.exceptions.RequestException as e:
+             raise InferenceClientError(f"Request to {path} failed: {str(e)}")
+
+     def run_sync(self, data: Dict[str, Any], path: str = "", timeout_seconds: int = 60 * 5, headers: Optional[Dict[str, str]] = None, http_method: str = "POST", stream: bool = False) -> InferenceResponse:
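+         """Run a synchronous inference request and wait for the response.
+
+         Args:
+             data: JSON-serializable request body
+             path: Optional sub-path appended to the endpoint base URL
+             timeout_seconds: Request timeout in seconds
+             headers: Optional request-specific headers
+             http_method: HTTP method to use
+             stream: If True, defer downloading the body so it can be consumed
+                 via InferenceResponse.stream()
+
+         Returns:
+             InferenceResponse: Wrapper around the HTTP response
+         """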
+         response = self._make_request(
+             http_method, path, json=data, timeout_seconds=timeout_seconds, headers=headers, stream=stream)
+
+         return InferenceResponse(
+             headers=response.headers,
+             status_code=response.status_code,
+             status_text=response.reason,
+             _original_response=response,
+             _stream=stream
+         )
+
+     def run(self, data: Dict[str, Any], path: str = "", timeout_seconds: int = 60 * 5, headers: Optional[Dict[str, str]] = None, http_method: str = "POST", no_response: bool = False) -> Optional['AsyncInferenceExecution']:
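+         """Run an asynchronous inference request.
+
+         Args:
+             data: JSON-serializable request body
+             path: Optional sub-path appended to the endpoint base URL
+             timeout_seconds: Request timeout in seconds
+             headers: Optional request-specific headers
+             http_method: HTTP method to use
+             no_response: If True, submit the request without waiting for a
+                 response and return None
+
+         Returns:
+             Optional[AsyncInferenceExecution]: A handle for polling the
+             execution status and fetching its result, or None when
+             no_response is True
+         """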
+         # Add the relevant Prefer header to mark the request as asynchronous
+         headers = headers or {}
+         if no_response:
+             # "Prefer: respond-async-proxy" runs the request asynchronously
+             # without waiting for the response
+             headers['Prefer'] = 'respond-async-proxy'
+             self._make_request(
+                 http_method, path, json=data, timeout_seconds=timeout_seconds, headers=headers)
+             return None
+         # "Prefer: respond-async" runs the request asynchronously and waits
+         # for the execution id
+         headers['Prefer'] = 'respond-async'
+
+         response = self._make_request(
+             http_method, path, json=data, timeout_seconds=timeout_seconds, headers=headers)
+
+         result = response.json()
+         execution_id = result['Id']
+
+         return AsyncInferenceExecution(self, execution_id, AsyncStatus.Initialized)
+
+     def get(self, path: str, params: Optional[Dict[str, Any]] = None, headers: Optional[Dict[str, str]] = None, timeout_seconds: Optional[int] = None) -> requests.Response:
+         return self._make_request('GET', path, params=params, headers=headers, timeout_seconds=timeout_seconds)
+
+     def post(self, path: str, json: Optional[Dict[str, Any]] = None, data: Optional[Union[str, Dict[str, Any]]] = None,
+              params: Optional[Dict[str, Any]] = None, headers: Optional[Dict[str, str]] = None, timeout_seconds: Optional[int] = None) -> requests.Response:
+         return self._make_request('POST', path, json=json, data=data, params=params, headers=headers, timeout_seconds=timeout_seconds)
+
+     def put(self, path: str, json: Optional[Dict[str, Any]] = None, data: Optional[Union[str, Dict[str, Any]]] = None,
+             params: Optional[Dict[str, Any]] = None, headers: Optional[Dict[str, str]] = None, timeout_seconds: Optional[int] = None) -> requests.Response:
+         return self._make_request('PUT', path, json=json, data=data, params=params, headers=headers, timeout_seconds=timeout_seconds)
+
+     def delete(self, path: str, params: Optional[Dict[str, Any]] = None, headers: Optional[Dict[str, str]] = None, timeout_seconds: Optional[int] = None) -> requests.Response:
+         return self._make_request('DELETE', path, params=params, headers=headers, timeout_seconds=timeout_seconds)
+
+     def patch(self, path: str, json: Optional[Dict[str, Any]] = None, data: Optional[Union[str, Dict[str, Any]]] = None,
+               params: Optional[Dict[str, Any]] = None, headers: Optional[Dict[str, str]] = None, timeout_seconds: Optional[int] = None) -> requests.Response:
+         return self._make_request('PATCH', path, json=json, data=data, params=params, headers=headers, timeout_seconds=timeout_seconds)
+
+     def head(self, path: str, params: Optional[Dict[str, Any]] = None, headers: Optional[Dict[str, str]] = None, timeout_seconds: Optional[int] = None) -> requests.Response:
+         return self._make_request('HEAD', path, params=params, headers=headers, timeout_seconds=timeout_seconds)
+
+     def options(self, path: str, params: Optional[Dict[str, Any]] = None, headers: Optional[Dict[str, str]] = None, timeout_seconds: Optional[int] = None) -> requests.Response:
+         return self._make_request('OPTIONS', path, params=params, headers=headers, timeout_seconds=timeout_seconds)
+
+     def health(self, healthcheck_path: str = "/health") -> requests.Response:
+         """
+         Check the health status of the API.
+
+         Args:
+             healthcheck_path: Path of the health check endpoint
+
+         Returns:
+             requests.Response: The response from the health check
+
+         Raises:
+             InferenceClientError: If the health check fails
+         """
+         try:
+             return self.get(healthcheck_path)
+         except InferenceClientError as e:
+             raise InferenceClientError(f"Health check failed: {str(e)}")
+
+
+ @dataclass_json(undefined=Undefined.EXCLUDE)
+ @dataclass
+ class AsyncInferenceExecution:
+     _inference_client: 'InferenceClient'
+     id: str
+     _status: AsyncStatus
+     INFERENCE_ID_HEADER = 'X-Inference-Id'
+
+     def status(self) -> AsyncStatus:
+         """Get the locally cached status of the async inference execution.
+
+         This does not contact the server; call status_json() to refresh the
+         cached value.
+
+         Returns:
+             AsyncStatus: The cached status value
+         """
+         return self._status
+
+     def status_json(self) -> Dict[str, Any]:
+         """Fetch the current status of the async inference execution from the
+         server and update the cached status.
+
+         Returns:
+             Dict[str, Any]: The status response containing the execution status and other metadata
+         """
+         url = f'{self._inference_client.base_domain}/status/{self._inference_client.deployment_name}'
+         response = self._inference_client._session.get(
+             url, headers=self._inference_client._build_request_headers({self.INFERENCE_ID_HEADER: self.id}))
+
+         response_json = response.json()
+         self._status = AsyncStatus(response_json['Status'])
+
+         return response_json
+
+     def result(self) -> Dict[str, Any]:
+         """Get the results of the async inference execution.
+
+         Returns:
+             Dict[str, Any]: The results of the inference execution
+         """
+         url = f'{self._inference_client.base_domain}/result/{self._inference_client.deployment_name}'
+         response = self._inference_client._session.get(
+             url, headers=self._inference_client._build_request_headers({self.INFERENCE_ID_HEADER: self.id}))
+
+         # Guard against a missing Content-Type and tolerate parameters such
+         # as "; charset=utf-8"
+         if response.headers.get('Content-Type', '').startswith('application/json'):
+             return response.json()
+         else:
+             return {'result': response.text}
+
+     # alias for result
+     output = result
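
Taken together, the new module offers three call paths: a blocking call (`run_sync`), a streaming call (`run_sync` with `stream=True`), and an async call (`run`) that returns a pollable handle. A minimal usage sketch follows; the import path, endpoint URL, payload shape, and polling interval are illustrative assumptions, not values taken from this diff:

```python
import time

# Assumed import path; the diff shows the module name (inference_client.py)
# but not the directory inside the datacrunch package that contains it.
from datacrunch.inference_client import AsyncStatus, InferenceClient

# Hypothetical endpoint: base domain plus the deployment name as the last path segment.
client = InferenceClient(
    inference_key="YOUR_INFERENCE_KEY",  # placeholder credential
    endpoint_base_url="https://inference.example.com/my-deployment",
)

with client:  # the context manager closes the underlying requests.Session
    # Blocking call: waits for the deployment's response, then parses it.
    response = client.run_sync({"prompt": "hello"})  # payload shape is illustrative
    print(response.status_code, response.output())

    # Streaming call: stream=True defers the download; consume chunks lazily.
    streamed = client.run_sync({"prompt": "hello"}, stream=True)
    for chunk in streamed.stream(as_text=True):
        print(chunk)

    # Async call: returns a handle; status_json() refreshes the cached status.
    execution = client.run({"prompt": "hello"})
    while execution.status() != AsyncStatus.Completed:
        time.sleep(1)
        execution.status_json()
    print(execution.result())
```

Note that a streaming `run_sync` keeps the HTTP connection open until the generator is exhausted, which is why `InferenceResponse` holds on to the original `requests.Response`.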
datacrunch/__version__.py CHANGED
@@ -1 +1 @@
- VERSION = '1.8.4'
+ VERSION = '1.10.0'