blaxel 0.1.9rc35__py3-none-any.whl → 0.1.9rc37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- blaxel/agents/__init__.py +53 -16
- blaxel/authentication/__init__.py +3 -4
- blaxel/client/api/compute/__init__.py +0 -0
- blaxel/client/api/compute/create_sandbox.py +166 -0
- blaxel/client/api/compute/delete_sandbox.py +154 -0
- blaxel/client/api/compute/get_sandbox.py +154 -0
- blaxel/client/api/compute/list_sandboxes.py +135 -0
- blaxel/client/api/compute/start_sandbox.py +157 -0
- blaxel/client/api/compute/stop_sandbox.py +157 -0
- blaxel/client/api/compute/update_sandbox.py +179 -0
- blaxel/client/api/default/list_sandbox_hub_definitions.py +123 -0
- blaxel/client/api/functions/list_function_revisions.py +16 -11
- blaxel/client/api/knowledgebases/list_knowledgebase_revisions.py +16 -11
- blaxel/client/api/models/list_model_revisions.py +16 -11
- blaxel/client/api/templates/list_templates.py +16 -11
- blaxel/client/models/__init__.py +32 -2
- blaxel/client/models/agent_spec.py +25 -69
- blaxel/client/models/core_spec.py +1 -45
- blaxel/client/models/function_spec.py +1 -45
- blaxel/client/models/last_n_requests_metric.py +18 -0
- blaxel/client/models/metrics.py +20 -0
- blaxel/client/models/model_spec.py +1 -45
- blaxel/client/models/{agent_chain.py → port.py} +23 -32
- blaxel/client/models/request_total_metric.py +12 -1
- blaxel/client/models/request_total_response_data.py +97 -0
- blaxel/client/models/resource_log.py +9 -0
- blaxel/client/models/resource_metrics.py +144 -0
- blaxel/client/models/resource_metrics_request_total_per_code_previous.py +45 -0
- blaxel/client/models/resource_metrics_rps_per_code_previous.py +45 -0
- blaxel/client/models/runtime.py +83 -7
- blaxel/client/models/runtime_configuration.py +45 -0
- blaxel/client/models/sandbox.py +129 -0
- blaxel/client/models/sandbox_definition.py +181 -0
- blaxel/client/models/sandbox_spec.py +208 -0
- blaxel/client/models/sandboxes.py +129 -0
- blaxel/client/models/serverless_config.py +29 -1
- blaxel/client/models/serverless_config_configuration.py +45 -0
- blaxel/client/models/start_sandbox.py +94 -0
- blaxel/client/models/stop_sandbox.py +94 -0
- blaxel/client/models/trigger.py +98 -0
- blaxel/client/models/trigger_configuration.py +45 -0
- blaxel/client/models/workspace.py +20 -0
- blaxel/client/models/workspace_runtime.py +61 -0
- blaxel/common/autoload.py +0 -4
- blaxel/common/internal.py +75 -0
- blaxel/common/settings.py +6 -1
- blaxel/instrumentation/exporters.py +3 -6
- blaxel/instrumentation/manager.py +5 -3
- blaxel/mcp/client.py +1 -3
- blaxel/mcp/server.py +4 -4
- blaxel/models/__init__.py +2 -1
- blaxel/models/custom/langchain/gemini.py +41 -18
- blaxel/models/custom/llamaindex/cohere.py +25 -16
- blaxel/models/custom/pydantic/gemini.py +0 -1
- blaxel/models/livekit.py +1 -1
- blaxel/tools/__init__.py +63 -22
- blaxel/tools/langchain.py +1 -2
- {blaxel-0.1.9rc35.dist-info → blaxel-0.1.9rc37.dist-info}/METADATA +1 -4
- {blaxel-0.1.9rc35.dist-info → blaxel-0.1.9rc37.dist-info}/RECORD +61 -37
- {blaxel-0.1.9rc35.dist-info → blaxel-0.1.9rc37.dist-info}/WHEEL +0 -0
- {blaxel-0.1.9rc35.dist-info → blaxel-0.1.9rc37.dist-info}/licenses/LICENSE +0 -0
@@ -12,7 +12,11 @@ if TYPE_CHECKING:
|
|
12
12
|
from ..models.request_duration_over_time_metrics import RequestDurationOverTimeMetrics
|
13
13
|
from ..models.request_total_by_origin_metric import RequestTotalByOriginMetric
|
14
14
|
from ..models.resource_metrics_request_total_per_code import ResourceMetricsRequestTotalPerCode
|
15
|
+
from ..models.resource_metrics_request_total_per_code_previous import (
|
16
|
+
ResourceMetricsRequestTotalPerCodePrevious,
|
17
|
+
)
|
15
18
|
from ..models.resource_metrics_rps_per_code import ResourceMetricsRpsPerCode
|
19
|
+
from ..models.resource_metrics_rps_per_code_previous import ResourceMetricsRpsPerCodePrevious
|
16
20
|
from ..models.time_to_first_token_over_time_metrics import TimeToFirstTokenOverTimeMetrics
|
17
21
|
from ..models.token_rate_metrics import TokenRateMetrics
|
18
22
|
from ..models.token_total_metric import TokenTotalMetric
|
@@ -26,41 +30,69 @@ class ResourceMetrics:
|
|
26
30
|
"""Metrics for a single resource deployment (eg. model deployment, function deployment)
|
27
31
|
|
28
32
|
Attributes:
|
33
|
+
inference_errors_global (Union[Unset, list['Metric']]): Array of metrics
|
29
34
|
inference_global (Union[Unset, list['Metric']]): Array of metrics
|
30
35
|
last_n_requests (Union[Unset, list['Metric']]): Array of metrics
|
31
36
|
latency (Union[Unset, LatencyMetric]): Latency metrics
|
37
|
+
latency_previous (Union[Unset, LatencyMetric]): Latency metrics
|
32
38
|
memory_allocation (Union[Unset, MemoryAllocationMetric]): Metrics for memory allocation
|
33
39
|
model_ttft (Union[Unset, LatencyMetric]): Latency metrics
|
34
40
|
model_ttft_over_time (Union[Unset, TimeToFirstTokenOverTimeMetrics]): Time to first token over time metrics
|
35
41
|
request_duration_over_time (Union[Unset, RequestDurationOverTimeMetrics]): Request duration over time metrics
|
36
42
|
request_total (Union[Unset, float]): Number of requests for the resource globally
|
37
43
|
request_total_by_origin (Union[Unset, RequestTotalByOriginMetric]): Request total by origin metric
|
44
|
+
request_total_by_origin_previous (Union[Unset, RequestTotalByOriginMetric]): Request total by origin metric
|
38
45
|
request_total_per_code (Union[Unset, ResourceMetricsRequestTotalPerCode]): Number of requests for the resource
|
39
46
|
globally per code
|
47
|
+
request_total_per_code_previous (Union[Unset, ResourceMetricsRequestTotalPerCodePrevious]): Number of requests
|
48
|
+
for the resource globally per code for the previous period
|
49
|
+
request_total_previous (Union[Unset, float]): Number of requests for the resource globally for the previous
|
50
|
+
period
|
40
51
|
rps (Union[Unset, float]): Number of requests per second for the resource globally
|
41
52
|
rps_per_code (Union[Unset, ResourceMetricsRpsPerCode]): Number of requests per second for the resource globally
|
42
53
|
per code
|
54
|
+
rps_per_code_previous (Union[Unset, ResourceMetricsRpsPerCodePrevious]): Number of requests per second for the
|
55
|
+
resource globally per code for the previous period
|
56
|
+
rps_previous (Union[Unset, float]): Number of requests per second for the resource globally for the previous
|
57
|
+
period
|
43
58
|
token_rate (Union[Unset, TokenRateMetrics]): Token rate metrics
|
44
59
|
token_total (Union[Unset, TokenTotalMetric]): Token total metric
|
45
60
|
"""
|
46
61
|
|
62
|
+
inference_errors_global: Union[Unset, list["Metric"]] = UNSET
|
47
63
|
inference_global: Union[Unset, list["Metric"]] = UNSET
|
48
64
|
last_n_requests: Union[Unset, list["Metric"]] = UNSET
|
49
65
|
latency: Union[Unset, "LatencyMetric"] = UNSET
|
66
|
+
latency_previous: Union[Unset, "LatencyMetric"] = UNSET
|
50
67
|
memory_allocation: Union[Unset, "MemoryAllocationMetric"] = UNSET
|
51
68
|
model_ttft: Union[Unset, "LatencyMetric"] = UNSET
|
52
69
|
model_ttft_over_time: Union[Unset, "TimeToFirstTokenOverTimeMetrics"] = UNSET
|
53
70
|
request_duration_over_time: Union[Unset, "RequestDurationOverTimeMetrics"] = UNSET
|
54
71
|
request_total: Union[Unset, float] = UNSET
|
55
72
|
request_total_by_origin: Union[Unset, "RequestTotalByOriginMetric"] = UNSET
|
73
|
+
request_total_by_origin_previous: Union[Unset, "RequestTotalByOriginMetric"] = UNSET
|
56
74
|
request_total_per_code: Union[Unset, "ResourceMetricsRequestTotalPerCode"] = UNSET
|
75
|
+
request_total_per_code_previous: Union[Unset, "ResourceMetricsRequestTotalPerCodePrevious"] = UNSET
|
76
|
+
request_total_previous: Union[Unset, float] = UNSET
|
57
77
|
rps: Union[Unset, float] = UNSET
|
58
78
|
rps_per_code: Union[Unset, "ResourceMetricsRpsPerCode"] = UNSET
|
79
|
+
rps_per_code_previous: Union[Unset, "ResourceMetricsRpsPerCodePrevious"] = UNSET
|
80
|
+
rps_previous: Union[Unset, float] = UNSET
|
59
81
|
token_rate: Union[Unset, "TokenRateMetrics"] = UNSET
|
60
82
|
token_total: Union[Unset, "TokenTotalMetric"] = UNSET
|
61
83
|
additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)
|
62
84
|
|
63
85
|
def to_dict(self) -> dict[str, Any]:
|
86
|
+
inference_errors_global: Union[Unset, list[dict[str, Any]]] = UNSET
|
87
|
+
if not isinstance(self.inference_errors_global, Unset):
|
88
|
+
inference_errors_global = []
|
89
|
+
for componentsschemas_array_metric_item_data in self.inference_errors_global:
|
90
|
+
if type(componentsschemas_array_metric_item_data) == dict:
|
91
|
+
componentsschemas_array_metric_item = componentsschemas_array_metric_item_data
|
92
|
+
else:
|
93
|
+
componentsschemas_array_metric_item = componentsschemas_array_metric_item_data.to_dict()
|
94
|
+
inference_errors_global.append(componentsschemas_array_metric_item)
|
95
|
+
|
64
96
|
inference_global: Union[Unset, list[dict[str, Any]]] = UNSET
|
65
97
|
if not isinstance(self.inference_global, Unset):
|
66
98
|
inference_global = []
|
@@ -87,6 +119,16 @@ class ResourceMetrics:
|
|
87
119
|
elif self.latency and isinstance(self.latency, dict):
|
88
120
|
latency = self.latency
|
89
121
|
|
122
|
+
latency_previous: Union[Unset, dict[str, Any]] = UNSET
|
123
|
+
if (
|
124
|
+
self.latency_previous
|
125
|
+
and not isinstance(self.latency_previous, Unset)
|
126
|
+
and not isinstance(self.latency_previous, dict)
|
127
|
+
):
|
128
|
+
latency_previous = self.latency_previous.to_dict()
|
129
|
+
elif self.latency_previous and isinstance(self.latency_previous, dict):
|
130
|
+
latency_previous = self.latency_previous
|
131
|
+
|
90
132
|
memory_allocation: Union[Unset, dict[str, Any]] = UNSET
|
91
133
|
if (
|
92
134
|
self.memory_allocation
|
@@ -135,6 +177,16 @@ class ResourceMetrics:
|
|
135
177
|
elif self.request_total_by_origin and isinstance(self.request_total_by_origin, dict):
|
136
178
|
request_total_by_origin = self.request_total_by_origin
|
137
179
|
|
180
|
+
request_total_by_origin_previous: Union[Unset, dict[str, Any]] = UNSET
|
181
|
+
if (
|
182
|
+
self.request_total_by_origin_previous
|
183
|
+
and not isinstance(self.request_total_by_origin_previous, Unset)
|
184
|
+
and not isinstance(self.request_total_by_origin_previous, dict)
|
185
|
+
):
|
186
|
+
request_total_by_origin_previous = self.request_total_by_origin_previous.to_dict()
|
187
|
+
elif self.request_total_by_origin_previous and isinstance(self.request_total_by_origin_previous, dict):
|
188
|
+
request_total_by_origin_previous = self.request_total_by_origin_previous
|
189
|
+
|
138
190
|
request_total_per_code: Union[Unset, dict[str, Any]] = UNSET
|
139
191
|
if (
|
140
192
|
self.request_total_per_code
|
@@ -145,6 +197,18 @@ class ResourceMetrics:
|
|
145
197
|
elif self.request_total_per_code and isinstance(self.request_total_per_code, dict):
|
146
198
|
request_total_per_code = self.request_total_per_code
|
147
199
|
|
200
|
+
request_total_per_code_previous: Union[Unset, dict[str, Any]] = UNSET
|
201
|
+
if (
|
202
|
+
self.request_total_per_code_previous
|
203
|
+
and not isinstance(self.request_total_per_code_previous, Unset)
|
204
|
+
and not isinstance(self.request_total_per_code_previous, dict)
|
205
|
+
):
|
206
|
+
request_total_per_code_previous = self.request_total_per_code_previous.to_dict()
|
207
|
+
elif self.request_total_per_code_previous and isinstance(self.request_total_per_code_previous, dict):
|
208
|
+
request_total_per_code_previous = self.request_total_per_code_previous
|
209
|
+
|
210
|
+
request_total_previous = self.request_total_previous
|
211
|
+
|
148
212
|
rps = self.rps
|
149
213
|
|
150
214
|
rps_per_code: Union[Unset, dict[str, Any]] = UNSET
|
@@ -153,6 +217,18 @@ class ResourceMetrics:
|
|
153
217
|
elif self.rps_per_code and isinstance(self.rps_per_code, dict):
|
154
218
|
rps_per_code = self.rps_per_code
|
155
219
|
|
220
|
+
rps_per_code_previous: Union[Unset, dict[str, Any]] = UNSET
|
221
|
+
if (
|
222
|
+
self.rps_per_code_previous
|
223
|
+
and not isinstance(self.rps_per_code_previous, Unset)
|
224
|
+
and not isinstance(self.rps_per_code_previous, dict)
|
225
|
+
):
|
226
|
+
rps_per_code_previous = self.rps_per_code_previous.to_dict()
|
227
|
+
elif self.rps_per_code_previous and isinstance(self.rps_per_code_previous, dict):
|
228
|
+
rps_per_code_previous = self.rps_per_code_previous
|
229
|
+
|
230
|
+
rps_previous = self.rps_previous
|
231
|
+
|
156
232
|
token_rate: Union[Unset, dict[str, Any]] = UNSET
|
157
233
|
if self.token_rate and not isinstance(self.token_rate, Unset) and not isinstance(self.token_rate, dict):
|
158
234
|
token_rate = self.token_rate.to_dict()
|
@@ -168,12 +244,16 @@ class ResourceMetrics:
|
|
168
244
|
field_dict: dict[str, Any] = {}
|
169
245
|
field_dict.update(self.additional_properties)
|
170
246
|
field_dict.update({})
|
247
|
+
if inference_errors_global is not UNSET:
|
248
|
+
field_dict["inferenceErrorsGlobal"] = inference_errors_global
|
171
249
|
if inference_global is not UNSET:
|
172
250
|
field_dict["inferenceGlobal"] = inference_global
|
173
251
|
if last_n_requests is not UNSET:
|
174
252
|
field_dict["lastNRequests"] = last_n_requests
|
175
253
|
if latency is not UNSET:
|
176
254
|
field_dict["latency"] = latency
|
255
|
+
if latency_previous is not UNSET:
|
256
|
+
field_dict["latencyPrevious"] = latency_previous
|
177
257
|
if memory_allocation is not UNSET:
|
178
258
|
field_dict["memoryAllocation"] = memory_allocation
|
179
259
|
if model_ttft is not UNSET:
|
@@ -186,12 +266,22 @@ class ResourceMetrics:
|
|
186
266
|
field_dict["requestTotal"] = request_total
|
187
267
|
if request_total_by_origin is not UNSET:
|
188
268
|
field_dict["requestTotalByOrigin"] = request_total_by_origin
|
269
|
+
if request_total_by_origin_previous is not UNSET:
|
270
|
+
field_dict["requestTotalByOriginPrevious"] = request_total_by_origin_previous
|
189
271
|
if request_total_per_code is not UNSET:
|
190
272
|
field_dict["requestTotalPerCode"] = request_total_per_code
|
273
|
+
if request_total_per_code_previous is not UNSET:
|
274
|
+
field_dict["requestTotalPerCodePrevious"] = request_total_per_code_previous
|
275
|
+
if request_total_previous is not UNSET:
|
276
|
+
field_dict["requestTotalPrevious"] = request_total_previous
|
191
277
|
if rps is not UNSET:
|
192
278
|
field_dict["rps"] = rps
|
193
279
|
if rps_per_code is not UNSET:
|
194
280
|
field_dict["rpsPerCode"] = rps_per_code
|
281
|
+
if rps_per_code_previous is not UNSET:
|
282
|
+
field_dict["rpsPerCodePrevious"] = rps_per_code_previous
|
283
|
+
if rps_previous is not UNSET:
|
284
|
+
field_dict["rpsPrevious"] = rps_previous
|
195
285
|
if token_rate is not UNSET:
|
196
286
|
field_dict["tokenRate"] = token_rate
|
197
287
|
if token_total is not UNSET:
|
@@ -209,7 +299,13 @@ class ResourceMetrics:
|
|
209
299
|
from ..models.resource_metrics_request_total_per_code import (
|
210
300
|
ResourceMetricsRequestTotalPerCode,
|
211
301
|
)
|
302
|
+
from ..models.resource_metrics_request_total_per_code_previous import (
|
303
|
+
ResourceMetricsRequestTotalPerCodePrevious,
|
304
|
+
)
|
212
305
|
from ..models.resource_metrics_rps_per_code import ResourceMetricsRpsPerCode
|
306
|
+
from ..models.resource_metrics_rps_per_code_previous import (
|
307
|
+
ResourceMetricsRpsPerCodePrevious,
|
308
|
+
)
|
213
309
|
from ..models.time_to_first_token_over_time_metrics import TimeToFirstTokenOverTimeMetrics
|
214
310
|
from ..models.token_rate_metrics import TokenRateMetrics
|
215
311
|
from ..models.token_total_metric import TokenTotalMetric
|
@@ -217,6 +313,13 @@ class ResourceMetrics:
|
|
217
313
|
if not src_dict:
|
218
314
|
return None
|
219
315
|
d = src_dict.copy()
|
316
|
+
inference_errors_global = []
|
317
|
+
_inference_errors_global = d.pop("inferenceErrorsGlobal", UNSET)
|
318
|
+
for componentsschemas_array_metric_item_data in _inference_errors_global or []:
|
319
|
+
componentsschemas_array_metric_item = Metric.from_dict(componentsschemas_array_metric_item_data)
|
320
|
+
|
321
|
+
inference_errors_global.append(componentsschemas_array_metric_item)
|
322
|
+
|
220
323
|
inference_global = []
|
221
324
|
_inference_global = d.pop("inferenceGlobal", UNSET)
|
222
325
|
for componentsschemas_array_metric_item_data in _inference_global or []:
|
@@ -238,6 +341,13 @@ class ResourceMetrics:
|
|
238
341
|
else:
|
239
342
|
latency = LatencyMetric.from_dict(_latency)
|
240
343
|
|
344
|
+
_latency_previous = d.pop("latencyPrevious", UNSET)
|
345
|
+
latency_previous: Union[Unset, LatencyMetric]
|
346
|
+
if isinstance(_latency_previous, Unset):
|
347
|
+
latency_previous = UNSET
|
348
|
+
else:
|
349
|
+
latency_previous = LatencyMetric.from_dict(_latency_previous)
|
350
|
+
|
241
351
|
_memory_allocation = d.pop("memoryAllocation", UNSET)
|
242
352
|
memory_allocation: Union[Unset, MemoryAllocationMetric]
|
243
353
|
if isinstance(_memory_allocation, Unset):
|
@@ -275,6 +385,13 @@ class ResourceMetrics:
|
|
275
385
|
else:
|
276
386
|
request_total_by_origin = RequestTotalByOriginMetric.from_dict(_request_total_by_origin)
|
277
387
|
|
388
|
+
_request_total_by_origin_previous = d.pop("requestTotalByOriginPrevious", UNSET)
|
389
|
+
request_total_by_origin_previous: Union[Unset, RequestTotalByOriginMetric]
|
390
|
+
if isinstance(_request_total_by_origin_previous, Unset):
|
391
|
+
request_total_by_origin_previous = UNSET
|
392
|
+
else:
|
393
|
+
request_total_by_origin_previous = RequestTotalByOriginMetric.from_dict(_request_total_by_origin_previous)
|
394
|
+
|
278
395
|
_request_total_per_code = d.pop("requestTotalPerCode", UNSET)
|
279
396
|
request_total_per_code: Union[Unset, ResourceMetricsRequestTotalPerCode]
|
280
397
|
if isinstance(_request_total_per_code, Unset):
|
@@ -282,6 +399,17 @@ class ResourceMetrics:
|
|
282
399
|
else:
|
283
400
|
request_total_per_code = ResourceMetricsRequestTotalPerCode.from_dict(_request_total_per_code)
|
284
401
|
|
402
|
+
_request_total_per_code_previous = d.pop("requestTotalPerCodePrevious", UNSET)
|
403
|
+
request_total_per_code_previous: Union[Unset, ResourceMetricsRequestTotalPerCodePrevious]
|
404
|
+
if isinstance(_request_total_per_code_previous, Unset):
|
405
|
+
request_total_per_code_previous = UNSET
|
406
|
+
else:
|
407
|
+
request_total_per_code_previous = ResourceMetricsRequestTotalPerCodePrevious.from_dict(
|
408
|
+
_request_total_per_code_previous
|
409
|
+
)
|
410
|
+
|
411
|
+
request_total_previous = d.pop("requestTotalPrevious", UNSET)
|
412
|
+
|
285
413
|
rps = d.pop("rps", UNSET)
|
286
414
|
|
287
415
|
_rps_per_code = d.pop("rpsPerCode", UNSET)
|
@@ -291,6 +419,15 @@ class ResourceMetrics:
|
|
291
419
|
else:
|
292
420
|
rps_per_code = ResourceMetricsRpsPerCode.from_dict(_rps_per_code)
|
293
421
|
|
422
|
+
_rps_per_code_previous = d.pop("rpsPerCodePrevious", UNSET)
|
423
|
+
rps_per_code_previous: Union[Unset, ResourceMetricsRpsPerCodePrevious]
|
424
|
+
if isinstance(_rps_per_code_previous, Unset):
|
425
|
+
rps_per_code_previous = UNSET
|
426
|
+
else:
|
427
|
+
rps_per_code_previous = ResourceMetricsRpsPerCodePrevious.from_dict(_rps_per_code_previous)
|
428
|
+
|
429
|
+
rps_previous = d.pop("rpsPrevious", UNSET)
|
430
|
+
|
294
431
|
_token_rate = d.pop("tokenRate", UNSET)
|
295
432
|
token_rate: Union[Unset, TokenRateMetrics]
|
296
433
|
if isinstance(_token_rate, Unset):
|
@@ -306,18 +443,25 @@ class ResourceMetrics:
|
|
306
443
|
token_total = TokenTotalMetric.from_dict(_token_total)
|
307
444
|
|
308
445
|
resource_metrics = cls(
|
446
|
+
inference_errors_global=inference_errors_global,
|
309
447
|
inference_global=inference_global,
|
310
448
|
last_n_requests=last_n_requests,
|
311
449
|
latency=latency,
|
450
|
+
latency_previous=latency_previous,
|
312
451
|
memory_allocation=memory_allocation,
|
313
452
|
model_ttft=model_ttft,
|
314
453
|
model_ttft_over_time=model_ttft_over_time,
|
315
454
|
request_duration_over_time=request_duration_over_time,
|
316
455
|
request_total=request_total,
|
317
456
|
request_total_by_origin=request_total_by_origin,
|
457
|
+
request_total_by_origin_previous=request_total_by_origin_previous,
|
318
458
|
request_total_per_code=request_total_per_code,
|
459
|
+
request_total_per_code_previous=request_total_per_code_previous,
|
460
|
+
request_total_previous=request_total_previous,
|
319
461
|
rps=rps,
|
320
462
|
rps_per_code=rps_per_code,
|
463
|
+
rps_per_code_previous=rps_per_code_previous,
|
464
|
+
rps_previous=rps_previous,
|
321
465
|
token_rate=token_rate,
|
322
466
|
token_total=token_total,
|
323
467
|
)
|
@@ -0,0 +1,45 @@
|
|
1
|
+
from typing import Any, TypeVar
|
2
|
+
|
3
|
+
from attrs import define as _attrs_define
|
4
|
+
from attrs import field as _attrs_field
|
5
|
+
|
6
|
+
T = TypeVar("T", bound="ResourceMetricsRequestTotalPerCodePrevious")
|
7
|
+
|
8
|
+
|
9
|
+
@_attrs_define
class ResourceMetricsRequestTotalPerCodePrevious:
    """Number of requests for the resource globally per code for the previous period

    The model declares no fixed fields; every key of the source payload is
    stored in ``additional_properties`` and is reachable through item access
    (``obj[key]``, ``key in obj``, ``del obj[key]``).
    """

    additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a new dict that is a shallow copy of the additional properties."""
        return dict(self.additional_properties)

    @classmethod
    def from_dict(cls: type[T], src_dict: dict[str, Any]) -> T:
        """Build an instance from ``src_dict``.

        A falsy ``src_dict`` (``None`` or an empty mapping) yields ``None``
        instead of an instance. Otherwise a shallow copy of ``src_dict``
        becomes the instance's ``additional_properties``.
        """
        if not src_dict:
            return None
        payload = src_dict.copy()
        instance = cls()
        instance.additional_properties = payload
        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Names of all stored additional properties."""
        return list(self.additional_properties)

    def __getitem__(self, key: str) -> Any:
        """Return the additional property stored under ``key``."""
        return self.additional_properties[key]

    def __setitem__(self, key: str, value: Any) -> None:
        """Store ``value`` as the additional property ``key``."""
        self.additional_properties[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove the additional property ``key``."""
        del self.additional_properties[key]

    def __contains__(self, key: str) -> bool:
        """Report whether ``key`` is a stored additional property."""
        return key in self.additional_properties
|
@@ -0,0 +1,45 @@
|
|
1
|
+
from typing import Any, TypeVar
|
2
|
+
|
3
|
+
from attrs import define as _attrs_define
|
4
|
+
from attrs import field as _attrs_field
|
5
|
+
|
6
|
+
T = TypeVar("T", bound="ResourceMetricsRpsPerCodePrevious")
|
7
|
+
|
8
|
+
|
9
|
+
@_attrs_define
class ResourceMetricsRpsPerCodePrevious:
    """Number of requests per second for the resource globally per code for the previous period

    The model declares no fixed fields; every key of the source payload is
    stored in ``additional_properties`` and is reachable through item access
    (``obj[key]``, ``key in obj``, ``del obj[key]``).
    """

    additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a new dict that is a shallow copy of the additional properties."""
        return dict(self.additional_properties)

    @classmethod
    def from_dict(cls: type[T], src_dict: dict[str, Any]) -> T:
        """Build an instance from ``src_dict``.

        A falsy ``src_dict`` (``None`` or an empty mapping) yields ``None``
        instead of an instance. Otherwise a shallow copy of ``src_dict``
        becomes the instance's ``additional_properties``.
        """
        if not src_dict:
            return None
        payload = src_dict.copy()
        instance = cls()
        instance.additional_properties = payload
        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Names of all stored additional properties."""
        return list(self.additional_properties)

    def __getitem__(self, key: str) -> Any:
        """Return the additional property stored under ``key``."""
        return self.additional_properties[key]

    def __setitem__(self, key: str, value: Any) -> None:
        """Store ``value`` as the additional property ``key``."""
        self.additional_properties[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove the additional property ``key``."""
        del self.additional_properties[key]

    def __contains__(self, key: str) -> bool:
        """Report whether ``key`` is a stored additional property."""
        return key in self.additional_properties
|
blaxel/client/models/runtime.py
CHANGED
@@ -6,6 +6,8 @@ from attrs import field as _attrs_field
|
|
6
6
|
from ..types import UNSET, Unset
|
7
7
|
|
8
8
|
if TYPE_CHECKING:
|
9
|
+
from ..models.port import Port
|
10
|
+
from ..models.runtime_configuration import RuntimeConfiguration
|
9
11
|
from ..models.runtime_startup_probe import RuntimeStartupProbe
|
10
12
|
|
11
13
|
|
@@ -19,33 +21,44 @@ class Runtime:
|
|
19
21
|
Attributes:
|
20
22
|
args (Union[Unset, list[Any]]): The arguments to pass to the deployment runtime
|
21
23
|
command (Union[Unset, list[Any]]): The command to run the deployment
|
24
|
+
configuration (Union[Unset, RuntimeConfiguration]): The configuration for the deployment
|
22
25
|
cpu (Union[Unset, int]): The CPU for the deployment in cores, only available for private cluster
|
23
26
|
endpoint_name (Union[Unset, str]): Endpoint Name of the model. In case of hf_private_endpoint, it is the
|
24
27
|
endpoint name. In case of hf_public_endpoint, it is not used.
|
25
28
|
envs (Union[Unset, list[Any]]): The env variables to set in the deployment. Should be a list of Kubernetes
|
26
29
|
EnvVar types
|
30
|
+
generation (Union[Unset, str]): The generation of the deployment
|
27
31
|
image (Union[Unset, str]): The Docker image for the deployment
|
32
|
+
max_scale (Union[Unset, int]): The maximum number of replicas for the
|
33
|
+
deployment.
|
28
34
|
memory (Union[Unset, int]): The memory for the deployment in MB
|
29
35
|
metric_port (Union[Unset, int]): The port to serve the metrics on
|
36
|
+
min_scale (Union[Unset, int]): The minimum number of replicas for the deployment. Can be 0 or 1 (in which case the deployment is always running in at least one location).
|
30
37
|
model (Union[Unset, str]): The slug name of the origin model at HuggingFace.
|
31
38
|
organization (Union[Unset, str]): The organization of the model
|
32
|
-
|
39
|
+
ports (Union[Unset, list['Port']]): Set of ports for a resource
|
33
40
|
startup_probe (Union[Unset, RuntimeStartupProbe]): The readiness probe. Should be a Kubernetes Probe type
|
41
|
+
timeout (Union[Unset, int]): The timeout for the deployment in seconds
|
34
42
|
type_ (Union[Unset, str]): The type of origin for the deployment (hf_private_endpoint, hf_public_endpoint)
|
35
43
|
"""
|
36
44
|
|
37
45
|
args: Union[Unset, list[Any]] = UNSET
|
38
46
|
command: Union[Unset, list[Any]] = UNSET
|
47
|
+
configuration: Union[Unset, "RuntimeConfiguration"] = UNSET
|
39
48
|
cpu: Union[Unset, int] = UNSET
|
40
49
|
endpoint_name: Union[Unset, str] = UNSET
|
41
50
|
envs: Union[Unset, list[Any]] = UNSET
|
51
|
+
generation: Union[Unset, str] = UNSET
|
42
52
|
image: Union[Unset, str] = UNSET
|
53
|
+
max_scale: Union[Unset, int] = UNSET
|
43
54
|
memory: Union[Unset, int] = UNSET
|
44
55
|
metric_port: Union[Unset, int] = UNSET
|
56
|
+
min_scale: Union[Unset, int] = UNSET
|
45
57
|
model: Union[Unset, str] = UNSET
|
46
58
|
organization: Union[Unset, str] = UNSET
|
47
|
-
|
59
|
+
ports: Union[Unset, list["Port"]] = UNSET
|
48
60
|
startup_probe: Union[Unset, "RuntimeStartupProbe"] = UNSET
|
61
|
+
timeout: Union[Unset, int] = UNSET
|
49
62
|
type_: Union[Unset, str] = UNSET
|
50
63
|
additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)
|
51
64
|
|
@@ -58,6 +71,16 @@ class Runtime:
|
|
58
71
|
if not isinstance(self.command, Unset):
|
59
72
|
command = self.command
|
60
73
|
|
74
|
+
configuration: Union[Unset, dict[str, Any]] = UNSET
|
75
|
+
if (
|
76
|
+
self.configuration
|
77
|
+
and not isinstance(self.configuration, Unset)
|
78
|
+
and not isinstance(self.configuration, dict)
|
79
|
+
):
|
80
|
+
configuration = self.configuration.to_dict()
|
81
|
+
elif self.configuration and isinstance(self.configuration, dict):
|
82
|
+
configuration = self.configuration
|
83
|
+
|
61
84
|
cpu = self.cpu
|
62
85
|
|
63
86
|
endpoint_name = self.endpoint_name
|
@@ -66,17 +89,31 @@ class Runtime:
|
|
66
89
|
if not isinstance(self.envs, Unset):
|
67
90
|
envs = self.envs
|
68
91
|
|
92
|
+
generation = self.generation
|
93
|
+
|
69
94
|
image = self.image
|
70
95
|
|
96
|
+
max_scale = self.max_scale
|
97
|
+
|
71
98
|
memory = self.memory
|
72
99
|
|
73
100
|
metric_port = self.metric_port
|
74
101
|
|
102
|
+
min_scale = self.min_scale
|
103
|
+
|
75
104
|
model = self.model
|
76
105
|
|
77
106
|
organization = self.organization
|
78
107
|
|
79
|
-
|
108
|
+
ports: Union[Unset, list[dict[str, Any]]] = UNSET
|
109
|
+
if not isinstance(self.ports, Unset):
|
110
|
+
ports = []
|
111
|
+
for componentsschemas_ports_item_data in self.ports:
|
112
|
+
if type(componentsschemas_ports_item_data) == dict:
|
113
|
+
componentsschemas_ports_item = componentsschemas_ports_item_data
|
114
|
+
else:
|
115
|
+
componentsschemas_ports_item = componentsschemas_ports_item_data.to_dict()
|
116
|
+
ports.append(componentsschemas_ports_item)
|
80
117
|
|
81
118
|
startup_probe: Union[Unset, dict[str, Any]] = UNSET
|
82
119
|
if (
|
@@ -88,6 +125,8 @@ class Runtime:
|
|
88
125
|
elif self.startup_probe and isinstance(self.startup_probe, dict):
|
89
126
|
startup_probe = self.startup_probe
|
90
127
|
|
128
|
+
timeout = self.timeout
|
129
|
+
|
91
130
|
type_ = self.type_
|
92
131
|
|
93
132
|
field_dict: dict[str, Any] = {}
|
@@ -97,26 +136,36 @@ class Runtime:
|
|
97
136
|
field_dict["args"] = args
|
98
137
|
if command is not UNSET:
|
99
138
|
field_dict["command"] = command
|
139
|
+
if configuration is not UNSET:
|
140
|
+
field_dict["configuration"] = configuration
|
100
141
|
if cpu is not UNSET:
|
101
142
|
field_dict["cpu"] = cpu
|
102
143
|
if endpoint_name is not UNSET:
|
103
144
|
field_dict["endpointName"] = endpoint_name
|
104
145
|
if envs is not UNSET:
|
105
146
|
field_dict["envs"] = envs
|
147
|
+
if generation is not UNSET:
|
148
|
+
field_dict["generation"] = generation
|
106
149
|
if image is not UNSET:
|
107
150
|
field_dict["image"] = image
|
151
|
+
if max_scale is not UNSET:
|
152
|
+
field_dict["maxScale"] = max_scale
|
108
153
|
if memory is not UNSET:
|
109
154
|
field_dict["memory"] = memory
|
110
155
|
if metric_port is not UNSET:
|
111
156
|
field_dict["metricPort"] = metric_port
|
157
|
+
if min_scale is not UNSET:
|
158
|
+
field_dict["minScale"] = min_scale
|
112
159
|
if model is not UNSET:
|
113
160
|
field_dict["model"] = model
|
114
161
|
if organization is not UNSET:
|
115
162
|
field_dict["organization"] = organization
|
116
|
-
if
|
117
|
-
field_dict["
|
163
|
+
if ports is not UNSET:
|
164
|
+
field_dict["ports"] = ports
|
118
165
|
if startup_probe is not UNSET:
|
119
166
|
field_dict["startupProbe"] = startup_probe
|
167
|
+
if timeout is not UNSET:
|
168
|
+
field_dict["timeout"] = timeout
|
120
169
|
if type_ is not UNSET:
|
121
170
|
field_dict["type"] = type_
|
122
171
|
|
@@ -124,6 +173,8 @@ class Runtime:
|
|
124
173
|
|
125
174
|
@classmethod
|
126
175
|
def from_dict(cls: type[T], src_dict: dict[str, Any]) -> T:
|
176
|
+
from ..models.port import Port
|
177
|
+
from ..models.runtime_configuration import RuntimeConfiguration
|
127
178
|
from ..models.runtime_startup_probe import RuntimeStartupProbe
|
128
179
|
|
129
180
|
if not src_dict:
|
@@ -133,23 +184,41 @@ class Runtime:
|
|
133
184
|
|
134
185
|
command = cast(list[Any], d.pop("command", UNSET))
|
135
186
|
|
187
|
+
_configuration = d.pop("configuration", UNSET)
|
188
|
+
configuration: Union[Unset, RuntimeConfiguration]
|
189
|
+
if isinstance(_configuration, Unset):
|
190
|
+
configuration = UNSET
|
191
|
+
else:
|
192
|
+
configuration = RuntimeConfiguration.from_dict(_configuration)
|
193
|
+
|
136
194
|
cpu = d.pop("cpu", UNSET)
|
137
195
|
|
138
196
|
endpoint_name = d.pop("endpointName", UNSET)
|
139
197
|
|
140
198
|
envs = cast(list[Any], d.pop("envs", UNSET))
|
141
199
|
|
200
|
+
generation = d.pop("generation", UNSET)
|
201
|
+
|
142
202
|
image = d.pop("image", UNSET)
|
143
203
|
|
204
|
+
max_scale = d.pop("maxScale", UNSET)
|
205
|
+
|
144
206
|
memory = d.pop("memory", UNSET)
|
145
207
|
|
146
208
|
metric_port = d.pop("metricPort", UNSET)
|
147
209
|
|
210
|
+
min_scale = d.pop("minScale", UNSET)
|
211
|
+
|
148
212
|
model = d.pop("model", UNSET)
|
149
213
|
|
150
214
|
organization = d.pop("organization", UNSET)
|
151
215
|
|
152
|
-
|
216
|
+
ports = []
|
217
|
+
_ports = d.pop("ports", UNSET)
|
218
|
+
for componentsschemas_ports_item_data in _ports or []:
|
219
|
+
componentsschemas_ports_item = Port.from_dict(componentsschemas_ports_item_data)
|
220
|
+
|
221
|
+
ports.append(componentsschemas_ports_item)
|
153
222
|
|
154
223
|
_startup_probe = d.pop("startupProbe", UNSET)
|
155
224
|
startup_probe: Union[Unset, RuntimeStartupProbe]
|
@@ -158,21 +227,28 @@ class Runtime:
|
|
158
227
|
else:
|
159
228
|
startup_probe = RuntimeStartupProbe.from_dict(_startup_probe)
|
160
229
|
|
230
|
+
timeout = d.pop("timeout", UNSET)
|
231
|
+
|
161
232
|
type_ = d.pop("type", UNSET)
|
162
233
|
|
163
234
|
runtime = cls(
|
164
235
|
args=args,
|
165
236
|
command=command,
|
237
|
+
configuration=configuration,
|
166
238
|
cpu=cpu,
|
167
239
|
endpoint_name=endpoint_name,
|
168
240
|
envs=envs,
|
241
|
+
generation=generation,
|
169
242
|
image=image,
|
243
|
+
max_scale=max_scale,
|
170
244
|
memory=memory,
|
171
245
|
metric_port=metric_port,
|
246
|
+
min_scale=min_scale,
|
172
247
|
model=model,
|
173
248
|
organization=organization,
|
174
|
-
|
249
|
+
ports=ports,
|
175
250
|
startup_probe=startup_probe,
|
251
|
+
timeout=timeout,
|
176
252
|
type_=type_,
|
177
253
|
)
|
178
254
|
|
@@ -0,0 +1,45 @@
|
|
1
|
+
from typing import Any, TypeVar
|
2
|
+
|
3
|
+
from attrs import define as _attrs_define
|
4
|
+
from attrs import field as _attrs_field
|
5
|
+
|
6
|
+
T = TypeVar("T", bound="RuntimeConfiguration")
|
7
|
+
|
8
|
+
|
9
|
+
@_attrs_define
class RuntimeConfiguration:
    """The configuration for the deployment

    The model declares no fixed fields; every key of the source payload is
    stored in ``additional_properties`` and is reachable through item access
    (``obj[key]``, ``key in obj``, ``del obj[key]``).
    """

    additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a new dict that is a shallow copy of the additional properties."""
        return dict(self.additional_properties)

    @classmethod
    def from_dict(cls: type[T], src_dict: dict[str, Any]) -> T:
        """Build an instance from ``src_dict``.

        A falsy ``src_dict`` (``None`` or an empty mapping) yields ``None``
        instead of an instance. Otherwise a shallow copy of ``src_dict``
        becomes the instance's ``additional_properties``.
        """
        if not src_dict:
            return None
        payload = src_dict.copy()
        instance = cls()
        instance.additional_properties = payload
        return instance

    @property
    def additional_keys(self) -> list[str]:
        """Names of all stored additional properties."""
        return list(self.additional_properties)

    def __getitem__(self, key: str) -> Any:
        """Return the additional property stored under ``key``."""
        return self.additional_properties[key]

    def __setitem__(self, key: str, value: Any) -> None:
        """Store ``value`` as the additional property ``key``."""
        self.additional_properties[key] = value

    def __delitem__(self, key: str) -> None:
        """Remove the additional property ``key``."""
        del self.additional_properties[key]

    def __contains__(self, key: str) -> bool:
        """Report whether ``key`` is a stored additional property."""
        return key in self.additional_properties
|