frogml 1.2.50__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- frogml/__init__.py +1 -1
- frogml/core/clients/batch_job_management/client.py +269 -257
- frogml/core/clients/batch_job_management/executions_config.py +10 -3
- frogml/core/clients/build_orchestrator/build_model_request_getter.py +7 -1
- frogml/core/clients/build_orchestrator/client.py +108 -67
- frogml/core/clients/build_orchestrator/internal_client.py +42 -38
- frogml/core/clients/feature_store/management_client.py +58 -39
- frogml/core/clients/feature_store/operator_client.py +6 -4
- frogml/core/clients/model_group_management/client.py +5 -2
- frogml/core/clients/model_management/client.py +25 -8
- frogml/core/clients/model_version_manager/build_model_version_dto.py +4 -1
- frogml/core/clients/model_version_manager/client.py +67 -68
- frogml/core/exceptions/__init__.py +20 -2
- frogml/core/exceptions/frogml_exception.py +35 -5
- frogml/core/exceptions/frogml_general_build_exception.py +19 -11
- frogml/core/exceptions/frogml_grpc_address_exception.py +15 -4
- frogml/core/exceptions/frogml_http_exception.py +3 -1
- frogml/core/exceptions/frogml_login_exception.py +16 -5
- frogml/core/exceptions/frogml_not_found_exception.py +16 -3
- frogml/core/exceptions/frogml_remote_build_failed.py +1 -1
- frogml/core/exceptions/frogml_token_exception.py +17 -5
- frogml/core/exceptions/grpc_status_mapping.py +43 -0
- frogml/core/inner/build_logic/phases/phase_010_fetch_model/fetch_strategy_manager/strategy/git/git_strategy.py +10 -2
- frogml/core/inner/build_logic/phases/phase_010_fetch_model/pre_fetch_validation_step.py +3 -2
- frogml/core/inner/build_logic/phases/phase_020_remote_register_frogml_build/upload_step.py +13 -10
- frogml/core/inner/build_logic/run_handlers/programmatic_phase_run_handler.py +8 -3
- frogml/core/inner/build_logic/tools/files.py +1 -2
- frogml/core/inner/model_loggers_utils.py +21 -8
- frogml/core/inner/tool/auth/auth_client.py +1 -1
- frogml/core/inner/tool/grpc/grpc_try_wrapping.py +51 -72
- frogml/core/inner/tool/protobuf_factory.py +8 -2
- frogml/sdk/frogml_client/client.py +29 -10
- frogml/sdk/model/adapters/input_adapters/numpy_input_adapter.py +6 -1
- frogml/sdk/model/adapters/output_adapters/numpy_output_adapter.py +4 -1
- frogml/sdk/model/decorators/api.py +6 -1
- frogml/sdk/model/tools/adapters/output.py +6 -2
- frogml/sdk/model_version/catboost/__init__.py +4 -1
- frogml/sdk/model_version/huggingface/__init__.py +4 -1
- frogml/sdk/model_version/model_loggers/catboost_model_version_manager.py +4 -1
- frogml/sdk/model_version/model_loggers/huggingface_model_version_manager.py +10 -2
- frogml/sdk/model_version/model_loggers/onnx_model_version_manager.py +4 -1
- frogml/sdk/model_version/model_loggers/pytorch_model_version_manager.py +4 -1
- frogml/sdk/model_version/model_loggers/scikit_learn_model_version_manager.py +4 -1
- frogml/sdk/model_version/onnx/__init__.py +4 -1
- frogml/sdk/model_version/pytorch/__init__.py +4 -1
- frogml/sdk/model_version/scikit_learn/__init__.py +4 -1
- frogml/sdk/model_version/utils/jml/customer_client.py +1 -1
- frogml/sdk/model_version/utils/storage.py +4 -1
- frogml/sdk/model_version/utils/validations.py +10 -2
- {frogml-1.2.50.dist-info → frogml-2.0.0.dist-info}/METADATA +1 -1
- {frogml-1.2.50.dist-info → frogml-2.0.0.dist-info}/RECORD +58 -61
- frogml_services_mock/mocks/batch_job_manager_service.py +11 -9
- frogml_services_mock/mocks/build_orchestrator_service_api.py +4 -0
- frogml_services_mock/mocks/ecosystem_service_api.py +1 -2
- frogml_services_mock/mocks/model_group_management_service.py +20 -9
- frogml_services_mock/mocks/model_version_manager_service.py +3 -1
- frogml_services_mock/mocks/project_manager_service.py +6 -2
- frogml/core/exceptions/frogml_decode_exception.py +0 -7
- frogml/core/exceptions/frogml_external_exception.py +0 -11
- frogml/core/exceptions/frogml_load_model_failed_exception.py +0 -10
- frogml/core/exceptions/quiet_error.py +0 -22
- {frogml-1.2.50.dist-info → frogml-2.0.0.dist-info}/WHEEL +0 -0
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
2
3
|
from dependency_injector.wiring import Provide
|
|
3
4
|
|
|
4
5
|
from frogml._proto.qwak.batch_job.v1.batch_job_service_pb2 import (
|
|
@@ -36,7 +37,6 @@ from frogml._proto.qwak.batch_job.v1.batch_job_service_pb2 import (
|
|
|
36
37
|
StartBatchJobResponse,
|
|
37
38
|
StartWarmupJobRequest,
|
|
38
39
|
StartWarmupJobResponse,
|
|
39
|
-
UpdateTasksDetailsResponse,
|
|
40
40
|
UpdateTasksDetailsRequest,
|
|
41
41
|
BatchTaskDetails,
|
|
42
42
|
InputFileDetails,
|
|
@@ -56,6 +56,7 @@ from frogml.core.clients.logging_client import LoggingClient
|
|
|
56
56
|
from frogml.core.clients.model_management import ModelsManagementClient
|
|
57
57
|
from frogml.core.exceptions import FrogmlException
|
|
58
58
|
from frogml.core.inner.di_configuration import FrogmlContainer
|
|
59
|
+
from frogml.core.inner.tool.grpc.grpc_try_wrapping import grpc_try_catch_wrapper
|
|
59
60
|
|
|
60
61
|
from .executions_config import (
|
|
61
62
|
INPUT_FORMATTERS_MAP,
|
|
@@ -71,7 +72,6 @@ from .results import (
|
|
|
71
72
|
GetExecutionReportResult,
|
|
72
73
|
StartExecutionResult,
|
|
73
74
|
)
|
|
74
|
-
from typing import List, Optional
|
|
75
75
|
|
|
76
76
|
CLIENT_TIMEOUT = 180 # Seconds
|
|
77
77
|
|
|
@@ -80,7 +80,7 @@ class BatchJobManagerClient:
|
|
|
80
80
|
def __init__(
|
|
81
81
|
self,
|
|
82
82
|
grpc_channel=Provide[FrogmlContainer.core_grpc_channel],
|
|
83
|
-
logging_client: LoggingClient = None,
|
|
83
|
+
logging_client: Optional[LoggingClient] = None,
|
|
84
84
|
):
|
|
85
85
|
self.batch_job_management = BatchJobManagementServiceStub(grpc_channel)
|
|
86
86
|
self.logging_client = logging_client
|
|
@@ -98,80 +98,80 @@ class BatchJobManagerClient:
|
|
|
98
98
|
Returns:
|
|
99
99
|
The response received from the api. On successful start of execution a batch job id is returned
|
|
100
100
|
"""
|
|
101
|
-
job_size =
|
|
101
|
+
job_size: BatchJobDeploymentSize = (
|
|
102
|
+
self.get_batch_deployment_size_from_resources(execution_config)
|
|
103
|
+
)
|
|
102
104
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
105
|
+
user_raw_input_format: str = execution_config.execution.input_file_type.upper()
|
|
106
|
+
user_raw_output_format: str = (
|
|
107
|
+
execution_config.execution.output_file_type.upper()
|
|
108
|
+
)
|
|
106
109
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
110
|
+
if user_raw_input_format not in INPUT_FORMATTERS_MAP:
|
|
111
|
+
raise ValueError(
|
|
112
|
+
f"Invalid input format - please choose one of {list(INPUT_FORMATTERS_MAP.keys())}"
|
|
113
|
+
)
|
|
111
114
|
|
|
112
|
-
|
|
115
|
+
if user_raw_output_format not in OUTPUT_FORMATTERS_MAP:
|
|
116
|
+
raise ValueError(
|
|
117
|
+
f"Invalid output format - please choose one of {list(OUTPUT_FORMATTERS_MAP.keys())}"
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
user_purchase_option: Optional[str] = (
|
|
121
|
+
execution_config.advanced_options.purchase_option
|
|
122
|
+
)
|
|
123
|
+
if user_purchase_option is not None:
|
|
124
|
+
if user_purchase_option not in PURCHASE_OPTION_SET:
|
|
113
125
|
raise ValueError(
|
|
114
|
-
f"Invalid
|
|
126
|
+
f"Invalid purchase option - please choose one of {list(PURCHASE_OPTION_SET)}"
|
|
115
127
|
)
|
|
128
|
+
user_purchase_option = user_purchase_option.replace("-", "")
|
|
116
129
|
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
model_details=BatchJobModelDetails(
|
|
129
|
-
model_id=execution_config.execution.model_id,
|
|
130
|
-
build_id=execution_config.execution.build_id,
|
|
130
|
+
start_job_result: StartBatchJobResponse = self._start_batch_job(
|
|
131
|
+
batch_job_request=BatchJobRequest(
|
|
132
|
+
model_details=BatchJobModelDetails(
|
|
133
|
+
model_id=execution_config.execution.model_id,
|
|
134
|
+
build_id=execution_config.execution.build_id,
|
|
135
|
+
),
|
|
136
|
+
data_details=BatchJobDataDetails(
|
|
137
|
+
source_path=BatchJobSourcePath(
|
|
138
|
+
source_folder=execution_config.execution.source_folder,
|
|
139
|
+
source_bucket=execution_config.execution.source_bucket,
|
|
140
|
+
input_file_type=INPUT_FORMATTERS_MAP.get(user_raw_input_format),
|
|
131
141
|
),
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
user_raw_input_format
|
|
138
|
-
),
|
|
139
|
-
),
|
|
140
|
-
destination_path=BatchJobDestinationPath(
|
|
141
|
-
destination_bucket=execution_config.execution.destination_bucket,
|
|
142
|
-
destination_folder=execution_config.execution.destination_folder,
|
|
143
|
-
output_file_type=OUTPUT_FORMATTERS_MAP.get(
|
|
144
|
-
user_raw_output_format
|
|
145
|
-
),
|
|
142
|
+
destination_path=BatchJobDestinationPath(
|
|
143
|
+
destination_bucket=execution_config.execution.destination_bucket,
|
|
144
|
+
destination_folder=execution_config.execution.destination_folder,
|
|
145
|
+
output_file_type=OUTPUT_FORMATTERS_MAP.get(
|
|
146
|
+
user_raw_output_format
|
|
146
147
|
),
|
|
147
|
-
token_secret=execution_config.execution.access_token_name,
|
|
148
|
-
secret_secret=execution_config.execution.access_secret_name,
|
|
149
|
-
session_token=execution_config.execution.session_token,
|
|
150
|
-
service_account_json_key_secret=execution_config.execution.service_account_key_secret_name,
|
|
151
148
|
),
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
149
|
+
token_secret=execution_config.execution.access_token_name,
|
|
150
|
+
secret_secret=execution_config.execution.access_secret_name,
|
|
151
|
+
session_token=execution_config.execution.session_token,
|
|
152
|
+
service_account_json_key_secret=execution_config.execution.service_account_key_secret_name,
|
|
153
|
+
),
|
|
154
|
+
execution_details=BatchJobExecutionDetails(
|
|
155
|
+
job_timeout=execution_config.execution.job_timeout,
|
|
156
|
+
task_timeout=execution_config.execution.file_timeout,
|
|
157
|
+
batch_job_deployment_size=job_size,
|
|
158
|
+
advanced_deployment_options=AdvancedDeploymentOptions(
|
|
159
|
+
custom_iam_role_arn=execution_config.advanced_options.custom_iam_role_arn,
|
|
160
|
+
purchase_option=user_purchase_option,
|
|
161
|
+
service_account_key_secret_name=execution_config.advanced_options.service_account_key_secret_name,
|
|
164
162
|
),
|
|
165
|
-
|
|
163
|
+
parameters=BatchJobManagerClient._batch_job_parameters_as_list(
|
|
164
|
+
execution_config.execution.parameters
|
|
165
|
+
),
|
|
166
|
+
),
|
|
166
167
|
)
|
|
168
|
+
)
|
|
167
169
|
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
except grpc.RpcError as e:
|
|
174
|
-
raise FrogmlException(f"Failed to start execution, error is {e}")
|
|
170
|
+
return StartExecutionResult(
|
|
171
|
+
success=start_job_result.success,
|
|
172
|
+
execution_id=start_job_result.batch_id,
|
|
173
|
+
failure_message=start_job_result.failure_message,
|
|
174
|
+
)
|
|
175
175
|
|
|
176
176
|
@staticmethod
|
|
177
177
|
def get_batch_deployment_size_from_resources(execution_config):
|
|
@@ -207,6 +207,9 @@ class BatchJobManagerClient:
|
|
|
207
207
|
),
|
|
208
208
|
)
|
|
209
209
|
|
|
210
|
+
@grpc_try_catch_wrapper(
|
|
211
|
+
error_message="Failed to start execution", operation="Start Batch Execution"
|
|
212
|
+
)
|
|
210
213
|
def _start_batch_job(
|
|
211
214
|
self,
|
|
212
215
|
batch_job_request: BatchJobRequest,
|
|
@@ -244,28 +247,30 @@ class BatchJobManagerClient:
|
|
|
244
247
|
"""
|
|
245
248
|
job_size = self.get_batch_deployment_size_from_resources(execution_config)
|
|
246
249
|
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
warmup_timeout=execution_config.warmup.timeout,
|
|
257
|
-
batch_job_deployment_size=job_size,
|
|
258
|
-
),
|
|
259
|
-
timeout=CLIENT_TIMEOUT,
|
|
260
|
-
)
|
|
250
|
+
start_warmup_job_result: StartWarmupJobResponse = self._start_warmup_job(
|
|
251
|
+
StartWarmupJobRequest(
|
|
252
|
+
model_id=execution_config.execution.model_id,
|
|
253
|
+
build_id=execution_config.execution.build_id,
|
|
254
|
+
branch_id=ModelsManagementClient().get_model_uuid(
|
|
255
|
+
execution_config.execution.model_id,
|
|
256
|
+
),
|
|
257
|
+
warmup_timeout=execution_config.warmup.timeout,
|
|
258
|
+
batch_job_deployment_size=job_size,
|
|
261
259
|
)
|
|
260
|
+
)
|
|
262
261
|
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
262
|
+
return StartWarmupJobResponse(
|
|
263
|
+
success=start_warmup_job_result.success,
|
|
264
|
+
failure_message=start_warmup_job_result.failure_message,
|
|
265
|
+
)
|
|
266
|
+
|
|
267
|
+
@grpc_try_catch_wrapper(
|
|
268
|
+
error_message="Failed to start warmup", operation="Start Batch Warmup Job"
|
|
269
|
+
)
|
|
270
|
+
def _start_warmup_job(
|
|
271
|
+
self, request: StartWarmupJobRequest
|
|
272
|
+
) -> StartWarmupJobResponse:
|
|
273
|
+
return self.batch_job_management.StartWarmupJob(request, timeout=CLIENT_TIMEOUT)
|
|
269
274
|
|
|
270
275
|
def get_execution_status(self, execution_id: str) -> ExecutionStatusResult:
|
|
271
276
|
"""
|
|
@@ -277,25 +282,26 @@ class BatchJobManagerClient:
|
|
|
277
282
|
the status of the execution
|
|
278
283
|
|
|
279
284
|
"""
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
)
|
|
285
|
+
batch_job_status: GetBatchJobStatusResponse = self._get_batch_job_status(
|
|
286
|
+
execution_id
|
|
287
|
+
)
|
|
288
|
+
|
|
289
|
+
return ExecutionStatusResult(
|
|
290
|
+
success=batch_job_status.success,
|
|
291
|
+
status=BatchJobStatusMessage.Name(batch_job_status.job_status),
|
|
292
|
+
finished_files=batch_job_status.finished_files,
|
|
293
|
+
total_files=batch_job_status.total_files,
|
|
294
|
+
failure_message=batch_job_status.failure_message,
|
|
295
|
+
)
|
|
296
|
+
|
|
297
|
+
@grpc_try_catch_wrapper(
|
|
298
|
+
error_message="Failed to get execution status for execution '{execution_id}'",
|
|
299
|
+
operation="Get Batch Execution Status",
|
|
300
|
+
)
|
|
301
|
+
def _get_batch_job_status(self, execution_id: str) -> GetBatchJobStatusResponse:
|
|
302
|
+
return self.batch_job_management.GetBatchJobStatus(
|
|
303
|
+
GetBatchJobStatusRequest(batch_id=execution_id)
|
|
304
|
+
)
|
|
299
305
|
|
|
300
306
|
def cancel_warmup(
|
|
301
307
|
self, execution_config: ExecutionConfig
|
|
@@ -309,27 +315,30 @@ class BatchJobManagerClient:
|
|
|
309
315
|
Returns:
|
|
310
316
|
The response received from the api. On successful start of execution a batch job id is returned
|
|
311
317
|
"""
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
execution_config.execution.model_id,
|
|
320
|
-
),
|
|
321
|
-
),
|
|
322
|
-
timeout=CLIENT_TIMEOUT,
|
|
323
|
-
)
|
|
318
|
+
cancel_warmup_job_response: CancelWarmupJobResponse = self._cancel_warmup_job(
|
|
319
|
+
CancelWarmupJobRequest(
|
|
320
|
+
model_id=execution_config.execution.model_id,
|
|
321
|
+
build_id=execution_config.execution.build_id,
|
|
322
|
+
branch_id=ModelsManagementClient().get_model_uuid(
|
|
323
|
+
execution_config.execution.model_id,
|
|
324
|
+
),
|
|
324
325
|
)
|
|
326
|
+
)
|
|
325
327
|
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
328
|
+
return CancelWarmupJobResponse(
|
|
329
|
+
success=cancel_warmup_job_response.success,
|
|
330
|
+
failure_message=cancel_warmup_job_response.failure_message,
|
|
331
|
+
)
|
|
330
332
|
|
|
331
|
-
|
|
332
|
-
|
|
333
|
+
@grpc_try_catch_wrapper(
|
|
334
|
+
error_message="Failed to cancel warmup", operation="Cancel Batch Warmup"
|
|
335
|
+
)
|
|
336
|
+
def _cancel_warmup_job(
|
|
337
|
+
self, request: CancelWarmupJobRequest
|
|
338
|
+
) -> CancelWarmupJobResponse:
|
|
339
|
+
return self.batch_job_management.CancelWarmupJob(
|
|
340
|
+
request, timeout=CLIENT_TIMEOUT
|
|
341
|
+
)
|
|
333
342
|
|
|
334
343
|
def cancel_execution(self, execution_id: str) -> CancelExecutionResult:
|
|
335
344
|
"""
|
|
@@ -341,23 +350,23 @@ class BatchJobManagerClient:
|
|
|
341
350
|
A successful response or failure of the cancel process
|
|
342
351
|
|
|
343
352
|
"""
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
CancelBatchJobRequest(
|
|
348
|
-
batch_id=execution_id,
|
|
349
|
-
),
|
|
350
|
-
timeout=CLIENT_TIMEOUT,
|
|
351
|
-
)
|
|
352
|
-
)
|
|
353
|
+
cancel_batch_job_response: CancelBatchJobResponse = self._cancel_batch_job(
|
|
354
|
+
execution_id
|
|
355
|
+
)
|
|
353
356
|
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
357
|
+
return CancelExecutionResult(
|
|
358
|
+
success=cancel_batch_job_response.success,
|
|
359
|
+
failure_message=cancel_batch_job_response.failure_message,
|
|
360
|
+
)
|
|
358
361
|
|
|
359
|
-
|
|
360
|
-
|
|
362
|
+
@grpc_try_catch_wrapper(
|
|
363
|
+
error_message="Failed to cancel execution '{execution_id}'",
|
|
364
|
+
operation="Cancel Batch Execution",
|
|
365
|
+
)
|
|
366
|
+
def _cancel_batch_job(self, execution_id: str) -> CancelBatchJobResponse:
|
|
367
|
+
return self.batch_job_management.CancelBatchJob(
|
|
368
|
+
CancelBatchJobRequest(batch_id=execution_id), timeout=CLIENT_TIMEOUT
|
|
369
|
+
)
|
|
361
370
|
|
|
362
371
|
def get_execution_report(
|
|
363
372
|
self,
|
|
@@ -375,42 +384,45 @@ class BatchJobManagerClient:
|
|
|
375
384
|
Returns:
|
|
376
385
|
A full report of all the events that occurred as part of the execution job.
|
|
377
386
|
"""
|
|
378
|
-
|
|
379
|
-
self.logging_client
|
|
380
|
-
|
|
381
|
-
)
|
|
382
|
-
batch_job_report: GetBatchJobReportResponse = (
|
|
383
|
-
self.batch_job_management.GetBatchJobReport(
|
|
384
|
-
GetBatchJobReportRequest(batch_id=execution_id),
|
|
385
|
-
timeout=CLIENT_TIMEOUT,
|
|
386
|
-
)
|
|
387
|
-
)
|
|
387
|
+
self.logging_client = (
|
|
388
|
+
self.logging_client if self.logging_client else LoggingClient()
|
|
389
|
+
)
|
|
388
390
|
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
391
|
+
batch_job_report: GetBatchJobReportResponse = self._get_batch_job_report(
|
|
392
|
+
execution_id
|
|
393
|
+
)
|
|
394
|
+
|
|
395
|
+
try:
|
|
396
|
+
execution_log_response: ReadLogsResponse = (
|
|
397
|
+
self.logging_client.read_execution_models_logs(
|
|
398
|
+
execution_id=execution_id,
|
|
399
|
+
model_id=model_id,
|
|
400
|
+
model_group_name=model_group_name,
|
|
396
401
|
)
|
|
397
|
-
execution_logs = execution_log_response.log_line
|
|
398
|
-
execution_logs.sort(key=lambda line: line.ingested_iso_timestamp)
|
|
399
|
-
execution_logs_msgs = [line.text for line in execution_logs]
|
|
400
|
-
except FrogmlException as e:
|
|
401
|
-
execution_logs_msgs = [
|
|
402
|
-
f"Error reading the execution model run logs due to: {e.message}"
|
|
403
|
-
]
|
|
404
|
-
|
|
405
|
-
return GetExecutionReportResult(
|
|
406
|
-
success=batch_job_report.successful,
|
|
407
|
-
failure_message=batch_job_report.failure_message,
|
|
408
|
-
records=batch_job_report.report_messages,
|
|
409
|
-
model_logs=execution_logs_msgs,
|
|
410
402
|
)
|
|
403
|
+
execution_logs = execution_log_response.log_line
|
|
404
|
+
execution_logs.sort(key=lambda line: line.ingested_iso_timestamp)
|
|
405
|
+
execution_logs_msgs = [line.text for line in execution_logs]
|
|
406
|
+
except FrogmlException as e:
|
|
407
|
+
execution_logs_msgs = [
|
|
408
|
+
f"Error reading the execution model run logs due to: {e.error_message}"
|
|
409
|
+
]
|
|
410
|
+
|
|
411
|
+
return GetExecutionReportResult(
|
|
412
|
+
success=batch_job_report.successful,
|
|
413
|
+
failure_message=batch_job_report.failure_message,
|
|
414
|
+
records=batch_job_report.report_messages,
|
|
415
|
+
model_logs=execution_logs_msgs,
|
|
416
|
+
)
|
|
411
417
|
|
|
412
|
-
|
|
413
|
-
|
|
418
|
+
@grpc_try_catch_wrapper(
|
|
419
|
+
error_message="Failed to get report for execution '{execution_id}'",
|
|
420
|
+
operation="Get Batch Execution Report",
|
|
421
|
+
)
|
|
422
|
+
def _get_batch_job_report(self, execution_id: str) -> GetBatchJobReportResponse:
|
|
423
|
+
return self.batch_job_management.GetBatchJobReport(
|
|
424
|
+
GetBatchJobReportRequest(batch_id=execution_id), timeout=CLIENT_TIMEOUT
|
|
425
|
+
)
|
|
414
426
|
|
|
415
427
|
def get_pre_signed_upload_urls_details(
|
|
416
428
|
self, model_id: str, number_of_batches: int, file_type: str = "csv"
|
|
@@ -424,31 +436,35 @@ class BatchJobManagerClient:
|
|
|
424
436
|
|
|
425
437
|
Returns: GetBatchJobPreSignedUploadUrlResult which contains the input/output path, the bucket, and the pre-signed urls
|
|
426
438
|
"""
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
file_type=file_type,
|
|
434
|
-
),
|
|
435
|
-
timeout=CLIENT_TIMEOUT,
|
|
439
|
+
response: GetBatchJobPreSignedUploadUrlResponse = (
|
|
440
|
+
self._get_pre_signed_upload_url(
|
|
441
|
+
GetBatchJobPreSignedUploadUrlRequest(
|
|
442
|
+
model_id=model_id,
|
|
443
|
+
number_of_files=number_of_batches,
|
|
444
|
+
file_type=file_type,
|
|
436
445
|
)
|
|
437
446
|
)
|
|
447
|
+
)
|
|
438
448
|
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
449
|
+
return GetBatchJobPreSignedUploadUrlResult(
|
|
450
|
+
success=response.success,
|
|
451
|
+
failure_message=response.failure_message,
|
|
452
|
+
input_path=response.input_path,
|
|
453
|
+
output_path=response.output_path,
|
|
454
|
+
bucket=response.bucket,
|
|
455
|
+
urls=response.urls,
|
|
456
|
+
)
|
|
447
457
|
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
458
|
+
@grpc_try_catch_wrapper(
|
|
459
|
+
error_message="Failed to get pre signed urls for execution",
|
|
460
|
+
operation="Get Batch Pre-Signed Upload URLs",
|
|
461
|
+
)
|
|
462
|
+
def _get_pre_signed_upload_url(
|
|
463
|
+
self, request: GetBatchJobPreSignedUploadUrlRequest
|
|
464
|
+
) -> GetBatchJobPreSignedUploadUrlResponse:
|
|
465
|
+
return self.batch_job_management.GetBatchJobPreSignedUploadUrl(
|
|
466
|
+
request, timeout=CLIENT_TIMEOUT
|
|
467
|
+
)
|
|
452
468
|
|
|
453
469
|
def get_pre_signed_download_urls_details(
|
|
454
470
|
self, execution_id: str
|
|
@@ -461,27 +477,32 @@ class BatchJobManagerClient:
|
|
|
461
477
|
Returns: GetBatchJobPreSignedDownloadUrlResult which contains the pre-signed urls of the output files
|
|
462
478
|
|
|
463
479
|
"""
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
GetBatchJobPreSignedDownloadUrlRequest(
|
|
468
|
-
job_id=execution_id,
|
|
469
|
-
),
|
|
470
|
-
timeout=CLIENT_TIMEOUT,
|
|
471
|
-
)
|
|
472
|
-
)
|
|
480
|
+
response: GetBatchJobPreSignedDownloadUrlResponse = (
|
|
481
|
+
self._get_pre_signed_download_url(execution_id)
|
|
482
|
+
)
|
|
473
483
|
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
484
|
+
return GetBatchJobPreSignedDownloadUrlResult(
|
|
485
|
+
success=response.success,
|
|
486
|
+
failure_message=response.failure_message,
|
|
487
|
+
urls=response.urls,
|
|
488
|
+
)
|
|
479
489
|
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
490
|
+
@grpc_try_catch_wrapper(
|
|
491
|
+
error_message="Failed to get pre signed download urls for execution '{execution_id}'",
|
|
492
|
+
operation="Get Batch Pre-Signed Download URLs",
|
|
493
|
+
)
|
|
494
|
+
def _get_pre_signed_download_url(
|
|
495
|
+
self, execution_id: str
|
|
496
|
+
) -> GetBatchJobPreSignedDownloadUrlResponse:
|
|
497
|
+
return self.batch_job_management.GetBatchJobPreSignedDownloadUrl(
|
|
498
|
+
GetBatchJobPreSignedDownloadUrlRequest(job_id=execution_id),
|
|
499
|
+
timeout=CLIENT_TIMEOUT,
|
|
500
|
+
)
|
|
484
501
|
|
|
502
|
+
@grpc_try_catch_wrapper(
|
|
503
|
+
error_message="Failed to get upload details",
|
|
504
|
+
operation="Get Batch Upload Details",
|
|
505
|
+
)
|
|
485
506
|
def get_upload_details(self, model_id: str) -> GetBatchJobUploadDetailsResponse:
|
|
486
507
|
"""
|
|
487
508
|
Get upload details in order to start a job using FrogML cloud bucket
|
|
@@ -490,17 +511,17 @@ class BatchJobManagerClient:
|
|
|
490
511
|
|
|
491
512
|
Returns: GetBatchJobPreSignedUploadUrlResult which contains the input/output path, the bucket, and temporary credentials
|
|
492
513
|
"""
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
)
|
|
500
|
-
|
|
501
|
-
except grpc.RpcError as e:
|
|
502
|
-
raise FrogmlException(f"Failed to get upload details, error is: {e}")
|
|
514
|
+
return self.batch_job_management.GetBatchJobUploadDetails(
|
|
515
|
+
GetBatchJobUploadDetailsRequest(
|
|
516
|
+
model_id=model_id,
|
|
517
|
+
),
|
|
518
|
+
timeout=CLIENT_TIMEOUT,
|
|
519
|
+
)
|
|
503
520
|
|
|
521
|
+
@grpc_try_catch_wrapper(
|
|
522
|
+
error_message="Failed to get download details",
|
|
523
|
+
operation="Get Batch Download Details",
|
|
524
|
+
)
|
|
504
525
|
def get_download_details(
|
|
505
526
|
self, execution_id: str
|
|
506
527
|
) -> GetBatchJobDownloadDetailsResponse:
|
|
@@ -511,17 +532,16 @@ class BatchJobManagerClient:
|
|
|
511
532
|
|
|
512
533
|
Returns: GetBatchJobDownloadDetailsResponse which contains the keys the bucket, and temporary credentials
|
|
513
534
|
"""
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
)
|
|
521
|
-
|
|
522
|
-
except grpc.RpcError as e:
|
|
523
|
-
raise FrogmlException(f"Failed to get download details, error is: {e}")
|
|
535
|
+
return self.batch_job_management.GetBatchJobDownloadDetails(
|
|
536
|
+
GetBatchJobDownloadDetailsRequest(
|
|
537
|
+
job_id=execution_id,
|
|
538
|
+
),
|
|
539
|
+
timeout=CLIENT_TIMEOUT,
|
|
540
|
+
)
|
|
524
541
|
|
|
542
|
+
@grpc_try_catch_wrapper(
|
|
543
|
+
error_message="Failed to list batch jobs", operation="List Batch Jobs"
|
|
544
|
+
)
|
|
525
545
|
def list_batch_jobs(self, model_id: str, build_id: str) -> ListBatchJobsResponse:
|
|
526
546
|
"""
|
|
527
547
|
List batch jobs by its model ID
|
|
@@ -531,15 +551,15 @@ class BatchJobManagerClient:
|
|
|
531
551
|
|
|
532
552
|
Returns: ListBatchJobsResponse which contains list of batch jobs details
|
|
533
553
|
"""
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
)
|
|
539
|
-
|
|
540
|
-
except grpc.RpcError as e:
|
|
541
|
-
raise FrogmlException(f"Failed to list batch jobs, error is: {e}")
|
|
554
|
+
return self.batch_job_management.ListBatchJobs(
|
|
555
|
+
ListBatchJobsRequest(model_id=model_id, build_id=build_id),
|
|
556
|
+
timeout=CLIENT_TIMEOUT,
|
|
557
|
+
)
|
|
542
558
|
|
|
559
|
+
@grpc_try_catch_wrapper(
|
|
560
|
+
error_message="Failed to get batch job details",
|
|
561
|
+
operation="Get Batch Job Details",
|
|
562
|
+
)
|
|
543
563
|
def get_batch_job_details(self, job_id: str) -> GetBatchJobDetailsResponse:
|
|
544
564
|
"""
|
|
545
565
|
Get batch jos by its job ID
|
|
@@ -548,16 +568,16 @@ class BatchJobManagerClient:
|
|
|
548
568
|
|
|
549
569
|
Returns: GetBatchJobDetailsResponse which contains list of task execution details
|
|
550
570
|
"""
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
)
|
|
556
|
-
|
|
557
|
-
except grpc.RpcError as e:
|
|
558
|
-
raise FrogmlException(f"Failed to get batch job details, error is: {e}")
|
|
571
|
+
return self.batch_job_management.GetBatchJobDetails(
|
|
572
|
+
GetBatchJobDetailsRequest(job_id=job_id),
|
|
573
|
+
timeout=CLIENT_TIMEOUT,
|
|
574
|
+
)
|
|
559
575
|
|
|
560
|
-
|
|
576
|
+
@grpc_try_catch_wrapper(
|
|
577
|
+
error_message="Failed to update task details for task '{task_id}'",
|
|
578
|
+
operation="Update Batch Task Details",
|
|
579
|
+
)
|
|
580
|
+
def update_task_details(self, task_id: str, input_file_paths: list[str]):
|
|
561
581
|
"""
|
|
562
582
|
Update task details for a specific task using the provided list of input file paths.
|
|
563
583
|
|
|
@@ -579,14 +599,6 @@ class BatchJobManagerClient:
|
|
|
579
599
|
tasks_details=[batch_task_details] # Wrap in a list
|
|
580
600
|
)
|
|
581
601
|
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
update_request, timeout=CLIENT_TIMEOUT
|
|
586
|
-
)
|
|
587
|
-
)
|
|
588
|
-
return response
|
|
589
|
-
except grpc.RpcError as e:
|
|
590
|
-
raise FrogmlException(
|
|
591
|
-
f"Failed to update task details for task '{task_id}', error is: {e}"
|
|
592
|
-
)
|
|
602
|
+
return self.batch_job_management.UpdateTasksDetails(
|
|
603
|
+
update_request, timeout=CLIENT_TIMEOUT
|
|
604
|
+
)
|