sagemaker-core 2.1.1__py3-none-any.whl → 2.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sagemaker/__init__.py +2 -0
- sagemaker/core/common_utils.py +119 -1
- sagemaker/core/experiments/experiment.py +3 -0
- sagemaker/core/fw_utils.py +56 -12
- sagemaker/core/git_utils.py +66 -0
- sagemaker/core/helper/session_helper.py +22 -10
- sagemaker/core/image_retriever/image_retriever_utils.py +1 -3
- sagemaker/core/image_uri_config/huggingface-llm-neuronx.json +111 -1
- sagemaker/core/image_uri_config/huggingface-llm.json +110 -1
- sagemaker/core/image_uri_config/huggingface-neuronx.json +182 -6
- sagemaker/core/image_uri_config/huggingface-vllm-neuronx.json +38 -0
- sagemaker/core/image_uri_config/huggingface.json +151 -2
- sagemaker/core/image_uri_config/sagemaker-tritonserver.json +40 -0
- sagemaker/core/image_uri_config/sklearn.json +48 -0
- sagemaker/core/image_uri_config/xgboost.json +84 -0
- sagemaker/core/image_uris.py +9 -3
- sagemaker/core/iterators.py +11 -0
- sagemaker/core/jumpstart/models.py +2 -0
- sagemaker/core/jumpstart/region_config.json +8 -0
- sagemaker/core/local/data.py +10 -0
- sagemaker/core/local/utils.py +6 -5
- sagemaker/core/model_monitor/clarify_model_monitoring.py +2 -0
- sagemaker/core/model_registry.py +1 -1
- sagemaker/core/modules/configs.py +14 -1
- sagemaker/core/modules/train/container_drivers/common/utils.py +2 -10
- sagemaker/core/modules/train/sm_recipes/utils.py +1 -1
- sagemaker/core/processing.py +2 -0
- sagemaker/core/remote_function/client.py +31 -6
- sagemaker/core/remote_function/core/pipeline_variables.py +0 -6
- sagemaker/core/remote_function/core/serialization.py +16 -28
- sagemaker/core/remote_function/core/stored_function.py +8 -11
- sagemaker/core/remote_function/errors.py +1 -3
- sagemaker/core/remote_function/invoke_function.py +1 -6
- sagemaker/core/remote_function/job.py +2 -21
- sagemaker/core/telemetry/constants.py +6 -8
- sagemaker/core/telemetry/telemetry_logging.py +6 -5
- sagemaker/core/training/configs.py +16 -4
- sagemaker/core/workflow/utilities.py +10 -3
- {sagemaker_core-2.1.1.dist-info → sagemaker_core-2.3.1.dist-info}/METADATA +1 -1
- {sagemaker_core-2.1.1.dist-info → sagemaker_core-2.3.1.dist-info}/RECORD +43 -47
- sagemaker/core/huggingface/__init__.py +0 -29
- sagemaker/core/huggingface/llm_utils.py +0 -150
- sagemaker/core/huggingface/processing.py +0 -139
- sagemaker/core/huggingface/training_compiler/__init__.py +0 -0
- sagemaker/core/huggingface/training_compiler/config.py +0 -167
- sagemaker/core/image_uri_config/__init__.py +0 -13
- {sagemaker_core-2.1.1.dist-info → sagemaker_core-2.3.1.dist-info}/WHEEL +0 -0
- {sagemaker_core-2.1.1.dist-info → sagemaker_core-2.3.1.dist-info}/licenses/LICENSE +0 -0
- {sagemaker_core-2.1.1.dist-info → sagemaker_core-2.3.1.dist-info}/top_level.txt +0 -0
The hunks below are grouped by file. The dominant change in 2.3.1 is the removal of the per-job HMAC secret (REMOTE_FUNCTION_SECRET_KEY) from the remote function stack in favor of a plain SHA-256 payload digest; smaller changes add a SourceCode.ignore_patterns field, renumber the telemetry feature codes, and guard the _hashlib import.

sagemaker/core/remote_function/core/serialization.py (+16 -28)

@@ -19,7 +19,6 @@ import json
 import io
 
 import sys
-import hmac
 import hashlib
 import pickle
 
@@ -156,7 +155,7 @@ class CloudpickleSerializer:
 
 # TODO: use dask serializer in case dask distributed is installed in users' environment.
 def serialize_func_to_s3(
-    func: Callable, sagemaker_session: Session, s3_uri: str, hmac_key: str, s3_kms_key: str = None
+    func: Callable, sagemaker_session: Session, s3_uri: str, s3_kms_key: str = None
 ):
     """Serializes function and uploads it to S3.
 
@@ -164,7 +163,6 @@ def serialize_func_to_s3(
         sagemaker_session (sagemaker.core.helper.session.Session):
            The underlying Boto3 session which AWS service calls are delegated to.
         s3_uri (str): S3 root uri to which resulting serialized artifacts will be uploaded.
-        hmac_key (str): Key used to calculate hmac-sha256 hash of the serialized func.
         s3_kms_key (str): KMS key used to encrypt artifacts uploaded to S3.
         func: function to be serialized and persisted
     Raises:
@@ -173,14 +171,13 @@
 
     _upload_payload_and_metadata_to_s3(
         bytes_to_upload=CloudpickleSerializer.serialize(func),
-        hmac_key=hmac_key,
         s3_uri=s3_uri,
         sagemaker_session=sagemaker_session,
         s3_kms_key=s3_kms_key,
     )
 
 
-def deserialize_func_from_s3(sagemaker_session: Session, s3_uri: str, hmac_key: str) -> Callable:
+def deserialize_func_from_s3(sagemaker_session: Session, s3_uri: str) -> Callable:
     """Downloads from S3 and then deserializes data objects.
 
     This method downloads the serialized training job outputs to a temporary directory and
@@ -190,7 +187,6 @@ def deserialize_func_from_s3(sagemaker_session: Session, s3_uri: str, hmac_key:
         sagemaker_session (sagemaker.core.helper.session.Session):
             The underlying sagemaker session which AWS service calls are delegated to.
         s3_uri (str): S3 root uri to which resulting serialized artifacts will be uploaded.
-        hmac_key (str): Key used to calculate hmac-sha256 hash of the serialized func.
     Returns :
         The deserialized function.
     Raises:
@@ -203,14 +199,14 @@ def deserialize_func_from_s3(sagemaker_session: Session, s3_uri: str, hmac_key:
     bytes_to_deserialize = _read_bytes_from_s3(f"{s3_uri}/payload.pkl", sagemaker_session)
 
     _perform_integrity_check(
-        expected_hash_value=metadata.sha256_hash, secret_key=hmac_key, buffer=bytes_to_deserialize
+        expected_hash_value=metadata.sha256_hash, buffer=bytes_to_deserialize
     )
 
     return CloudpickleSerializer.deserialize(f"{s3_uri}/payload.pkl", bytes_to_deserialize)
 
 
 def serialize_obj_to_s3(
-    obj: Any, sagemaker_session: Session, s3_uri: str, hmac_key: str, s3_kms_key: str = None
+    obj: Any, sagemaker_session: Session, s3_uri: str, s3_kms_key: str = None
 ):
     """Serializes data object and uploads it to S3.
 
@@ -219,7 +215,6 @@ def serialize_obj_to_s3(
             The underlying Boto3 session which AWS service calls are delegated to.
         s3_uri (str): S3 root uri to which resulting serialized artifacts will be uploaded.
         s3_kms_key (str): KMS key used to encrypt artifacts uploaded to S3.
-        hmac_key (str): Key used to calculate hmac-sha256 hash of the serialized obj.
         obj: object to be serialized and persisted
     Raises:
         SerializationError: when fail to serialize object to bytes.
@@ -227,7 +222,6 @@ def serialize_obj_to_s3(
 
     _upload_payload_and_metadata_to_s3(
         bytes_to_upload=CloudpickleSerializer.serialize(obj),
-        hmac_key=hmac_key,
         s3_uri=s3_uri,
         sagemaker_session=sagemaker_session,
         s3_kms_key=s3_kms_key,
@@ -274,14 +268,13 @@ def json_serialize_obj_to_s3(
     )
 
 
-def deserialize_obj_from_s3(sagemaker_session: Session, s3_uri: str, hmac_key: str) -> Any:
+def deserialize_obj_from_s3(sagemaker_session: Session, s3_uri: str) -> Any:
     """Downloads from S3 and then deserializes data objects.
 
     Args:
         sagemaker_session (sagemaker.core.helper.session.Session):
             The underlying sagemaker session which AWS service calls are delegated to.
         s3_uri (str): S3 root uri to which resulting serialized artifacts will be uploaded.
-        hmac_key (str): Key used to calculate hmac-sha256 hash of the serialized obj.
     Returns :
         Deserialized python objects.
     Raises:
@@ -295,14 +288,14 @@ def deserialize_obj_from_s3(sagemaker_session: Session, s3_uri: str, hmac_key: s
     bytes_to_deserialize = _read_bytes_from_s3(f"{s3_uri}/payload.pkl", sagemaker_session)
 
     _perform_integrity_check(
-        expected_hash_value=metadata.sha256_hash, secret_key=hmac_key, buffer=bytes_to_deserialize
+        expected_hash_value=metadata.sha256_hash, buffer=bytes_to_deserialize
     )
 
     return CloudpickleSerializer.deserialize(f"{s3_uri}/payload.pkl", bytes_to_deserialize)
 
 
 def serialize_exception_to_s3(
-    exc: Exception, sagemaker_session: Session, s3_uri: str, hmac_key: str, s3_kms_key: str = None
+    exc: Exception, sagemaker_session: Session, s3_uri: str, s3_kms_key: str = None
 ):
     """Serializes exception with traceback and uploads it to S3.
 
@@ -311,7 +304,6 @@ def serialize_exception_to_s3(
             The underlying Boto3 session which AWS service calls are delegated to.
         s3_uri (str): S3 root uri to which resulting serialized artifacts will be uploaded.
         s3_kms_key (str): KMS key used to encrypt artifacts uploaded to S3.
-        hmac_key (str): Key used to calculate hmac-sha256 hash of the serialized exception.
         exc: Exception to be serialized and persisted
     Raises:
         SerializationError: when fail to serialize object to bytes.
@@ -320,7 +312,6 @@ def serialize_exception_to_s3(
 
     _upload_payload_and_metadata_to_s3(
         bytes_to_upload=CloudpickleSerializer.serialize(exc),
-        hmac_key=hmac_key,
         s3_uri=s3_uri,
         sagemaker_session=sagemaker_session,
         s3_kms_key=s3_kms_key,
@@ -329,7 +320,6 @@ def serialize_exception_to_s3(
 
 def _upload_payload_and_metadata_to_s3(
     bytes_to_upload: Union[bytes, io.BytesIO],
-    hmac_key: str,
     s3_uri: str,
     sagemaker_session: Session,
     s3_kms_key,
@@ -338,7 +328,6 @@ def _upload_payload_and_metadata_to_s3(
 
     Args:
         bytes_to_upload (bytes): Serialized bytes to upload.
-        hmac_key (str): Key used to calculate hmac-sha256 hash of the serialized obj.
         s3_uri (str): S3 root uri to which resulting serialized artifacts will be uploaded.
         sagemaker_session (sagemaker.core.helper.session.Session):
             The underlying Boto3 session which AWS service calls are delegated to.
@@ -346,7 +335,7 @@ def _upload_payload_and_metadata_to_s3(
     """
     _upload_bytes_to_s3(bytes_to_upload, f"{s3_uri}/payload.pkl", s3_kms_key, sagemaker_session)
 
-    sha256_hash = _compute_hash(bytes_to_upload, secret_key=hmac_key)
+    sha256_hash = _compute_hash(bytes_to_upload)
 
     _upload_bytes_to_s3(
         _MetaData(sha256_hash).to_json(),
@@ -356,14 +345,13 @@ def _upload_payload_and_metadata_to_s3(
     )
 
 
-def deserialize_exception_from_s3(sagemaker_session: Session, s3_uri: str, hmac_key: str) -> Any:
+def deserialize_exception_from_s3(sagemaker_session: Session, s3_uri: str) -> Any:
     """Downloads from S3 and then deserializes exception.
 
     Args:
         sagemaker_session (sagemaker.core.helper.session.Session):
             The underlying sagemaker session which AWS service calls are delegated to.
         s3_uri (str): S3 root uri to which resulting serialized artifacts will be uploaded.
-        hmac_key (str): Key used to calculate hmac-sha256 hash of the serialized exception.
     Returns :
         Deserialized exception with traceback.
     Raises:
@@ -377,7 +365,7 @@ def deserialize_exception_from_s3(sagemaker_session: Session, s3_uri: str, hmac_
     bytes_to_deserialize = _read_bytes_from_s3(f"{s3_uri}/payload.pkl", sagemaker_session)
 
     _perform_integrity_check(
-        expected_hash_value=metadata.sha256_hash, secret_key=hmac_key, buffer=bytes_to_deserialize
+        expected_hash_value=metadata.sha256_hash, buffer=bytes_to_deserialize
     )
 
     return CloudpickleSerializer.deserialize(f"{s3_uri}/payload.pkl", bytes_to_deserialize)
@@ -403,19 +391,19 @@ def _read_bytes_from_s3(s3_uri, sagemaker_session):
     ) from e
 
 
-def _compute_hash(buffer: bytes, secret_key: str) -> str:
-    """Compute the hmac-sha256 hash of the buffer with the secret key."""
-    return hmac.new(secret_key.encode(), msg=buffer, digestmod=hashlib.sha256).hexdigest()
+def _compute_hash(buffer: bytes) -> str:
+    """Compute the sha256 hash"""
+    return hashlib.sha256(buffer).hexdigest()
 
 
-def _perform_integrity_check(expected_hash_value: str, secret_key: str, buffer: bytes):
+def _perform_integrity_check(expected_hash_value: str, buffer: bytes):
     """Performs integrity checks for serialized code/arguments uploaded to s3.
 
     Verifies whether the hash read from s3 matches the hash calculated
     during remote function execution.
     """
-    actual_hash_value = _compute_hash(buffer=buffer, secret_key=secret_key)
-    if not hmac.compare_digest(expected_hash_value, actual_hash_value):
+    actual_hash_value = _compute_hash(buffer=buffer)
+    if expected_hash_value != actual_hash_value:
         raise DeserializationError(
             "Integrity check for the serialized function or data failed. "
             "Please restrict access to your S3 bucket"
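The net effect of these serialization changes: payload integrity is no longer keyed by a per-job HMAC secret; the metadata simply records a SHA-256 digest of the serialized bytes. A minimal sketch of the new hashing contract, stdlib only (DeserializationError is the module's own exception; a bare Exception stands in to keep the sketch self-contained):

    import hashlib

    def compute_hash(buffer: bytes) -> str:
        # SHA-256 hex digest of the serialized payload, as in the new _compute_hash.
        return hashlib.sha256(buffer).hexdigest()

    def perform_integrity_check(expected_hash_value: str, buffer: bytes) -> None:
        # Mirrors the new _perform_integrity_check: plain equality of hex digests.
        if expected_hash_value != compute_hash(buffer):
            raise Exception("Integrity check for the serialized function or data failed.")

    payload = b"cloudpickled-bytes"        # stand-in for CloudpickleSerializer output
    stored_digest = compute_hash(payload)  # what _MetaData persists next to payload.pkl
    perform_integrity_check(stored_digest, payload)  # passes; tampered bytes would raise

The trade-off is worth noting: an unkeyed digest detects corruption but not an attacker who can rewrite both payload.pkl and its metadata, which is presumably why the error message still advises restricting access to the S3 bucket.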
sagemaker/core/remote_function/core/stored_function.py (+8 -11)

@@ -55,7 +55,6 @@ class StoredFunction:
         self,
         sagemaker_session: Session,
         s3_base_uri: str,
-        hmac_key: str,
         s3_kms_key: str = None,
         context: Context = Context(),
     ):
@@ -66,13 +65,11 @@ class StoredFunction:
                AWS service calls are delegated to.
            s3_base_uri: the base uri to which serialized artifacts will be uploaded.
            s3_kms_key: KMS key used to encrypt artifacts uploaded to S3.
-            hmac_key: Key used to encrypt serialized and deserialized function and arguments.
            context: Build or run context of a pipeline step.
        """
        self.sagemaker_session = sagemaker_session
        self.s3_base_uri = s3_base_uri
        self.s3_kms_key = s3_kms_key
-        self.hmac_key = hmac_key
        self.context = context
 
        # For pipeline steps, function code is at: base/step_name/build_timestamp/
@@ -114,7 +111,7 @@ class StoredFunction:
            sagemaker_session=self.sagemaker_session,
            s3_uri=s3_path_join(self.func_upload_path, FUNCTION_FOLDER),
            s3_kms_key=self.s3_kms_key,
-            hmac_key=self.hmac_key,
+
        )
 
        logger.info(
@@ -126,7 +123,7 @@ class StoredFunction:
            obj=(args, kwargs),
            sagemaker_session=self.sagemaker_session,
            s3_uri=s3_path_join(self.func_upload_path, ARGUMENTS_FOLDER),
-            hmac_key=self.hmac_key,
+
            s3_kms_key=self.s3_kms_key,
        )
 
@@ -144,7 +141,7 @@ class StoredFunction:
        )
        serialization._upload_payload_and_metadata_to_s3(
            bytes_to_upload=serialized_data.func,
-            hmac_key=self.hmac_key,
+
            s3_uri=s3_path_join(self.func_upload_path, FUNCTION_FOLDER),
            sagemaker_session=self.sagemaker_session,
            s3_kms_key=self.s3_kms_key,
@@ -156,7 +153,7 @@ class StoredFunction:
        )
        serialization._upload_payload_and_metadata_to_s3(
            bytes_to_upload=serialized_data.args,
-            hmac_key=self.hmac_key,
+
            s3_uri=s3_path_join(self.func_upload_path, ARGUMENTS_FOLDER),
            sagemaker_session=self.sagemaker_session,
            s3_kms_key=self.s3_kms_key,
@@ -172,7 +169,7 @@ class StoredFunction:
        func = serialization.deserialize_func_from_s3(
            sagemaker_session=self.sagemaker_session,
            s3_uri=s3_path_join(self.func_upload_path, FUNCTION_FOLDER),
-            hmac_key=self.hmac_key,
+
        )
 
        logger.info(
@@ -182,7 +179,7 @@ class StoredFunction:
        args, kwargs = serialization.deserialize_obj_from_s3(
            sagemaker_session=self.sagemaker_session,
            s3_uri=s3_path_join(self.func_upload_path, ARGUMENTS_FOLDER),
-            hmac_key=self.hmac_key,
+
        )
 
        logger.info("Resolving pipeline variables")
@@ -190,7 +187,7 @@ class StoredFunction:
            self.context,
            args,
            kwargs,
-            hmac_key=self.hmac_key,
+
            s3_base_uri=self.s3_base_uri,
            sagemaker_session=self.sagemaker_session,
        )
@@ -206,7 +203,7 @@ class StoredFunction:
            obj=result,
            sagemaker_session=self.sagemaker_session,
            s3_uri=s3_path_join(self.results_upload_path, RESULTS_FOLDER),
-            hmac_key=self.hmac_key,
+
            s3_kms_key=self.s3_kms_key,
        )
 
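With the hmac_key parameter gone, StoredFunction is built from just a session, a base S3 URI, and an optional KMS key. A hypothetical call site under the new signature (the bucket name is a placeholder and the Session import path is inferred from this wheel's layout; the StoredFunction import path appears verbatim in the invoke_function hunks below):

    from sagemaker.core.helper.session_helper import Session  # inferred import path
    from sagemaker.core.remote_function.core.stored_function import StoredFunction

    stored_function = StoredFunction(
        sagemaker_session=Session(),                   # requires AWS credentials
        s3_base_uri="s3://my-bucket/remote-function",  # placeholder base uri
        s3_kms_key=None,                               # optional KMS key for uploads
    )
    # Serialize the function and its arguments to S3, as _Job.compile does:
    stored_function.save(lambda x: x + 1, 41)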
sagemaker/core/remote_function/errors.py (+1 -3)

@@ -70,7 +70,7 @@ def _write_failure_reason_file(failure_msg):
        f.write(failure_msg)
 
 
-def handle_error(error, sagemaker_session, s3_base_uri, s3_kms_key, hmac_key) -> int:
+def handle_error(error, sagemaker_session, s3_base_uri, s3_kms_key) -> int:
    """Handle all exceptions raised during remote function execution.
 
    Args:
@@ -79,7 +79,6 @@ def handle_error(error, sagemaker_session, s3_base_uri, s3_kms_key, hmac_key) ->
            AWS service calls are delegated to.
        s3_base_uri (str): S3 root uri to which resulting serialized exception will be uploaded.
        s3_kms_key (str): KMS key used to encrypt artifacts uploaded to S3.
-        hmac_key (str): Key used to calculate hmac hash of the serialized exception.
    Returns :
        exit_code (int): Exit code to terminate current job.
    """
@@ -97,7 +96,6 @@ def handle_error(error, sagemaker_session, s3_base_uri, s3_kms_key, hmac_key) ->
            exc=error,
            sagemaker_session=sagemaker_session,
            s3_uri=s3_path_join(s3_base_uri, "exception"),
-            hmac_key=hmac_key,
            s3_kms_key=s3_kms_key,
        )
 
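handle_error keeps its contract (serialize the failure under <s3_base_uri>/exception, return an exit code) but now needs only four arguments. A runnable sketch of the entrypoint-style wiring, with a stub that only mimics the new signature:

    def handle_error(error, sagemaker_session, s3_base_uri, s3_kms_key) -> int:
        # Stub with the new four-argument signature; the real implementation also
        # serializes the exception with traceback before returning the exit code.
        print(f"would upload {type(error).__name__} to {s3_base_uri}/exception")
        return 1

    exit_code = 0
    try:
        raise RuntimeError("boom")  # stand-in for a failing user function
    except Exception as error:
        exit_code = handle_error(error, None, "s3://my-bucket/job", None)
    # The real entrypoint calls sys.exit(exit_code) in a finally block.
    print("exit code:", exit_code)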
sagemaker/core/remote_function/invoke_function.py (+1 -6)

@@ -98,7 +98,7 @@ def _load_pipeline_context(args) -> Context:
 
 
 def _execute_remote_function(
-    sagemaker_session, s3_base_uri, s3_kms_key, run_in_context, hmac_key, context
+    sagemaker_session, s3_base_uri, s3_kms_key, run_in_context, context
 ):
    """Execute stored remote function"""
    from sagemaker.core.remote_function.core.stored_function import StoredFunction
@@ -107,7 +107,6 @@ def _execute_remote_function(
        sagemaker_session=sagemaker_session,
        s3_base_uri=s3_base_uri,
        s3_kms_key=s3_kms_key,
-        hmac_key=hmac_key,
        context=context,
    )
 
@@ -138,15 +137,12 @@ def main(sys_args=None):
    run_in_context = args.run_in_context
    pipeline_context = _load_pipeline_context(args)
 
-    hmac_key = os.getenv("REMOTE_FUNCTION_SECRET_KEY")
-
    sagemaker_session = _get_sagemaker_session(region)
    _execute_remote_function(
        sagemaker_session=sagemaker_session,
        s3_base_uri=s3_base_uri,
        s3_kms_key=s3_kms_key,
        run_in_context=run_in_context,
-        hmac_key=hmac_key,
        context=pipeline_context,
    )
 
@@ -162,7 +158,6 @@ def main(sys_args=None):
            sagemaker_session=sagemaker_session,
            s3_base_uri=s3_uri,
            s3_kms_key=s3_kms_key,
-            hmac_key=hmac_key,
        )
    finally:
        sys.exit(exit_code)
sagemaker/core/remote_function/job.py (+2 -21)

@@ -17,7 +17,6 @@ import dataclasses
 import json
 import os
 import re
-import secrets
 import shutil
 import sys
 import time
@@ -621,11 +620,6 @@ class _JobSettings:
            {"AWS_DEFAULT_REGION": self.sagemaker_session.boto_region_name}
        )
 
-        # The following will be overridden by the _Job.compile method.
-        # However, it needs to be kept here for feature store SDK.
-        # TODO: update the feature store SDK to set the HMAC key there.
-        self.environment_variables.update({"REMOTE_FUNCTION_SECRET_KEY": secrets.token_hex(32)})
-
        if spark_config and image_uri:
            raise ValueError("spark_config and image_uri cannot be specified at the same time!")
 
@@ -839,19 +833,17 @@
 class _Job:
    """Helper class that interacts with the SageMaker training service."""
 
-    def __init__(self, job_name: str, s3_uri: str, sagemaker_session: Session, hmac_key: str):
+    def __init__(self, job_name: str, s3_uri: str, sagemaker_session: Session):
        """Initialize a _Job object.
 
        Args:
            job_name (str): The training job name.
            s3_uri (str): The training job output S3 uri.
            sagemaker_session (Session): SageMaker boto session.
-            hmac_key (str): Remote function secret key.
        """
        self.job_name = job_name
        self.s3_uri = s3_uri
        self.sagemaker_session = sagemaker_session
-        self.hmac_key = hmac_key
        self._last_describe_response = None
 
    @staticmethod
@@ -867,9 +859,8 @@ class _Job:
        """
        job_name = describe_training_job_response["TrainingJobName"]
        s3_uri = describe_training_job_response["OutputDataConfig"]["S3OutputPath"]
-        hmac_key = describe_training_job_response["Environment"]["REMOTE_FUNCTION_SECRET_KEY"]
 
-        job = _Job(job_name, s3_uri, sagemaker_session, hmac_key)
+        job = _Job(job_name, s3_uri, sagemaker_session)
        job._last_describe_response = describe_training_job_response
        return job
 
@@ -907,7 +898,6 @@ class _Job:
            job_name,
            s3_base_uri,
            job_settings.sagemaker_session,
-            training_job_request["Environment"]["REMOTE_FUNCTION_SECRET_KEY"],
        )
 
    @staticmethod
@@ -935,18 +925,11 @@ class _Job:
 
        jobs_container_entrypoint = JOBS_CONTAINER_ENTRYPOINT[:]
 
-        # generate hmac key for integrity check
-        if step_compilation_context is None:
-            hmac_key = secrets.token_hex(32)
-        else:
-            hmac_key = step_compilation_context.function_step_secret_token
-
        # serialize function and arguments
        if step_compilation_context is None:
            stored_function = StoredFunction(
                sagemaker_session=job_settings.sagemaker_session,
                s3_base_uri=s3_base_uri,
-                hmac_key=hmac_key,
                s3_kms_key=job_settings.s3_kms_key,
            )
            stored_function.save(func, *func_args, **func_kwargs)
@@ -954,7 +937,6 @@ class _Job:
            stored_function = StoredFunction(
                sagemaker_session=job_settings.sagemaker_session,
                s3_base_uri=s3_base_uri,
-                hmac_key=hmac_key,
                s3_kms_key=job_settings.s3_kms_key,
                context=Context(
                    step_name=step_compilation_context.step_name,
@@ -1114,7 +1096,6 @@ class _Job:
        request_dict["EnableManagedSpotTraining"] = job_settings.use_spot_instances
 
        request_dict["Environment"] = job_settings.environment_variables
-        request_dict["Environment"].update({"REMOTE_FUNCTION_SECRET_KEY": hmac_key})
 
        extended_request = _extend_spark_config_to_request(request_dict, job_settings, s3_base_uri)
        extended_request = _extend_mpirun_to_request(extended_request, job_settings)
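Three behaviors change in job.py as a result: _JobSettings no longer seeds REMOTE_FUNCTION_SECRET_KEY with secrets.token_hex(32), _Job.compile no longer injects it into the CreateTrainingJob Environment, and from_describe_response no longer requires it when re-attaching to an existing job. A small sketch of that last point, with a hypothetical describe response shaped as in the diff:

    describe_response = {
        "TrainingJobName": "my-remote-job",                             # hypothetical
        "OutputDataConfig": {"S3OutputPath": "s3://my-bucket/output"},  # hypothetical
        # No Environment["REMOTE_FUNCTION_SECRET_KEY"] is needed anymore; the old
        # code indexed it directly and raised KeyError when it was absent.
    }

    job_name = describe_response["TrainingJobName"]
    s3_uri = describe_response["OutputDataConfig"]["S3OutputPath"]
    # job = _Job(job_name, s3_uri, sagemaker_session)  # new three-argument form
    print(job_name, s3_uri)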
sagemaker/core/telemetry/constants.py (+6 -8)

@@ -22,14 +22,12 @@ DEFAULT_AWS_REGION = "us-west-2"
 class Feature(Enum):
    """Enumeration of feature names used in telemetry."""
 
-    SDK_DEFAULTS = 1
-    LOCAL_MODE = 2
-    REMOTE_FUNCTION = 3
-    MODEL_TRAINER = 4
-
-
-    HYPERPOD_CLI = 7  # Added to support telemetry in sagemaker-hyperpod-cli
-    MODEL_CUSTOMIZATION = 8
+    SDK_DEFAULTS = 11
+    LOCAL_MODE = 12
+    REMOTE_FUNCTION = 13
+    MODEL_TRAINER = 14
+    MODEL_CUSTOMIZATION = 15
+    MLOPS = 16
 
    def __str__(self):  # pylint: disable=E0307
        """Return the feature name."""
@@ -50,11 +50,12 @@ TELEMETRY_OPT_OUT_MESSAGING = (
 )
 
 FEATURE_TO_CODE = {
-    str(Feature.SDK_DEFAULTS): 1,
-    str(Feature.LOCAL_MODE): 2,
-    str(Feature.REMOTE_FUNCTION): 3,
-    str(Feature.MODEL_TRAINER): 4,
-    str(Feature.MODEL_CUSTOMIZATION): 8,
+    str(Feature.SDK_DEFAULTS): 11,
+    str(Feature.LOCAL_MODE): 12,
+    str(Feature.REMOTE_FUNCTION): 13,
+    str(Feature.MODEL_TRAINER): 14,
+    str(Feature.MODEL_CUSTOMIZATION): 15,
+    str(Feature.MLOPS): 16,
 }
 
 STATUS_TO_CODE = {
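The feature codes move out of the old single-digit range into 11-16, MLOPS is added, HYPERPOD_CLI drops out, and FEATURE_TO_CODE remains a name-keyed mirror of the enum. A self-contained sketch of the same pattern (the source spells the dict out literally; a comprehension is used here for brevity):

    from enum import Enum

    class Feature(Enum):
        """Enumeration of feature names used in telemetry."""

        SDK_DEFAULTS = 11
        LOCAL_MODE = 12
        REMOTE_FUNCTION = 13
        MODEL_TRAINER = 14
        MODEL_CUSTOMIZATION = 15
        MLOPS = 16

        def __str__(self):
            """Return the feature name."""
            return self.name

    # Keyed by str(feature) so telemetry payloads carry stable integer codes.
    FEATURE_TO_CODE = {str(f): f.value for f in Feature}

    assert FEATURE_TO_CODE[str(Feature.MLOPS)] == 16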
sagemaker/core/training/configs.py (+16 -4)

@@ -21,7 +21,7 @@ For more documentation on ``sagemaker.core.shapes``, see:
 
 from __future__ import absolute_import
 
-from typing import Optional, Union
+from typing import Optional, Union, List
 from pydantic import BaseModel, model_validator, ConfigDict
 
 import sagemaker.core.shapes as shapes
@@ -106,18 +106,29 @@ class SourceCode(BaseConfig):
        command (Optional[StrPipeVar]):
            The command(s) to execute in the training job container. Example: "python my_script.py".
            If not specified, entry_script must be provided.
+        ignore_patterns: (Optional[List[str]]) :
+            The ignore patterns to ignore specific files/folders when uploading to S3. If not specified,
+            default to: ['.env', '.git', '__pycache__', '.DS_Store', '.cache', '.ipynb_checkpoints'].
    """
 
    source_dir: Optional[StrPipeVar] = None
    requirements: Optional[StrPipeVar] = None
    entry_script: Optional[StrPipeVar] = None
    command: Optional[StrPipeVar] = None
-
+    ignore_patterns: Optional[List[str]] = [
+        ".env",
+        ".git",
+        "__pycache__",
+        ".DS_Store",
+        ".cache",
+        ".ipynb_checkpoints",
+    ]
 
 class OutputDataConfig(shapes.OutputDataConfig):
    """OutputDataConfig.
 
-    Provides the configuration for the output data location of the training job
+    Provides the configuration for the output data location of the training job
+    (will not be carried over to any model repository or deployment).
 
    Parameters:
        s3_output_path (Optional[StrPipeVar]):
@@ -274,7 +285,8 @@ class OutputDataConfig(shapes.OutputDataConfig):
    """OutputDataConfig.
 
    The OutputDataConfig class is a subclass of ``sagemaker.core.shapes.OutputDataConfig``
-    and allows the user to specify the output data configuration for the training job
+    and allows the user to specify the output data configuration for the training job
+    (will not be carried over to any model repository or deployment).
 
    Parameters:
        s3_output_path (Optional[StrPipeVar]):
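The new ignore_patterns field filters what gets uploaded alongside source_dir, defaulting to common local clutter. A hypothetical construction under the new schema (the module path is inferred from this wheel's layout; local paths are placeholders):

    from sagemaker.core.training.configs import SourceCode  # inferred module path

    source_code = SourceCode(
        source_dir="./src",       # placeholder local directory
        entry_script="train.py",  # placeholder entry point
        ignore_patterns=[".git", "__pycache__", ".venv"],  # overrides the defaults
    )
    print(source_code.ignore_patterns)

A literal list as a field default would be a shared-state bug on a plain Python class; pydantic models copy field defaults per instance, which is presumably why the literal default is acceptable here.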
sagemaker/core/workflow/utilities.py (+10 -3)

@@ -21,7 +21,14 @@ from typing import List, Sequence, Union, Set, TYPE_CHECKING, Optional
 import hashlib
 from urllib.parse import unquote, urlparse
 from contextlib import contextmanager
-from _hashlib import HASH as Hash
+
+try:
+    # _hashlib is an internal python module, and is not present in
+    # statically linked interpreters.
+    from _hashlib import HASH as Hash
+except ImportError:
+    import typing
+    Hash = typing.Any
 
 from sagemaker.core.common_utils import base_from_name
 from sagemaker.core.workflow.parameters import Parameter
@@ -165,9 +172,9 @@ def get_code_hash(step: Entity) -> str:
        source_code = model_trainer.source_code
        if source_code:
            source_dir = source_code.source_dir
-
+            requirements = source_code.requirements
            entry_point = source_code.entry_script
-            return get_training_code_hash(entry_point, source_dir,
+            return get_training_code_hash(entry_point, source_dir, requirements)
    return None
 
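The guarded import keeps utilities.py importable on interpreters that lack CPython's private _hashlib extension (the comment cites statically linked builds), degrading Hash to typing.Any. A standalone sketch of the pattern, showing why annotated helpers keep working either way:

    import hashlib

    try:
        # CPython's concrete hash-object type; may be absent on static builds.
        from _hashlib import HASH as Hash
    except ImportError:
        import typing
        Hash = typing.Any  # annotation-only fallback; runtime behavior is unchanged

    def feed(hash_object: Hash, data: bytes) -> None:
        # Hash is used purely as an annotation, so Any is a safe substitute.
        hash_object.update(data)

    sha256 = hashlib.sha256()
    feed(sha256, b"payload")
    print(sha256.hexdigest())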