sagemaker-core 1.0.62__py3-none-any.whl → 2.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sagemaker/__init__.py +2 -0
- sagemaker/core/__init__.py +16 -0
- sagemaker/core/_studio.py +116 -0
- sagemaker/core/_version.py +11 -0
- sagemaker/core/accept_types.py +131 -0
- sagemaker/core/analytics.py +744 -0
- sagemaker/core/apiutils/__init__.py +13 -0
- sagemaker/core/apiutils/_base_types.py +228 -0
- sagemaker/core/apiutils/_boto_functions.py +130 -0
- sagemaker/core/apiutils/_utils.py +34 -0
- sagemaker/core/base_deserializers.py +35 -0
- sagemaker/core/base_serializers.py +35 -0
- sagemaker/core/clarify/__init__.py +2898 -0
- sagemaker/core/collection.py +467 -0
- sagemaker/core/common_utils.py +2399 -0
- sagemaker/core/compute_resource_requirements/__init__.py +18 -0
- sagemaker/core/compute_resource_requirements/resource_requirements.py +94 -0
- sagemaker/core/config/__init__.py +181 -0
- sagemaker/core/config/config.py +238 -0
- sagemaker/core/config/config_manager.py +595 -0
- sagemaker/core/config/config_schema.py +1220 -0
- sagemaker/core/config/config_utils.py +297 -0
- {sagemaker_core/main → sagemaker/core}/config_schema.py +408 -3
- sagemaker/core/constants.py +73 -0
- sagemaker/core/content_types.py +137 -0
- sagemaker/core/debugger/__init__.py +39 -0
- sagemaker/core/debugger/debugger.py +945 -0
- sagemaker/core/debugger/framework_profile.py +292 -0
- sagemaker/core/debugger/metrics_config.py +468 -0
- sagemaker/core/debugger/profiler.py +42 -0
- sagemaker/core/debugger/profiler_config.py +190 -0
- sagemaker/core/debugger/profiler_constants.py +40 -0
- sagemaker/core/debugger/utils.py +148 -0
- sagemaker/core/deprecations.py +254 -0
- sagemaker/core/deserializers/__init__.py +10 -0
- sagemaker/core/deserializers/base.py +424 -0
- sagemaker/core/deserializers/implementations.py +157 -0
- sagemaker/core/drift_check_baselines.py +106 -0
- sagemaker/core/enums.py +51 -0
- sagemaker/core/environment_variables.py +101 -0
- sagemaker/core/exceptions.py +108 -0
- sagemaker/core/experiments/__init__.py +53 -0
- sagemaker/core/experiments/_api_types.py +251 -0
- sagemaker/core/experiments/_environment.py +124 -0
- sagemaker/core/experiments/_helper.py +294 -0
- sagemaker/core/experiments/_metrics.py +333 -0
- sagemaker/core/experiments/_run_context.py +58 -0
- sagemaker/core/experiments/_utils.py +216 -0
- sagemaker/core/experiments/experiment.py +247 -0
- sagemaker/core/experiments/run.py +970 -0
- sagemaker/core/experiments/trial.py +296 -0
- sagemaker/core/experiments/trial_component.py +387 -0
- sagemaker/core/explainer/__init__.py +24 -0
- sagemaker/core/explainer/clarify_explainer_config.py +298 -0
- sagemaker/core/explainer/explainer_config.py +44 -0
- sagemaker/core/fw_utils.py +1220 -0
- sagemaker/core/git_utils.py +415 -0
- sagemaker/core/helper/pipeline_variable.py +82 -0
- sagemaker/core/helper/session_helper.py +2977 -0
- sagemaker/core/hyperparameters.py +172 -0
- sagemaker/core/image_retriever/__init__.py +3 -0
- sagemaker/core/image_retriever/image_retriever.py +640 -0
- sagemaker/core/image_retriever/image_retriever_utils.py +509 -0
- sagemaker/core/image_retriever/test.py +7 -0
- sagemaker/core/image_uri_config/autogluon.json +1335 -0
- sagemaker/core/image_uri_config/blazingtext.json +50 -0
- sagemaker/core/image_uri_config/chainer.json +104 -0
- sagemaker/core/image_uri_config/clarify.json +39 -0
- sagemaker/core/image_uri_config/coach-mxnet.json +70 -0
- sagemaker/core/image_uri_config/coach-tensorflow.json +186 -0
- sagemaker/core/image_uri_config/data-wrangler.json +91 -0
- sagemaker/core/image_uri_config/debugger.json +34 -0
- sagemaker/core/image_uri_config/detailed-profiler.json +18 -0
- sagemaker/core/image_uri_config/djl-deepspeed.json +385 -0
- sagemaker/core/image_uri_config/djl-fastertransformer.json +167 -0
- sagemaker/core/image_uri_config/djl-lmi.json +136 -0
- sagemaker/core/image_uri_config/djl-neuronx.json +258 -0
- sagemaker/core/image_uri_config/djl-tensorrtllm.json +262 -0
- sagemaker/core/image_uri_config/factorization-machines.json +50 -0
- sagemaker/core/image_uri_config/forecasting-deepar.json +50 -0
- sagemaker/core/image_uri_config/huggingface-llm-neuronx.json +770 -0
- sagemaker/core/image_uri_config/huggingface-llm.json +1267 -0
- sagemaker/core/image_uri_config/huggingface-neuron.json +52 -0
- sagemaker/core/image_uri_config/huggingface-neuronx.json +686 -0
- sagemaker/core/image_uri_config/huggingface-tei-cpu.json +298 -0
- sagemaker/core/image_uri_config/huggingface-tei.json +298 -0
- sagemaker/core/image_uri_config/huggingface-training-compiler.json +195 -0
- sagemaker/core/image_uri_config/huggingface-vllm-neuronx.json +38 -0
- sagemaker/core/image_uri_config/huggingface.json +2287 -0
- sagemaker/core/image_uri_config/hyperpod-recipes-neuron.json +52 -0
- sagemaker/core/image_uri_config/image-classification-neo.json +43 -0
- sagemaker/core/image_uri_config/image-classification.json +50 -0
- sagemaker/core/image_uri_config/inferentia-mxnet.json +88 -0
- sagemaker/core/image_uri_config/inferentia-pytorch.json +127 -0
- sagemaker/core/image_uri_config/inferentia-tensorflow.json +88 -0
- sagemaker/core/image_uri_config/instance_gpu_info.json +782 -0
- sagemaker/core/image_uri_config/ipinsights.json +50 -0
- sagemaker/core/image_uri_config/kmeans.json +50 -0
- sagemaker/core/image_uri_config/knn.json +50 -0
- sagemaker/core/image_uri_config/lda.json +26 -0
- sagemaker/core/image_uri_config/linear-learner.json +50 -0
- sagemaker/core/image_uri_config/model-monitor.json +42 -0
- sagemaker/core/image_uri_config/mxnet.json +1154 -0
- sagemaker/core/image_uri_config/neo-mxnet.json +64 -0
- sagemaker/core/image_uri_config/neo-pytorch.json +341 -0
- sagemaker/core/image_uri_config/neo-tensorflow.json +109 -0
- sagemaker/core/image_uri_config/ntm.json +50 -0
- sagemaker/core/image_uri_config/object-detection.json +50 -0
- sagemaker/core/image_uri_config/object2vec.json +50 -0
- sagemaker/core/image_uri_config/pca.json +50 -0
- sagemaker/core/image_uri_config/pytorch-neuron.json +43 -0
- sagemaker/core/image_uri_config/pytorch-smp.json +218 -0
- sagemaker/core/image_uri_config/pytorch-training-compiler.json +80 -0
- sagemaker/core/image_uri_config/pytorch.json +3101 -0
- sagemaker/core/image_uri_config/randomcutforest.json +50 -0
- sagemaker/core/image_uri_config/ray-pytorch.json +46 -0
- sagemaker/core/image_uri_config/ray-tensorflow.json +194 -0
- sagemaker/core/image_uri_config/sagemaker-base-python.json +46 -0
- sagemaker/core/image_uri_config/sagemaker-distribution.json +37 -0
- sagemaker/core/image_uri_config/sagemaker-geospatial.json +13 -0
- sagemaker/core/image_uri_config/sagemaker-tritonserver.json +252 -0
- sagemaker/core/image_uri_config/semantic-segmentation.json +50 -0
- sagemaker/core/image_uri_config/seq2seq.json +50 -0
- sagemaker/core/image_uri_config/sklearn.json +494 -0
- sagemaker/core/image_uri_config/spark.json +280 -0
- sagemaker/core/image_uri_config/sparkml-serving.json +97 -0
- sagemaker/core/image_uri_config/stabilityai.json +53 -0
- sagemaker/core/image_uri_config/tensorflow.json +5086 -0
- sagemaker/core/image_uri_config/vw.json +25 -0
- sagemaker/core/image_uri_config/xgboost-neo.json +43 -0
- sagemaker/core/image_uri_config/xgboost.json +972 -0
- sagemaker/core/image_uris.py +816 -0
- sagemaker/core/inference_config.py +144 -0
- sagemaker/core/inference_recommender/__init__.py +18 -0
- sagemaker/core/inference_recommender/inference_recommender_mixin.py +622 -0
- sagemaker/core/inputs.py +366 -0
- sagemaker/core/instance_group.py +61 -0
- sagemaker/core/instance_types.py +164 -0
- sagemaker/core/instance_types_gpu_info.py +43 -0
- sagemaker/core/interactive_apps/__init__.py +41 -0
- sagemaker/core/interactive_apps/base_interactive_app.py +204 -0
- sagemaker/core/interactive_apps/detail_profiler_app.py +139 -0
- sagemaker/core/interactive_apps/tensorboard.py +149 -0
- sagemaker/core/iterators.py +197 -0
- sagemaker/core/job.py +380 -0
- sagemaker/core/jumpstart/__init__.py +156 -0
- sagemaker/core/jumpstart/accessors.py +390 -0
- sagemaker/core/jumpstart/artifacts/__init__.py +69 -0
- sagemaker/core/jumpstart/artifacts/environment_variables.py +252 -0
- sagemaker/core/jumpstart/artifacts/hyperparameters.py +120 -0
- sagemaker/core/jumpstart/artifacts/image_uris.py +139 -0
- sagemaker/core/jumpstart/artifacts/incremental_training.py +87 -0
- sagemaker/core/jumpstart/artifacts/instance_types.py +223 -0
- sagemaker/core/jumpstart/artifacts/kwargs.py +289 -0
- sagemaker/core/jumpstart/artifacts/metric_definitions.py +117 -0
- sagemaker/core/jumpstart/artifacts/model_packages.py +202 -0
- sagemaker/core/jumpstart/artifacts/model_uris.py +252 -0
- sagemaker/core/jumpstart/artifacts/payloads.py +96 -0
- sagemaker/core/jumpstart/artifacts/predictors.py +540 -0
- sagemaker/core/jumpstart/artifacts/resource_names.py +86 -0
- sagemaker/core/jumpstart/artifacts/resource_requirements.py +162 -0
- sagemaker/core/jumpstart/artifacts/script_uris.py +172 -0
- sagemaker/core/jumpstart/cache.py +663 -0
- sagemaker/core/jumpstart/configs.py +50 -0
- sagemaker/core/jumpstart/constants.py +198 -0
- sagemaker/core/jumpstart/deserializers.py +81 -0
- sagemaker/core/jumpstart/document.py +76 -0
- sagemaker/core/jumpstart/enums.py +168 -0
- sagemaker/core/jumpstart/exceptions.py +236 -0
- sagemaker/core/jumpstart/factory/utils.py +833 -0
- sagemaker/core/jumpstart/filters.py +597 -0
- sagemaker/core/jumpstart/hub/constants.py +16 -0
- sagemaker/core/jumpstart/hub/hub.py +291 -0
- sagemaker/core/jumpstart/hub/interfaces.py +936 -0
- sagemaker/core/jumpstart/hub/parser_utils.py +70 -0
- sagemaker/core/jumpstart/hub/parsers.py +288 -0
- sagemaker/core/jumpstart/hub/types.py +35 -0
- sagemaker/core/jumpstart/hub/utils.py +260 -0
- sagemaker/core/jumpstart/models.py +501 -0
- sagemaker/core/jumpstart/notebook_utils.py +575 -0
- sagemaker/core/jumpstart/parameters.py +20 -0
- sagemaker/core/jumpstart/payload_utils.py +239 -0
- sagemaker/core/jumpstart/region_config.json +171 -0
- sagemaker/core/jumpstart/search.py +171 -0
- sagemaker/core/jumpstart/serializers.py +81 -0
- sagemaker/core/jumpstart/session_utils.py +234 -0
- sagemaker/core/jumpstart/types.py +3044 -0
- sagemaker/core/jumpstart/utils.py +1731 -0
- sagemaker/core/jumpstart/validators.py +257 -0
- sagemaker/core/lambda_helper.py +312 -0
- sagemaker/core/lineage/__init__.py +42 -0
- sagemaker/core/lineage/_api_types.py +239 -0
- sagemaker/core/lineage/_utils.py +49 -0
- sagemaker/core/lineage/action.py +345 -0
- sagemaker/core/lineage/artifact.py +646 -0
- sagemaker/core/lineage/association.py +190 -0
- sagemaker/core/lineage/context.py +505 -0
- sagemaker/core/lineage/lineage_trial_component.py +191 -0
- sagemaker/core/lineage/query.py +732 -0
- sagemaker/core/lineage/visualizer.py +346 -0
- sagemaker/core/local/__init__.py +18 -0
- sagemaker/core/local/data.py +423 -0
- sagemaker/core/local/entities.py +678 -0
- sagemaker/core/local/exceptions.py +17 -0
- sagemaker/core/local/image.py +1243 -0
- sagemaker/core/local/local_session.py +739 -0
- sagemaker/core/local/utils.py +246 -0
- sagemaker/core/logs.py +181 -0
- sagemaker/core/metadata_properties.py +56 -0
- sagemaker/core/metric_definitions.py +91 -0
- sagemaker/core/mlflow/__init__.py +38 -0
- sagemaker/core/mlflow/forward_sagemaker_metrics.py +44 -0
- sagemaker/core/model_card/__init__.py +26 -0
- sagemaker/core/model_life_cycle.py +51 -0
- sagemaker/core/model_metrics.py +160 -0
- sagemaker/core/model_monitor/__init__.py +66 -0
- sagemaker/core/model_monitor/clarify_model_monitoring.py +1497 -0
- sagemaker/core/model_monitor/cron_expression_generator.py +82 -0
- sagemaker/core/model_monitor/data_capture_config.py +115 -0
- sagemaker/core/model_monitor/data_quality_monitoring_config.py +66 -0
- sagemaker/core/model_monitor/dataset_format.py +102 -0
- sagemaker/core/model_monitor/model_monitoring.py +4266 -0
- sagemaker/core/model_monitor/monitoring_alert.py +76 -0
- sagemaker/core/model_monitor/monitoring_files.py +506 -0
- sagemaker/core/model_monitor/utils.py +793 -0
- sagemaker/core/model_registry.py +480 -0
- sagemaker/core/model_uris.py +97 -0
- sagemaker/core/modules/__init__.py +19 -0
- sagemaker/core/modules/configs.py +239 -0
- sagemaker/core/modules/constants.py +37 -0
- sagemaker/core/modules/distributed.py +182 -0
- sagemaker/core/modules/local_core/local_container.py +605 -0
- sagemaker/core/modules/templates.py +83 -0
- sagemaker/core/modules/train/__init__.py +14 -0
- sagemaker/core/modules/train/container_drivers/__init__.py +14 -0
- sagemaker/core/modules/train/container_drivers/common/__init__.py +14 -0
- sagemaker/core/modules/train/container_drivers/common/utils.py +205 -0
- sagemaker/core/modules/train/container_drivers/distributed_drivers/__init__.py +14 -0
- sagemaker/core/modules/train/container_drivers/distributed_drivers/basic_script_driver.py +81 -0
- sagemaker/core/modules/train/container_drivers/distributed_drivers/mpi_driver.py +123 -0
- sagemaker/core/modules/train/container_drivers/distributed_drivers/mpi_utils.py +302 -0
- sagemaker/core/modules/train/container_drivers/distributed_drivers/torchrun_driver.py +129 -0
- sagemaker/core/modules/train/container_drivers/scripts/__init__.py +14 -0
- sagemaker/core/modules/train/container_drivers/scripts/environment.py +305 -0
- sagemaker/core/modules/train/sm_recipes/__init__.py +0 -0
- sagemaker/core/modules/train/sm_recipes/utils.py +330 -0
- sagemaker/core/modules/types.py +19 -0
- sagemaker/core/modules/utils.py +194 -0
- sagemaker/core/network.py +185 -0
- sagemaker/core/parameter.py +173 -0
- sagemaker/core/payloads.py +185 -0
- sagemaker/core/processing.py +1599 -0
- sagemaker/core/remote_function/__init__.py +19 -0
- sagemaker/core/remote_function/checkpoint_location.py +47 -0
- sagemaker/core/remote_function/client.py +1310 -0
- sagemaker/core/remote_function/core/__init__.py +0 -0
- sagemaker/core/remote_function/core/_custom_dispatch_table.py +72 -0
- sagemaker/core/remote_function/core/pipeline_variables.py +347 -0
- sagemaker/core/remote_function/core/serialization.py +410 -0
- sagemaker/core/remote_function/core/stored_function.py +223 -0
- sagemaker/core/remote_function/custom_file_filter.py +128 -0
- sagemaker/core/remote_function/errors.py +102 -0
- sagemaker/core/remote_function/invoke_function.py +167 -0
- sagemaker/core/remote_function/job.py +2121 -0
- sagemaker/core/remote_function/logging_config.py +38 -0
- sagemaker/core/remote_function/runtime_environment/__init__.py +14 -0
- sagemaker/core/remote_function/runtime_environment/bootstrap_runtime_environment.py +605 -0
- sagemaker/core/remote_function/runtime_environment/mpi_utils_remote.py +252 -0
- sagemaker/core/remote_function/runtime_environment/runtime_environment_manager.py +554 -0
- sagemaker/core/remote_function/runtime_environment/spark_app.py +18 -0
- sagemaker/core/remote_function/spark_config.py +149 -0
- sagemaker/core/resource_requirements.py +168 -0
- {sagemaker_core/main → sagemaker/core}/resources.py +19098 -10895
- sagemaker/core/s3/__init__.py +41 -0
- sagemaker/core/s3/client.py +367 -0
- sagemaker/core/s3/utils.py +175 -0
- sagemaker/core/script_uris.py +93 -0
- sagemaker/core/serializers/__init__.py +11 -0
- sagemaker/core/serializers/base.py +510 -0
- sagemaker/core/serializers/implementations.py +159 -0
- sagemaker/core/serializers/utils.py +223 -0
- sagemaker/core/serverless_inference_config.py +63 -0
- sagemaker/core/session_settings.py +55 -0
- sagemaker/core/shapes/__init__.py +3 -0
- sagemaker/core/shapes/model_card_shapes.py +159 -0
- {sagemaker_core/main → sagemaker/core/shapes}/shapes.py +5810 -1806
- sagemaker/core/spark/__init__.py +16 -0
- sagemaker/core/spark/defaults.py +16 -0
- sagemaker/core/spark/processing.py +1380 -0
- sagemaker/core/telemetry/__init__.py +23 -0
- sagemaker/core/telemetry/constants.py +82 -0
- sagemaker/core/telemetry/telemetry_logging.py +285 -0
- sagemaker/core/tools/__init__.py +1 -0
- {sagemaker_core → sagemaker/core}/tools/codegen.py +4 -4
- {sagemaker_core → sagemaker/core}/tools/constants.py +23 -15
- {sagemaker_core → sagemaker/core}/tools/data_extractor.py +1 -1
- {sagemaker_core → sagemaker/core}/tools/method.py +1 -1
- sagemaker/core/tools/model_card/generate_model_card_from_schema.py +562 -0
- {sagemaker_core → sagemaker/core}/tools/resources_codegen.py +165 -98
- {sagemaker_core → sagemaker/core}/tools/resources_extractor.py +5 -13
- {sagemaker_core → sagemaker/core}/tools/shapes_codegen.py +16 -17
- {sagemaker_core → sagemaker/core}/tools/shapes_extractor.py +29 -67
- {sagemaker_core → sagemaker/core}/tools/templates.py +39 -17
- sagemaker/core/training/__init__.py +14 -0
- sagemaker/core/training/configs.py +345 -0
- sagemaker/core/training/constants.py +37 -0
- sagemaker/core/training/utils.py +77 -0
- sagemaker/core/training_compiler/__init__.py +16 -0
- sagemaker/core/training_compiler/config.py +197 -0
- sagemaker/core/training_compiler_config.py +197 -0
- sagemaker/core/transformer.py +793 -0
- sagemaker/core/user_agent.py +76 -0
- sagemaker/core/utilities/__init__.py +24 -0
- sagemaker/core/utilities/cache.py +169 -0
- sagemaker/core/utilities/search_expression.py +133 -0
- sagemaker/core/utils/__init__.py +48 -0
- sagemaker/core/utils/code_injection/__init__.py +0 -0
- {sagemaker_core/main → sagemaker/core/utils}/code_injection/codec.py +2 -2
- {sagemaker_core/main → sagemaker/core/utils}/code_injection/shape_dag.py +5979 -176
- {sagemaker_core/main → sagemaker/core/utils}/exceptions.py +8 -8
- sagemaker_core/main/default_configs_helper.py → sagemaker/core/utils/intelligent_defaults_helper.py +5 -6
- {sagemaker_core/main → sagemaker/core/utils}/logs.py +1 -2
- {sagemaker_core/main → sagemaker/core/utils}/utils.py +27 -22
- sagemaker/core/workflow/__init__.py +152 -0
- sagemaker/core/workflow/conditions.py +313 -0
- sagemaker/core/workflow/entities.py +58 -0
- sagemaker/core/workflow/execution_variables.py +89 -0
- sagemaker/core/workflow/functions.py +193 -0
- sagemaker/core/workflow/parameters.py +222 -0
- sagemaker/core/workflow/pipeline_context.py +394 -0
- sagemaker/core/workflow/pipeline_definition_config.py +31 -0
- sagemaker/core/workflow/properties.py +285 -0
- sagemaker/core/workflow/step_outputs.py +65 -0
- sagemaker/core/workflow/utilities.py +514 -0
- sagemaker/lineage/__init__.py +33 -0
- sagemaker/lineage/action.py +28 -0
- sagemaker/lineage/artifact.py +28 -0
- sagemaker/lineage/context.py +28 -0
- sagemaker/lineage/lineage_trial_component.py +28 -0
- {sagemaker_core-1.0.62.dist-info → sagemaker_core-2.3.1.dist-info}/METADATA +28 -9
- sagemaker_core-2.3.1.dist-info/RECORD +351 -0
- sagemaker_core-2.3.1.dist-info/top_level.txt +1 -0
- sagemaker_core/_version.py +0 -3
- sagemaker_core/helper/session_helper.py +0 -769
- sagemaker_core/resources/__init__.py +0 -1
- sagemaker_core/shapes/__init__.py +0 -1
- sagemaker_core/tools/__init__.py +0 -1
- sagemaker_core-1.0.62.dist-info/RECORD +0 -35
- sagemaker_core-1.0.62.dist-info/top_level.txt +0 -1
- {sagemaker_core → sagemaker/core/helper}/__init__.py +0 -0
- {sagemaker_core/helper → sagemaker/core/jumpstart/factory}/__init__.py +0 -0
- {sagemaker_core/main → sagemaker/core/jumpstart/hub}/__init__.py +0 -0
- {sagemaker_core/main/code_injection → sagemaker/core/modules/local_core}/__init__.py +0 -0
- {sagemaker_core/main → sagemaker/core/utils}/code_injection/base.py +0 -0
- {sagemaker_core/main → sagemaker/core/utils}/code_injection/constants.py +0 -0
- {sagemaker_core/main → sagemaker/core/utils}/user_agent.py +0 -0
- {sagemaker_core-1.0.62.dist-info → sagemaker_core-2.3.1.dist-info}/WHEEL +0 -0
- {sagemaker_core-1.0.62.dist-info → sagemaker_core-2.3.1.dist-info}/licenses/LICENSE +0 -0
sagemaker/core/model_monitor/model_monitoring.py (new file)
@@ -0,0 +1,4266 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+"""This module contains code related to Amazon SageMaker Model Monitoring.
+
+These classes assist with suggesting baselines and creating monitoring schedules for
+data captured by SageMaker Endpoints.
+"""
+from __future__ import print_function, absolute_import
+
+import copy
+import json
+import os
+import pathlib
+import logging
+import uuid
+from typing import Union, Optional, Dict, List
+import attr
+
+from six import string_types
+from six.moves.urllib.parse import urlparse
+from botocore.exceptions import ClientError
+
+from sagemaker.core import image_uris, s3
+from sagemaker.core.config.config_schema import (
+    SAGEMAKER,
+    MONITORING_SCHEDULE,
+    TAGS,
+    MONITORING_JOB_SUBNETS_PATH,
+    MONITORING_JOB_ENABLE_NETWORK_ISOLATION_PATH,
+    MONITORING_JOB_ENVIRONMENT_PATH,
+    MONITORING_SCHEDULE_INTER_CONTAINER_ENCRYPTION_PATH,
+    MONITORING_JOB_VOLUME_KMS_KEY_ID_PATH,
+    MONITORING_JOB_SECURITY_GROUP_IDS_PATH,
+    MONITORING_JOB_OUTPUT_KMS_KEY_ID_PATH,
+    MONITORING_JOB_ROLE_ARN_PATH,
+)
+from sagemaker.core.exceptions import UnexpectedStatusException
+from sagemaker.core.model_monitor.monitoring_files import (
+    Constraints,
+    ConstraintViolations,
+    Statistics,
+)
+from sagemaker.core.model_monitor.monitoring_alert import (
+    MonitoringAlertSummary,
+    MonitoringAlertHistorySummary,
+    MonitoringAlertActions,
+    ModelDashboardIndicatorAction,
+)
+from sagemaker.core.model_monitor.utils import (
+    boto_create_monitoring_schedule,
+    boto_delete_monitoring_schedule,
+    boto_describe_monitoring_schedule,
+    boto_list_monitoring_alerts,
+    boto_list_monitoring_alert_history,
+    boto_list_monitoring_executions,
+    boto_update_monitoring_schedule,
+    boto_update_monitoring_alert,
+    boto_start_monitoring_schedule,
+    boto_stop_monitoring_schedule,
+)
+from sagemaker.core.model_monitor.data_quality_monitoring_config import DataQualityMonitoringConfig
+from sagemaker.core.model_monitor.dataset_format import MonitoringDatasetFormat
+from sagemaker.core.network import NetworkConfig
+from sagemaker.core.processing import (
+    Processor,
+    ProcessingInput,
+    ProcessingS3Input,
+    ProcessingJob,
+    ProcessingOutput,
+)
+from sagemaker.core.shapes import ProcessingS3Output
+from sagemaker.core.helper.session_helper import Session, expand_role
+from sagemaker.core.common_utils import (
+    name_from_base,
+    retries,
+    resolve_value_from_config,
+    resolve_class_attribute_from_config,
+    format_tags,
+    list_tags,
+)
+from sagemaker.core.common_utils import get_resource_name_from_arn
+from sagemaker.core.model_monitor.cron_expression_generator import CronExpressionGenerator
+from sagemaker.core.processing import logs_for_processing_job
+
+DEFAULT_REPOSITORY_NAME = "sagemaker-model-monitor-analyzer"
+
+STATISTICS_JSON_DEFAULT_FILE_NAME = "statistics.json"
+CONSTRAINTS_JSON_DEFAULT_FILE_NAME = "constraints.json"
+CONSTRAINT_VIOLATIONS_JSON_DEFAULT_FILE_NAME = "constraint_violations.json"
+
+_CONTAINER_BASE_PATH = "/opt/ml/processing"
+_CONTAINER_INPUT_PATH = "input"
+_CONTAINER_ENDPOINT_INPUT_PATH = "endpoint"
+_BASELINE_DATASET_INPUT_NAME = "baseline_dataset_input"
+_RECORD_PREPROCESSOR_SCRIPT_INPUT_NAME = "record_preprocessor_script_input"
+_POST_ANALYTICS_PROCESSOR_SCRIPT_INPUT_NAME = "post_analytics_processor_script_input"
+_CONTAINER_OUTPUT_PATH = "output"
+_DEFAULT_OUTPUT_NAME = "monitoring_output"
+_MODEL_MONITOR_S3_PATH = "model-monitor"
+_BASELINING_S3_PATH = "baselining"
+_MONITORING_S3_PATH = "monitoring"
+_RESULTS_S3_PATH = "results"
+_INPUT_S3_PATH = "input"
+
+_SUGGESTION_JOB_BASE_NAME = "baseline-suggestion-job"
+_MONITORING_SCHEDULE_BASE_NAME = "monitoring-schedule"
+
+_DATASET_SOURCE_PATH_ENV_NAME = "dataset_source"
+_DATASET_FORMAT_ENV_NAME = "dataset_format"
+_OUTPUT_PATH_ENV_NAME = "output_path"
+_RECORD_PREPROCESSOR_SCRIPT_ENV_NAME = "record_preprocessor_script"
+_POST_ANALYTICS_PROCESSOR_SCRIPT_ENV_NAME = "post_analytics_processor_script"
+_PUBLISH_CLOUDWATCH_METRICS_ENV_NAME = "publish_cloudwatch_metrics"
+_ANALYSIS_TYPE_ENV_NAME = "analysis_type"
+_PROBLEM_TYPE_ENV_NAME = "problem_type"
+_GROUND_TRUTH_ATTRIBUTE_ENV_NAME = "ground_truth_attribute"
+_INFERENCE_ATTRIBUTE_ENV_NAME = "inference_attribute"
+_PROBABILITY_ATTRIBUTE_ENV_NAME = "probability_attribute"
+_PROBABILITY_THRESHOLD_ATTRIBUTE_ENV_NAME = "probability_threshold_attribute"
+_CATEGORICAL_DRIFT_METHOD_ENV_NAME = "categorical_drift_method"
+
+# Setting _LOGGER for backward compatibility, in case users import it...
+logger = _LOGGER = logging.getLogger(__name__)
+
+framework_name = "model-monitor"
+
+
+class ModelMonitor(object):
+    """Sets up Amazon SageMaker Monitoring Schedules and baseline suggestions.
+
+    Use this class when you want to provide your own container image containing the code
+    you'd like to run, in order to produce your own statistics and constraint validation files.
+    For a more guided experience, consider using the DefaultModelMonitor class instead.
+    """
+
+    def __init__(
+        self,
+        role=None,
+        image_uri=None,
+        instance_count=1,
+        instance_type="ml.m5.xlarge",
+        entrypoint=None,
+        volume_size_in_gb=30,
+        volume_kms_key=None,
+        output_kms_key=None,
+        max_runtime_in_seconds=None,
+        base_job_name=None,
+        sagemaker_session=None,
+        env=None,
+        tags=None,
+        network_config=None,
+    ):
+        """Initializes a ``Monitor`` instance.
+
+        The Monitor handles baselining datasets and creating Amazon SageMaker Monitoring Schedules
+        to monitor SageMaker endpoints.
+
+        Args:
+            role (str): An AWS IAM role. The Amazon SageMaker jobs use this role.
+            image_uri (str): The uri of the image to use for the jobs started by
+                the Monitor.
+            instance_count (int): The number of instances to run
+                the jobs with.
+            instance_type (str): Type of EC2 instance to use for
+                the job, for example, 'ml.m5.xlarge'.
+            entrypoint ([str]): The entrypoint for the job.
+            volume_size_in_gb (int): Size in GB of the EBS volume
+                to use for storing data during processing (default: 30).
+            volume_kms_key (str): A KMS key for the job's volume.
+            output_kms_key (str): The KMS key id for the job's outputs.
+            max_runtime_in_seconds (int): Timeout in seconds. After this amount of
+                time, Amazon SageMaker terminates the job regardless of its current status.
+                Default: 3600
+            base_job_name (str): Prefix for the job name. If not specified,
+                a default name is generated based on the training image name and
+                current timestamp.
+            sagemaker_session (sagemaker.core.helper.session_helper.Session): Session object which
+                manages interactions with Amazon SageMaker APIs and any other
+                AWS services needed. If not specified, one is created using
+                the default AWS configuration chain.
+            env (dict): Environment variables to be passed to the job.
+            tags (Optional[Tags]): List of tags to be passed to the job.
+            network_config (sagemaker.network.NetworkConfig): A NetworkConfig
+                object that configures network isolation, encryption of
+                inter-container traffic, security group IDs, and subnets.
+        """
+        self.image_uri = image_uri
+        self.instance_count = instance_count
+        self.instance_type = instance_type
+        self.entrypoint = entrypoint
+        self.volume_size_in_gb = volume_size_in_gb
+        self.max_runtime_in_seconds = max_runtime_in_seconds
+        self.base_job_name = base_job_name
+        self.sagemaker_session = sagemaker_session or Session()
+        self.tags = format_tags(tags)
+
+        self.baselining_jobs = []
+        self.latest_baselining_job = None
+        self.arguments = None
+        self.latest_baselining_job_name = None
+        self.monitoring_schedule_name = None
+        self.job_definition_name = None
+        self.role = resolve_value_from_config(
+            role, MONITORING_JOB_ROLE_ARN_PATH, sagemaker_session=self.sagemaker_session
+        )
+        if not self.role:
+            # Originally IAM role was a required parameter.
+            # Now we marked that as Optional because we can fetch it from SageMakerConfig
+            # Because of marking that parameter as optional, we should validate if it is None, even
+            # after fetching the config.
+            raise ValueError("An AWS IAM role is required to create a Monitoring Schedule.")
+        self.volume_kms_key = resolve_value_from_config(
+            volume_kms_key,
+            MONITORING_JOB_VOLUME_KMS_KEY_ID_PATH,
+            sagemaker_session=self.sagemaker_session,
+        )
+        self.output_kms_key = resolve_value_from_config(
+            output_kms_key,
+            MONITORING_JOB_OUTPUT_KMS_KEY_ID_PATH,
+            sagemaker_session=self.sagemaker_session,
+        )
+        self.network_config = resolve_class_attribute_from_config(
+            NetworkConfig,
+            network_config,
+            "subnets",
+            MONITORING_JOB_SUBNETS_PATH,
+            sagemaker_session=self.sagemaker_session,
+        )
+        self.network_config = resolve_class_attribute_from_config(
+            NetworkConfig,
+            self.network_config,
+            "security_group_ids",
+            MONITORING_JOB_SECURITY_GROUP_IDS_PATH,
+            sagemaker_session=self.sagemaker_session,
+        )
+        self.network_config = resolve_class_attribute_from_config(
+            NetworkConfig,
+            self.network_config,
+            "enable_network_isolation",
+            MONITORING_JOB_ENABLE_NETWORK_ISOLATION_PATH,
+            sagemaker_session=self.sagemaker_session,
+        )
+        self.network_config = resolve_class_attribute_from_config(
+            NetworkConfig,
+            self.network_config,
+            "encrypt_inter_container_traffic",
+            MONITORING_SCHEDULE_INTER_CONTAINER_ENCRYPTION_PATH,
+            sagemaker_session=self.sagemaker_session,
+        )
+        self.env = resolve_value_from_config(
+            env,
+            MONITORING_JOB_ENVIRONMENT_PATH,
+            default_value=None,
+            sagemaker_session=self.sagemaker_session,
+        )
+
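For reference, a minimal construction sketch under the new sagemaker.core namespace; the IAM role ARN and container image below are hypothetical placeholders, and the import path assumes the module location shown in this diff:

    from sagemaker.core.model_monitor.model_monitoring import ModelMonitor

    # Hypothetical IAM role and ECR image; substitute your own values.
    my_monitor = ModelMonitor(
        role="arn:aws:iam::111122223333:role/ExampleMonitorRole",
        image_uri="111122223333.dkr.ecr.us-west-2.amazonaws.com/my-analyzer:latest",
        instance_count=1,
        instance_type="ml.m5.xlarge",
    )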
+    def run_baseline(
+        self, baseline_inputs, output, arguments=None, wait=True, logs=True, job_name=None
+    ):
+        """Run a processing job meant to baseline your dataset.
+
+        Args:
+            baseline_inputs ([sagemaker.processing.ProcessingInput]): Input files for the processing
+                job. These must be provided as ProcessingInput objects.
+            output (sagemaker.processing.ProcessingOutput): Destination of the constraint_violations
+                and statistics json files.
+            arguments ([str]): A list of string arguments to be passed to a processing job.
+            wait (bool): Whether the call should wait until the job completes (default: True).
+            logs (bool): Whether to show the logs produced by the job.
+                Only meaningful when wait is True (default: True).
+            job_name (str): Processing job name. If not specified, the processor generates
+                a default job name, based on the image name and current timestamp.
+        """
+        self.latest_baselining_job_name = self._generate_baselining_job_name(job_name=job_name)
+        self.arguments = arguments
+        normalized_baseline_inputs = self._normalize_baseline_inputs(
+            baseline_inputs=baseline_inputs
+        )
+        normalized_output = self._normalize_processing_output(output=output)
+
+        baselining_processor = Processor(
+            role=self.role,
+            image_uri=self.image_uri,
+            instance_count=self.instance_count,
+            instance_type=self.instance_type,
+            entrypoint=self.entrypoint,
+            volume_size_in_gb=self.volume_size_in_gb,
+            volume_kms_key=self.volume_kms_key,
+            output_kms_key=self.output_kms_key,
+            max_runtime_in_seconds=self.max_runtime_in_seconds,
+            base_job_name=self.base_job_name,
+            sagemaker_session=self.sagemaker_session,
+            env=self.env,
+            tags=self.tags,
+            network_config=self.network_config,
+        )
+
+        baselining_processor.run(
+            inputs=normalized_baseline_inputs,
+            outputs=[normalized_output],
+            arguments=self.arguments,
+            wait=wait,
+            logs=logs,
+            job_name=self.latest_baselining_job_name,
+        )
+
+        # Create BaseliningJob manually since SageMaker 3.0 ProcessingJob has different attributes
+        self.latest_baselining_job = BaseliningJob(
+            sagemaker_session=self.sagemaker_session,
+            job_name=self.latest_baselining_job_name,
+            inputs=normalized_baseline_inputs,
+            outputs=[normalized_output],
+            output_kms_key=None,
+        )
+        self.baselining_jobs.append(self.latest_baselining_job)
+
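A usage sketch for run_baseline, assuming the my_monitor instance above, hypothetical S3 URIs, and that ProcessingInput/ProcessingOutput (imported by this module from sagemaker.core.processing) keep their classic source/destination constructor arguments:

    from sagemaker.core.processing import ProcessingInput, ProcessingOutput

    # Hypothetical dataset and result locations.
    baseline_input = ProcessingInput(
        source="s3://my-bucket/baselining/dataset.csv",
        destination="/opt/ml/processing/input/baseline_dataset_input",
    )
    baseline_output = ProcessingOutput(
        source="/opt/ml/processing/output",
        destination="s3://my-bucket/baselining/results",
    )
    my_monitor.run_baseline(baseline_inputs=[baseline_input], output=baseline_output, wait=True)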
+    def create_monitoring_schedule(
+        self,
+        endpoint_input=None,
+        output=None,
+        statistics=None,
+        constraints=None,
+        monitor_schedule_name=None,
+        schedule_cron_expression=None,
+        batch_transform_input=None,
+        arguments=None,
+        data_analysis_start_time=None,
+        data_analysis_end_time=None,
+    ):
+        """Creates a monitoring schedule to monitor an Amazon SageMaker Endpoint.
+
+        If constraints and statistics are provided, or if they are able to be retrieved from a
+        previous baselining job associated with this monitor, those will be used.
+        If constraints and statistics cannot be automatically retrieved, baseline_inputs will be
+        required in order to kick off a baselining job.
+
+        Args:
+            endpoint_input (str or sagemaker.model_monitor.EndpointInput): The endpoint to monitor.
+                This can either be the endpoint name or an EndpointInput. (default: None)
+            output (sagemaker.model_monitor.MonitoringOutput): The output of the monitoring
+                schedule. (default: None)
+            statistics (sagemaker.model_monitor.Statistic or str): If provided alongside
+                constraints, these will be used for monitoring the endpoint. This can be a
+                sagemaker.model_monitor.Statistic object or an S3 uri pointing to a statistic
+                JSON file. (default: None)
+            constraints (sagemaker.model_monitor.Constraints or str): If provided alongside
+                statistics, these will be used for monitoring the endpoint. This can be a
+                sagemaker.model_monitor.Constraints object or an S3 uri pointing to a constraints
+                JSON file. (default: None)
+            monitor_schedule_name (str): Schedule name. If not specified, the processor generates
+                a default job name, based on the image name and current timestamp. (default: None)
+            schedule_cron_expression (str): The cron expression that dictates the frequency that
+                this job runs at. See sagemaker.model_monitor.CronExpressionGenerator for valid
+                expressions. Default: Daily. (default: None)
+            batch_transform_input (sagemaker.model_monitor.BatchTransformInput): Inputs to
+                run the monitoring schedule on the batch transform
+                (default: None)
+            arguments ([str]): A list of string arguments to be passed to a processing job.
+            data_analysis_start_time (str): Start time for the data analysis window
+                for the one time monitoring schedule (NOW), e.g. "-PT1H" (default: None)
+            data_analysis_end_time (str): End time for the data analysis window
+                for the one time monitoring schedule (NOW), e.g. "-PT1H" (default: None)
+        """
+        if self.monitoring_schedule_name is not None:
+            message = (
+                "It seems that this object was already used to create an Amazon Model "
+                "Monitoring Schedule. To create another, first delete the existing one "
+                "using my_monitor.delete_monitoring_schedule()."
+            )
+            logger.warning(message)
+            raise ValueError(message)
+
+        if not output:
+            raise ValueError("output can not be None.")
+
+        if (batch_transform_input is not None) ^ (endpoint_input is None):
+            message = (
+                "Need to have either batch_transform_input or endpoint_input to create an "
+                "Amazon Model Monitoring Schedule. "
+                "Please provide only one of the above required inputs"
+            )
+            logger.error(message)
+            raise ValueError(message)
+
+        self._check_monitoring_schedule_cron_validity(
+            schedule_cron_expression=schedule_cron_expression,
+            data_analysis_start_time=data_analysis_start_time,
+            data_analysis_end_time=data_analysis_end_time,
+        )
+
+        self.monitoring_schedule_name = self._generate_monitoring_schedule_name(
+            schedule_name=monitor_schedule_name
+        )
+
+        if batch_transform_input is not None:
+            normalized_monitoring_input = batch_transform_input._to_request_dict()
+        else:
+            normalized_monitoring_input = self._normalize_endpoint_input(
+                endpoint_input=endpoint_input
+            )._to_request_dict()
+
+        normalized_monitoring_output = self._normalize_monitoring_output_fields(output=output)
+
+        statistics_object, constraints_object = self._get_baseline_files(
+            statistics=statistics, constraints=constraints, sagemaker_session=self.sagemaker_session
+        )
+
+        statistics_s3_uri = None
+        if statistics_object is not None:
+            statistics_s3_uri = statistics_object.file_s3_uri
+
+        constraints_s3_uri = None
+        if constraints_object is not None:
+            constraints_s3_uri = constraints_object.file_s3_uri
+
+        monitoring_output_config = {
+            "MonitoringOutputs": [normalized_monitoring_output._to_request_dict()]
+        }
+
+        if self.output_kms_key is not None:
+            monitoring_output_config["KmsKeyId"] = self.output_kms_key
+
+        self.monitoring_schedule_name = (
+            monitor_schedule_name
+            or self._generate_monitoring_schedule_name(schedule_name=monitor_schedule_name)
+        )
+
+        network_config_dict = None
+        if self.network_config is not None:
+            network_config_dict = self.network_config._to_request_dict()
+
+        if arguments is not None:
+            self.arguments = arguments
+
+        try:
+            boto_create_monitoring_schedule(
+                sagemaker_session=self.sagemaker_session,
+                monitoring_schedule_name=self.monitoring_schedule_name,
+                schedule_expression=schedule_cron_expression,
+                statistics_s3_uri=statistics_s3_uri,
+                constraints_s3_uri=constraints_s3_uri,
+                monitoring_inputs=[normalized_monitoring_input],
+                monitoring_output_config=monitoring_output_config,
+                instance_count=self.instance_count,
+                instance_type=self.instance_type,
+                volume_size_in_gb=self.volume_size_in_gb,
+                volume_kms_key=self.volume_kms_key,
+                image_uri=self.image_uri,
+                entrypoint=self.entrypoint,
+                arguments=self.arguments,
+                record_preprocessor_source_uri=None,
+                post_analytics_processor_source_uri=None,
+                max_runtime_in_seconds=self.max_runtime_in_seconds,
+                environment=self.env,
+                network_config=network_config_dict,
+                role_arn=expand_role(self.sagemaker_session, self.role),
+                tags=self.tags,
+                data_analysis_start_time=data_analysis_start_time,
+                data_analysis_end_time=data_analysis_end_time,
+            )
+        except Exception:
+            self.monitoring_schedule_name = None
+            raise
+
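A scheduling sketch assuming the baseline files produced above and a MonitoringOutput-style object (mon_output, a placeholder) built per this method's docstring; CronExpressionGenerator is imported by this module, and its hourly() helper matches the classic SDK:

    from sagemaker.core.model_monitor.cron_expression_generator import CronExpressionGenerator

    # "my-endpoint", mon_output, and the S3 URIs are hypothetical placeholders.
    my_monitor.create_monitoring_schedule(
        endpoint_input="my-endpoint",
        output=mon_output,
        statistics="s3://my-bucket/baselining/results/statistics.json",
        constraints="s3://my-bucket/baselining/results/constraints.json",
        schedule_cron_expression=CronExpressionGenerator.hourly(),
    )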
+    def update_monitoring_schedule(
+        self,
+        endpoint_input=None,
+        output=None,
+        statistics=None,
+        constraints=None,
+        schedule_cron_expression=None,
+        instance_count=None,
+        instance_type=None,
+        entrypoint=None,
+        volume_size_in_gb=None,
+        volume_kms_key=None,
+        output_kms_key=None,
+        arguments=None,
+        max_runtime_in_seconds=None,
+        env=None,
+        network_config=None,
+        role=None,
+        image_uri=None,
+        batch_transform_input=None,
+        data_analysis_start_time=None,
+        data_analysis_end_time=None,
+    ):
+        """Updates the existing monitoring schedule.
+
+        If more options than schedule_cron_expression are to be updated, a new job definition will
+        be created to hold them. The old job definition will not be deleted.
+
+        Args:
+            endpoint_input (str or sagemaker.model_monitor.EndpointInput): The endpoint to monitor.
+                This can either be the endpoint name or an EndpointInput.
+            output (sagemaker.model_monitor.MonitoringOutput): The output of the monitoring
+                schedule.
+            statistics (sagemaker.model_monitor.Statistic or str): If provided alongside
+                constraints, these will be used for monitoring the endpoint. This can be a
+                sagemaker.model_monitor.Statistics object or an S3 uri pointing to a statistics
+                JSON file.
+            constraints (sagemaker.model_monitor.Constraints or str): If provided alongside
+                statistics, these will be used for monitoring the endpoint. This can be a
+                sagemaker.model_monitor.Constraints object or an S3 uri pointing to a constraints
+                JSON file.
+            schedule_cron_expression (str): The cron expression that dictates the frequency that
+                this job runs at. See sagemaker.model_monitor.CronExpressionGenerator for valid
+                expressions.
+            instance_count (int): The number of instances to run
+                the jobs with.
+            instance_type (str): Type of EC2 instance to use for
+                the job, for example, 'ml.m5.xlarge'.
+            entrypoint (str): The entrypoint for the job.
+            volume_size_in_gb (int): Size in GB of the EBS volume
+                to use for storing data during processing (default: 30).
+            volume_kms_key (str): A KMS key for the job's volume.
+            output_kms_key (str): The KMS key id for the job's outputs.
+            arguments ([str]): A list of string arguments to be passed to a processing job.
+            max_runtime_in_seconds (int): Timeout in seconds. After this amount of
+                time, Amazon SageMaker terminates the job regardless of its current status.
+                Default: 3600
+            env (dict): Environment variables to be passed to the job.
+            network_config (sagemaker.network.NetworkConfig): A NetworkConfig
+                object that configures network isolation, encryption of
+                inter-container traffic, security group IDs, and subnets.
+            role (str): An AWS IAM role name or ARN. The Amazon SageMaker jobs use this role.
+            image_uri (str): The uri of the image to use for the jobs started by
+                the Monitor.
+            batch_transform_input (sagemaker.model_monitor.BatchTransformInput): Inputs to
+                run the monitoring schedule on the batch transform (default: None)
+            data_analysis_start_time (str): Start time for the data analysis window
+                for the one time monitoring schedule (NOW), e.g. "-PT1H" (default: None)
+            data_analysis_end_time (str): End time for the data analysis window
+                for the one time monitoring schedule (NOW), e.g. "-PT1H" (default: None)
+        """
+        monitoring_inputs = None
+
+        if (batch_transform_input is not None) and (endpoint_input is not None):
+            message = (
+                "Cannot update both batch_transform_input and endpoint_input to update an "
+                "Amazon Model Monitoring Schedule. "
+                "Please provide at most one of the above inputs"
+            )
+            logger.error(message)
+            raise ValueError(message)
+
+        if endpoint_input is not None:
+            monitoring_inputs = [
+                self._normalize_endpoint_input(endpoint_input=endpoint_input)._to_request_dict()
+            ]
+
+        elif batch_transform_input is not None:
+            monitoring_inputs = [batch_transform_input._to_request_dict()]
+
+        monitoring_output_config = None
+        if output is not None:
+            normalized_monitoring_output = self._normalize_monitoring_output_fields(output=output)
+            monitoring_output_config = {
+                "MonitoringOutputs": [normalized_monitoring_output._to_request_dict()]
+            }
+
+        statistics_object, constraints_object = self._get_baseline_files(
+            statistics=statistics, constraints=constraints, sagemaker_session=self.sagemaker_session
+        )
+
+        statistics_s3_uri = None
+        if statistics_object is not None:
+            statistics_s3_uri = statistics_object.file_s3_uri
+
+        constraints_s3_uri = None
+        if constraints_object is not None:
+            constraints_s3_uri = constraints_object.file_s3_uri
+
+        if instance_type is not None:
+            self.instance_type = instance_type
+
+        if instance_count is not None:
+            self.instance_count = instance_count
+
+        if entrypoint is not None:
+            self.entrypoint = entrypoint
+
+        if volume_size_in_gb is not None:
+            self.volume_size_in_gb = volume_size_in_gb
+
+        if volume_kms_key is not None:
+            self.volume_kms_key = volume_kms_key
+
+        if output_kms_key is not None:
+            self.output_kms_key = output_kms_key
+            if monitoring_output_config is not None:
+                # monitoring_output_config is None unless an output update was requested above.
+                monitoring_output_config["KmsKeyId"] = self.output_kms_key
+
+        if arguments is not None:
+            self.arguments = arguments
+
+        if max_runtime_in_seconds is not None:
+            self.max_runtime_in_seconds = max_runtime_in_seconds
+
+        if env is not None:
+            self.env = env
+
+        if network_config is not None:
+            self.network_config = network_config
+
+        if role is not None:
+            self.role = role
+
+        if image_uri is not None:
+            self.image_uri = image_uri
+
+        network_config_dict = None
+        if self.network_config is not None:
+            network_config_dict = self.network_config._to_request_dict()
+
+        boto_update_monitoring_schedule(
+            sagemaker_session=self.sagemaker_session,
+            monitoring_schedule_name=self.monitoring_schedule_name,
+            schedule_expression=schedule_cron_expression,
+            statistics_s3_uri=statistics_s3_uri,
+            constraints_s3_uri=constraints_s3_uri,
+            monitoring_inputs=monitoring_inputs,
+            monitoring_output_config=monitoring_output_config,
+            instance_count=instance_count,
+            instance_type=instance_type,
+            volume_size_in_gb=volume_size_in_gb,
+            volume_kms_key=volume_kms_key,
+            image_uri=image_uri,
+            entrypoint=entrypoint,
+            arguments=arguments,
+            max_runtime_in_seconds=max_runtime_in_seconds,
+            environment=env,
+            network_config=network_config_dict,
+            role_arn=expand_role(self.sagemaker_session, self.role),
+            data_analysis_start_time=data_analysis_start_time,
+            data_analysis_end_time=data_analysis_end_time,
+        )
+
+        self._wait_for_schedule_changes_to_apply()
+
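Per the docstring above, updating only the cron expression is the cheapest change, since no new job definition is created; a one-call sketch using the same generator, assuming its daily() helper matches the classic SDK:

    my_monitor.update_monitoring_schedule(
        schedule_cron_expression=CronExpressionGenerator.daily()
    )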
+    def start_monitoring_schedule(self):
+        """Starts the monitoring schedule."""
+        boto_start_monitoring_schedule(
+            self.sagemaker_session, monitoring_schedule_name=self.monitoring_schedule_name
+        )
+
+        self._wait_for_schedule_changes_to_apply()
+
+    def stop_monitoring_schedule(self):
+        """Stops the monitoring schedule."""
+        boto_stop_monitoring_schedule(
+            self.sagemaker_session, monitoring_schedule_name=self.monitoring_schedule_name
+        )
+
+        self._wait_for_schedule_changes_to_apply()
+
+    def delete_monitoring_schedule(self):
+        """Deletes the monitoring schedule (subclass is responsible for deleting job definition)"""
+        # DO NOT call super which erases schedule name and makes wait impossible.
+        boto_delete_monitoring_schedule(
+            self.sagemaker_session, monitoring_schedule_name=self.monitoring_schedule_name
+        )
+        if self.job_definition_name is not None:
+            # Job definition is locked by schedule so need to wait for the schedule to be deleted
+            try:
+                self._wait_for_schedule_changes_to_apply()
+            except self.sagemaker_session.sagemaker_client.exceptions.ResourceNotFound:
+                # OK the schedule is gone
+                pass
+        self.monitoring_schedule_name = None
+
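The three methods above wrap the boto schedule APIs one-to-one, and each blocks in _wait_for_schedule_changes_to_apply() until the change takes effect; the lifecycle in order:

    my_monitor.stop_monitoring_schedule()    # pause scheduled executions
    my_monitor.start_monitoring_schedule()   # resume them
    my_monitor.delete_monitoring_schedule()  # remove the schedule and clear monitoring_schedule_name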
+    def baseline_statistics(self, file_name=STATISTICS_JSON_DEFAULT_FILE_NAME):
+        """Returns a Statistics object representing the statistics json file.
+
+        Object is generated by the latest baselining job.
+
+        Args:
+            file_name (str): The name of the .json statistics file
+
+        Returns:
+            sagemaker.model_monitor.Statistics: The Statistics object representing the file that
+                was generated by the job.
+        """
+        return self.latest_baselining_job.baseline_statistics(
+            file_name=file_name, kms_key=self.output_kms_key
+        )
+
+    def suggested_constraints(self, file_name=CONSTRAINTS_JSON_DEFAULT_FILE_NAME):
+        """Returns a Constraints object representing the constraints json file.
+
+        Object is generated by the latest baselining job.
+
+        Args:
+            file_name (str): The name of the .json constraints file
+
+        Returns:
+            sagemaker.model_monitor.Constraints: The Constraints object representing the file that
+                was generated by the job.
+        """
+        return self.latest_baselining_job.suggested_constraints(
+            file_name=file_name, kms_key=self.output_kms_key
+        )
+
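Once a baselining job has run, these two accessors surface the generated files; file_s3_uri is the same attribute this module itself reads on Statistics/Constraints objects:

    stats = my_monitor.baseline_statistics()
    constraints = my_monitor.suggested_constraints()
    print(stats.file_s3_uri, constraints.file_s3_uri)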
+    def latest_monitoring_statistics(self, file_name=STATISTICS_JSON_DEFAULT_FILE_NAME):
+        """Returns the sagemaker.model_monitor.Statistics generated by the latest
+        monitoring execution.
+
+        Args:
+            file_name (str): The name of the statistics file to be retrieved. Only override if
+                generating a custom file name.
+
+        Returns:
+            sagemaker.model_monitor.Statistics: The Statistics object representing the file
+                generated by the latest monitoring execution.
+        """
+        executions = self.list_executions()
+        if len(executions) == 0:
+            logger.warning(
+                "No executions found for schedule. monitoring_schedule_name: %s",
+                self.monitoring_schedule_name,
+            )
+            return None
+
+        latest_monitoring_execution = executions[-1]
+        return latest_monitoring_execution.statistics(file_name=file_name)
+
+    def latest_monitoring_constraint_violations(
+        self, file_name=CONSTRAINT_VIOLATIONS_JSON_DEFAULT_FILE_NAME
+    ):
+        """Returns the sagemaker.model_monitor.ConstraintViolations generated by the latest
+        monitoring execution.
+
+        Args:
+            file_name (str): The name of the constraint violations file to be retrieved. Only
+                override if generating a custom file name.
+
+        Returns:
+            sagemaker.model_monitor.ConstraintViolations: The ConstraintViolations object
+                representing the file generated by the latest monitoring execution.
+        """
+        executions = self.list_executions()
+        if len(executions) == 0:
+            logger.warning(
+                "No executions found for schedule. monitoring_schedule_name: %s",
+                self.monitoring_schedule_name,
+            )
+            return None
+
+        latest_monitoring_execution = executions[-1]
+        return latest_monitoring_execution.constraint_violations(file_name=file_name)
+
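Both accessors return None (after logging a warning) when the schedule has no executions yet, so callers should guard for it; body_dict is assumed here from the classic monitoring_files classes:

    violations = my_monitor.latest_monitoring_constraint_violations()
    if violations is not None:
        print(violations.body_dict)  # assumed attribute, per the classic SDK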
+    def describe_latest_baselining_job(self):
+        """Describe the latest baselining job kicked off by the suggest workflow."""
+        if self.latest_baselining_job is None:
+            raise ValueError("No suggestion jobs were kicked off.")
+        return self.latest_baselining_job.describe()
+
+    def describe_schedule(self):
+        """Describes the schedule that this object represents.
+
+        Returns:
+            dict: A dictionary response with the monitoring schedule description.
+        """
+        return boto_describe_monitoring_schedule(
+            self.sagemaker_session, monitoring_schedule_name=self.monitoring_schedule_name
+        )
+
+    def list_executions(self):
+        """Get the list of monitoring executions in ascending order of "ScheduledTime",
+        so the last element is the most recent execution.
+
+        Statistics or violations can be retrieved as in this example:
+
+        Example:
+            >>> my_executions = my_monitor.list_executions()
+            >>> latest_execution_statistics = my_executions[-1].statistics()
+            >>> latest_execution_violations = my_executions[-1].constraint_violations()
+
+        Returns:
+            [sagemaker.model_monitor.MonitoringExecution]: List of MonitoringExecutions in
+                ascending order of "ScheduledTime".
+        """
+        monitoring_executions_dict = boto_list_monitoring_executions(
+            sagemaker_session=self.sagemaker_session,
+            monitoring_schedule_name=self.monitoring_schedule_name,
+        )
+
+        if len(monitoring_executions_dict["MonitoringExecutionSummaries"]) == 0:
+            logger.warning(
+                "No executions found for schedule. monitoring_schedule_name: %s",
+                self.monitoring_schedule_name,
+            )
+            return []
+
+        processing_job_arns = [
+            execution_dict["ProcessingJobArn"]
+            for execution_dict in monitoring_executions_dict["MonitoringExecutionSummaries"]
+            if execution_dict.get("ProcessingJobArn") is not None
+        ]
+        monitoring_executions = [
+            MonitoringExecution.from_processing_arn(
+                sagemaker_session=self.sagemaker_session, processing_job_arn=processing_job_arn
+            )
+            for processing_job_arn in processing_job_arns
+        ]
+        monitoring_executions.reverse()
+
+        return monitoring_executions
+
+    def get_latest_execution_logs(self, wait=False):
+        """Get the processing job logs for the most recent monitoring execution.
+
+        Args:
+            wait (bool): Whether the call should wait until the job completes (default: False).
+
+        Raises:
+            ValueError: If no execution job or processing job for the last execution has run
+
+        Returns: None
+        """
+        monitoring_executions = boto_list_monitoring_executions(
+            sagemaker_session=self.sagemaker_session,
+            monitoring_schedule_name=self.monitoring_schedule_name,
+        )
+        if len(monitoring_executions["MonitoringExecutionSummaries"]) == 0:
+            raise ValueError("No execution jobs were kicked off.")
+        if "ProcessingJobArn" not in monitoring_executions["MonitoringExecutionSummaries"][0]:
+            raise ValueError("Processing Job did not run for the last execution")
+        job_arn = monitoring_executions["MonitoringExecutionSummaries"][0]["ProcessingJobArn"]
+        logs_for_processing_job(
+            sagemaker_session=self.sagemaker_session,
+            job_name=get_resource_name_from_arn(job_arn),
+            wait=wait,
+        )
+
def update_monitoring_alert(
|
|
856
|
+
self,
|
|
857
|
+
monitoring_alert_name: str,
|
|
858
|
+
data_points_to_alert: Optional[int],
|
|
859
|
+
evaluation_period: Optional[int],
|
|
860
|
+
):
|
|
861
|
+
"""Update the monitoring schedule alert.
|
|
862
|
+
|
|
863
|
+
Args:
|
|
864
|
+
monitoring_alert_name (str): The name of the monitoring alert to update.
|
|
865
|
+
data_points_to_alert (int): The data point to alert.
|
|
866
|
+
evaluation_period (int): The period to evaluate the alert status.
|
|
867
|
+
|
|
868
|
+
Returns: None
|
|
869
|
+
"""
|
|
870
|
+
|
|
871
|
+
if self.monitoring_schedule_name is None:
|
|
872
|
+
message = "Nothing to update, please create a schedule first."
|
|
873
|
+
logger.error(message)
|
|
874
|
+
raise ValueError(message)
|
|
875
|
+
|
|
876
|
+
if not data_points_to_alert and not evaluation_period:
|
|
877
|
+
raise ValueError("Got no alert property to update.")
|
|
878
|
+
|
|
879
|
+
boto_update_monitoring_alert(
|
|
880
|
+
sagemaker_session=self.sagemaker_session,
|
|
881
|
+
monitoring_schedule_name=self.monitoring_schedule_name,
|
|
882
|
+
monitoring_alert_name=monitoring_alert_name,
|
|
883
|
+
data_points_to_alert=data_points_to_alert,
|
|
884
|
+
evaluation_period=evaluation_period,
|
|
885
|
+
)
|
|
886
|
+
|
|
887
|
+
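
    # A minimal sketch (alert name assumed): tighten an existing alert so that two
    # failing data points within the five most recent executions raise it:
    #
    #     my_monitor.update_monitoring_alert(
    #         monitoring_alert_name="model-dashboard-alert",
    #         data_points_to_alert=2,
    #         evaluation_period=5,
    #     )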

    def list_monitoring_alerts(
        self, next_token: Optional[str] = None, max_results: Optional[int] = 10
    ):
        """List the monitoring alerts.

        Args:
            next_token (Optional[str]): The pagination token. Default: None
            max_results (Optional[int]): The maximum number of results to return.
                Must be between 1 and 100. Default: 10

        Returns:
            List[MonitoringAlertSummary]: list of monitoring alerts.
            str: Next token.
        """
        if self.monitoring_schedule_name is None:
            message = "No alert to list, please create a schedule first."
            logger.warning(message)
            return [], None

        monitoring_alert_dict: Dict = boto_list_monitoring_alerts(
            sagemaker_session=self.sagemaker_session,
            monitoring_schedule_name=self.monitoring_schedule_name,
            next_token=next_token,
            max_results=max_results,
        )
        monitoring_alerts: List[MonitoringAlertSummary] = []
        for monitoring_alert in monitoring_alert_dict["MonitoringAlertSummaries"]:
            monitoring_alerts.append(
                MonitoringAlertSummary(
                    alert_name=monitoring_alert["MonitoringAlertName"],
                    creation_time=monitoring_alert["CreationTime"],
                    last_modified_time=monitoring_alert["LastModifiedTime"],
                    alert_status=monitoring_alert["AlertStatus"],
                    data_points_to_alert=monitoring_alert["DatapointsToAlert"],
                    evaluation_period=monitoring_alert["EvaluationPeriod"],
                    actions=MonitoringAlertActions(
                        model_dashboard_indicator=ModelDashboardIndicatorAction(
                            enabled=monitoring_alert["Actions"]["ModelDashboardIndicator"][
                                "Enabled"
                            ],
                        )
                    ),
                )
            )

        next_token = (
            monitoring_alert_dict["NextToken"] if "NextToken" in monitoring_alert_dict else None
        )
        return monitoring_alerts, next_token
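
    # A minimal pagination sketch: since the method returns (alerts, next_token),
    # all pages can be drained with a simple loop:
    #
    #     alerts, token = my_monitor.list_monitoring_alerts(max_results=100)
    #     while token is not None:
    #         page, token = my_monitor.list_monitoring_alerts(next_token=token, max_results=100)
    #         alerts.extend(page)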

    def list_monitoring_alert_history(
        self,
        monitoring_alert_name: Optional[str] = None,
        sort_by: Optional[str] = "CreationTime",
        sort_order: Optional[str] = "Descending",
        next_token: Optional[str] = None,
        max_results: Optional[int] = 10,
        creation_time_before: Optional[str] = None,
        creation_time_after: Optional[str] = None,
        status_equals: Optional[str] = None,
    ):
        """Lists the alert history associated with the given schedule_name and alert_name.

        Args:
            monitoring_alert_name (Optional[str]): The name of the alert to filter on.
                If not provided, does not filter on it. Default: None.
            sort_by (Optional[str]): The field to sort by.
                Can be one of: "Name", "CreationTime".
                Default: "CreationTime".
            sort_order (Optional[str]): The sort order. Can be one of: "Ascending", "Descending".
                Default: "Descending".
            next_token (Optional[str]): The pagination token. Default: None.
            max_results (Optional[int]): The maximum number of results to return.
                Must be between 1 and 100. Default: 10.
            creation_time_before (Optional[str]): A filter to return only alert history created
                before this time. Default: None.
            creation_time_after (Optional[str]): A filter to return only alert history created
                after this time. Default: None.
            status_equals (Optional[str]): A filter to return only alert history with this
                status. Default: None.
        Returns:
            List[MonitoringAlertHistorySummary]: list of monitoring alert history.
            str: Next token.
        """
        if self.monitoring_schedule_name is None:
            message = "No alert history to list, please create a schedule first."
            logger.warning(message)
            return [], None

        monitoring_alert_history_dict: Dict = boto_list_monitoring_alert_history(
            self.sagemaker_session,
            monitoring_schedule_name=self.monitoring_schedule_name,
            monitoring_alert_name=monitoring_alert_name,
            sort_by=sort_by,
            sort_order=sort_order,
            next_token=next_token,
            max_results=max_results,
            status_equals=status_equals,
            creation_time_before=creation_time_before,
            creation_time_after=creation_time_after,
        )
        monitoring_alert_history: List[MonitoringAlertHistorySummary] = []
        for monitoring_alert_history_summary in monitoring_alert_history_dict[
            "MonitoringAlertHistory"
        ]:
            monitoring_alert_history.append(
                MonitoringAlertHistorySummary(
                    alert_name=monitoring_alert_history_summary["MonitoringAlertName"],
                    creation_time=monitoring_alert_history_summary["CreationTime"],
                    alert_status=monitoring_alert_history_summary["AlertStatus"],
                )
            )

        next_token = (
            monitoring_alert_history_dict["NextToken"]
            if "NextToken" in monitoring_alert_history_dict
            else None
        )
        return monitoring_alert_history, next_token
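
    # A minimal sketch (alert name assumed): list only the transitions into the
    # "InAlert" state for one alert:
    #
    #     history, _ = my_monitor.list_monitoring_alert_history(
    #         monitoring_alert_name="model-dashboard-alert",
    #         status_equals="InAlert",
    #     )
    #     for item in history:
    #         print(item.creation_time, item.alert_status)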

    @classmethod
    def attach(cls, monitor_schedule_name, sagemaker_session=None):
        """Set this object's schedule name to point to the given Amazon SageMaker Monitoring Schedule.

        This allows subsequent describe_schedule or list_executions calls to point
        to the given schedule.

        Args:
            monitor_schedule_name (str): The name of the schedule to attach to.
            sagemaker_session (sagemaker.core.helper.session_helper.Session): Session object which
                manages interactions with Amazon SageMaker APIs and any other
                AWS services needed. If not specified, one is created using
                the default AWS configuration chain.

        """
        sagemaker_session = sagemaker_session or Session()
        schedule_desc = boto_describe_monitoring_schedule(
            sagemaker_session=sagemaker_session, monitoring_schedule_name=monitor_schedule_name
        )

        monitoring_job_definition = schedule_desc["MonitoringScheduleConfig"][
            "MonitoringJobDefinition"
        ]
        role = monitoring_job_definition["RoleArn"]
        image_uri = monitoring_job_definition["MonitoringAppSpecification"].get("ImageUri")
        cluster_config = monitoring_job_definition["MonitoringResources"]["ClusterConfig"]
        instance_count = cluster_config.get("InstanceCount")
        instance_type = cluster_config["InstanceType"]
        volume_size_in_gb = cluster_config["VolumeSizeInGB"]
        volume_kms_key = cluster_config.get("VolumeKmsKeyId")
        entrypoint = monitoring_job_definition["MonitoringAppSpecification"].get(
            "ContainerEntrypoint"
        )
        output_kms_key = monitoring_job_definition["MonitoringOutputConfig"].get("KmsKeyId")
        network_config_dict = monitoring_job_definition.get("NetworkConfig")

        max_runtime_in_seconds = None
        stopping_condition = monitoring_job_definition.get("StoppingCondition")
        if stopping_condition:
            max_runtime_in_seconds = stopping_condition.get("MaxRuntimeInSeconds")

        env = monitoring_job_definition.get("Environment", None)

        vpc_config = None
        if network_config_dict:
            vpc_config = network_config_dict.get("VpcConfig")

        security_group_ids = None
        if vpc_config:
            security_group_ids = vpc_config["SecurityGroupIds"]

        subnets = None
        if vpc_config:
            subnets = vpc_config["Subnets"]

        network_config = None
        if network_config_dict:
            network_config = NetworkConfig(
                enable_network_isolation=network_config_dict["EnableNetworkIsolation"],
                encrypt_inter_container_traffic=network_config_dict[
                    "EnableInterContainerTrafficEncryption"
                ],
                security_group_ids=security_group_ids,
                subnets=subnets,
            )

        tags = list_tags(
            sagemaker_session=sagemaker_session, resource_arn=schedule_desc["MonitoringScheduleArn"]
        )

        attached_monitor = cls(
            role=role,
            image_uri=image_uri,
            instance_count=instance_count,
            instance_type=instance_type,
            entrypoint=entrypoint,
            volume_size_in_gb=volume_size_in_gb,
            volume_kms_key=volume_kms_key,
            output_kms_key=output_kms_key,
            max_runtime_in_seconds=max_runtime_in_seconds,
            sagemaker_session=sagemaker_session,
            env=env,
            tags=tags,
            network_config=network_config,
        )
        attached_monitor.monitoring_schedule_name = monitor_schedule_name
        return attached_monitor
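
    # A minimal sketch (schedule name assumed): re-hydrate a monitor for an existing
    # schedule and inspect its most recent execution:
    #
    #     my_monitor = ModelMonitor.attach("my-endpoint-data-quality-schedule")
    #     executions = my_monitor.list_executions()
    #     if executions:
    #         print(executions[-1].describe()["ProcessingJobStatus"])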

    @staticmethod
    def _attach(clazz, sagemaker_session, schedule_desc, job_desc, tags):
        """Attach a model monitor object to an existing monitoring schedule.

        Args:
            clazz: a subclass of this class
            sagemaker_session (sagemaker.core.helper.session_helper.Session): Session object which
                manages interactions with Amazon SageMaker APIs and any other
                AWS services needed. If not specified, one is created using
                the default AWS configuration chain.
            schedule_desc (dict): output of describe monitoring schedule API.
            job_desc (dict): output of describe job definition API.

        Returns:
            Object of a subclass of this class.
        """

        monitoring_schedule_name = schedule_desc["MonitoringScheduleName"]
        job_definition_name = schedule_desc["MonitoringScheduleConfig"][
            "MonitoringJobDefinitionName"
        ]
        monitoring_type = schedule_desc["MonitoringScheduleConfig"]["MonitoringType"]
        role = job_desc["RoleArn"]
        cluster_config = job_desc["JobResources"]["ClusterConfig"]
        instance_count = cluster_config.get("InstanceCount")
        instance_type = cluster_config["InstanceType"]
        volume_size_in_gb = cluster_config["VolumeSizeInGB"]
        volume_kms_key = cluster_config.get("VolumeKmsKeyId")
        output_kms_key = job_desc["{}JobOutputConfig".format(monitoring_type)].get("KmsKeyId")
        network_config_dict = job_desc.get("NetworkConfig", {})

        max_runtime_in_seconds = None
        stopping_condition = job_desc.get("StoppingCondition")
        if stopping_condition:
            max_runtime_in_seconds = stopping_condition.get("MaxRuntimeInSeconds")

        env = job_desc["{}AppSpecification".format(monitoring_type)].get("Environment", None)

        vpc_config = network_config_dict.get("VpcConfig")

        security_group_ids = None
        if vpc_config:
            security_group_ids = vpc_config["SecurityGroupIds"]

        subnets = None
        if vpc_config:
            subnets = vpc_config["Subnets"]

        network_config = None
        if network_config_dict:
            network_config = NetworkConfig(
                enable_network_isolation=network_config_dict["EnableNetworkIsolation"],
                encrypt_inter_container_traffic=network_config_dict[
                    "EnableInterContainerTrafficEncryption"
                ],
                security_group_ids=security_group_ids,
                subnets=subnets,
            )

        attached_monitor = clazz(
            role=role,
            instance_count=instance_count,
            instance_type=instance_type,
            volume_size_in_gb=volume_size_in_gb,
            volume_kms_key=volume_kms_key,
            output_kms_key=output_kms_key,
            max_runtime_in_seconds=max_runtime_in_seconds,
            sagemaker_session=sagemaker_session,
            env=env,
            tags=tags,
            network_config=network_config,
        )
        attached_monitor.monitoring_schedule_name = monitoring_schedule_name
        attached_monitor.job_definition_name = job_definition_name
        return attached_monitor

    def _generate_baselining_job_name(self, job_name=None):
        """Generate the job name before running a suggestion processing job.

        Args:
            job_name (str): Name of the suggestion processing job to be created. If not
                specified, one is generated using the base name given to the
                constructor, if applicable.

        Returns:
            str: The supplied or generated job name.

        """
        if job_name is not None:
            return job_name

        if self.base_job_name:
            base_name = self.base_job_name
        else:
            base_name = _SUGGESTION_JOB_BASE_NAME

        return name_from_base(base=base_name)

    def _generate_monitoring_schedule_name(self, schedule_name=None):
        """Generate the monitoring schedule name.

        Args:
            schedule_name (str): Name of the monitoring schedule to be created. If not
                specified, one is generated using the base name given to the
                constructor, if applicable.

        Returns:
            str: The supplied or generated schedule name.

        """
        if schedule_name is not None:
            return schedule_name

        if self.base_job_name:
            base_name = self.base_job_name
        else:
            base_name = _MONITORING_SCHEDULE_BASE_NAME

        return name_from_base(base=base_name)

    @staticmethod
    def _generate_env_map(
        env,
        output_path=None,
        enable_cloudwatch_metrics=None,
        record_preprocessor_script_container_path=None,
        post_processor_script_container_path=None,
        dataset_format=None,
        dataset_source_container_path=None,
        analysis_type=None,
        problem_type=None,
        inference_attribute=None,
        probability_attribute=None,
        ground_truth_attribute=None,
        probability_threshold_attribute=None,
        categorical_drift_method=None,
    ):
        """Generate a dictionary of environment variables from first-class parameters.

        Args:
            env (dict): Base environment variables to copy and extend.
            output_path (str): Local path to the output.
            enable_cloudwatch_metrics (bool): Whether to publish cloudwatch metrics as part of
                the baselining or monitoring jobs.
            record_preprocessor_script_container_path (str): The path to the record preprocessor
                script.
            post_processor_script_container_path (str): The path to the post analytics processor
                script.
            dataset_format (dict): The format of the baseline_dataset.
            dataset_source_container_path (str): The path to the dataset source.
            inference_attribute (str): Index or JSONpath to locate predicted label(s).
                Only used for ModelQualityMonitor.
            probability_attribute (str or int): Index or JSONpath to locate probabilities.
                Only used for ModelQualityMonitor.
            ground_truth_attribute (str): Index to locate actual label(s).
                Only used for ModelQualityMonitor.
            probability_threshold_attribute (float): Threshold to convert probabilities to
                binaries. Only used for ModelQualityMonitor.
            categorical_drift_method (str): categorical_drift_method to override the
                categorical_drift_method of global monitoring_config in constraints
                suggested by the Model Monitor container. Only used for DataQualityMonitor.

        Returns:
            dict: Dictionary of environment keys and values.

        """
        cloudwatch_env_map = {True: "Enabled", False: "Disabled"}

        if env is not None:
            env = copy.deepcopy(env)
        env = env or {}

        if output_path is not None:
            env[_OUTPUT_PATH_ENV_NAME] = output_path

        if enable_cloudwatch_metrics is not None:
            env[_PUBLISH_CLOUDWATCH_METRICS_ENV_NAME] = cloudwatch_env_map[
                enable_cloudwatch_metrics
            ]

        if dataset_format is not None:
            env[_DATASET_FORMAT_ENV_NAME] = json.dumps(dataset_format)

        if record_preprocessor_script_container_path is not None:
            env[_RECORD_PREPROCESSOR_SCRIPT_ENV_NAME] = record_preprocessor_script_container_path

        if post_processor_script_container_path is not None:
            env[_POST_ANALYTICS_PROCESSOR_SCRIPT_ENV_NAME] = post_processor_script_container_path

        if dataset_source_container_path is not None:
            env[_DATASET_SOURCE_PATH_ENV_NAME] = dataset_source_container_path

        if analysis_type is not None:
            env[_ANALYSIS_TYPE_ENV_NAME] = analysis_type

        if problem_type is not None:
            env[_PROBLEM_TYPE_ENV_NAME] = problem_type

        if inference_attribute is not None:
            env[_INFERENCE_ATTRIBUTE_ENV_NAME] = inference_attribute

        if probability_attribute is not None:
            env[_PROBABILITY_ATTRIBUTE_ENV_NAME] = probability_attribute

        if ground_truth_attribute is not None:
            env[_GROUND_TRUTH_ATTRIBUTE_ENV_NAME] = ground_truth_attribute

        if probability_threshold_attribute is not None:
            env[_PROBABILITY_THRESHOLD_ATTRIBUTE_ENV_NAME] = probability_threshold_attribute

        if categorical_drift_method is not None:
            env[_CATEGORICAL_DRIFT_METHOD_ENV_NAME] = categorical_drift_method

        return env
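
    # A minimal sketch of the resulting mapping; the environment-variable names are
    # assumed to match the classic SDK constants ("dataset_format",
    # "publish_cloudwatch_metrics"):
    #
    #     env = ModelMonitor._generate_env_map(
    #         env={},
    #         dataset_format={"csv": {"header": True}},
    #         enable_cloudwatch_metrics=True,
    #     )
    #     # -> {"dataset_format": '{"csv": {"header": true}}',
    #     #     "publish_cloudwatch_metrics": "Enabled"}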

    @staticmethod
    def _get_baseline_files(statistics, constraints, sagemaker_session=None):
        """Populates baseline values if possible.

        Args:
            statistics (sagemaker.model_monitor.Statistics or str): The statistics object or str.
                If none, this method will attempt to retrieve a previously baselined statistics
                object.
            constraints (sagemaker.model_monitor.Constraints or str): The constraints object or str.
                If none, this method will attempt to retrieve a previously baselined constraints
                object.
            sagemaker_session (sagemaker.core.helper.session.Session): Session object which manages
                interactions with Amazon SageMaker APIs and any other AWS services needed. If not
                specified, one is created using the default AWS configuration chain.

        Returns:
            sagemaker.model_monitor.Statistics, sagemaker.model_monitor.Constraints: The Statistics
                and Constraints objects that were provided or created by the latest
                baselining job. If none were found, returns None.

        """
        if statistics is not None and isinstance(statistics, string_types):
            statistics = Statistics.from_s3_uri(
                statistics_file_s3_uri=statistics, sagemaker_session=sagemaker_session
            )
        if constraints is not None and isinstance(constraints, string_types):
            constraints = Constraints.from_s3_uri(
                constraints_file_s3_uri=constraints, sagemaker_session=sagemaker_session
            )

        return statistics, constraints

    def _normalize_endpoint_input(self, endpoint_input):
        """Ensure that the input is an EndpointInput object.

        Args:
            endpoint_input ([str or sagemaker.model_monitor.EndpointInput]): An endpoint input
                to be normalized. Can be either a string or an EndpointInput object.

        Returns:
            sagemaker.model_monitor.EndpointInput: The normalized EndpointInput object.

        """
        # If the input is a string, turn it into an EndpointInput object.
        if isinstance(endpoint_input, string_types):
            endpoint_input = EndpointInput(
                endpoint_name=endpoint_input,
                destination=str(
                    pathlib.PurePosixPath(
                        _CONTAINER_BASE_PATH, _CONTAINER_INPUT_PATH, _CONTAINER_ENDPOINT_INPUT_PATH
                    )
                ),
            )

        return endpoint_input

    def _normalize_baseline_inputs(self, baseline_inputs=None):
        """Ensure that all the ProcessingInput objects have names and S3 uris.

        Args:
            baseline_inputs ([sagemaker.processing.ProcessingInput]): A list of ProcessingInput
                objects to be normalized.

        Returns:
            [sagemaker.processing.ProcessingInput]: The list of normalized
                ProcessingInput objects.

        """
        # Initialize a list of normalized ProcessingInput objects.
        normalized_inputs = []
        if baseline_inputs is not None:
            # Iterate through the provided list of inputs.
            for count, file_input in enumerate(baseline_inputs, 1):
                if not isinstance(file_input, ProcessingInput):
                    raise TypeError("Your inputs must be provided as ProcessingInput objects.")
                # Generate a name for the ProcessingInput if it doesn't have one.
                if file_input.input_name is None:
                    file_input.input_name = "input-{}".format(count)
                # If the source is a local path, upload it to S3
                # and save the S3 uri in the ProcessingInput source.
                parse_result = urlparse(file_input.s3_input.s3_uri)
                if parse_result.scheme != "s3":
                    s3_uri = s3.s3_path_join(
                        "s3://",
                        self.sagemaker_session.default_bucket(),
                        self.sagemaker_session.default_bucket_prefix,
                        self.latest_baselining_job_name,
                        file_input.input_name,
                    )
                    s3.S3Uploader.upload(
                        local_path=file_input.s3_input.s3_uri,
                        desired_s3_uri=s3_uri,
                        sagemaker_session=self.sagemaker_session,
                    )
                    file_input.s3_input.s3_uri = s3_uri
                normalized_inputs.append(file_input)
        return normalized_inputs

    def _normalize_baseline_output(self, output_s3_uri=None):
        """Ensure that the output is a ProcessingOutput object.

        Args:
            output_s3_uri (str): The output S3 uri to deposit the baseline files in.

        Returns:
            sagemaker.processing.ProcessingOutput: The normalized ProcessingOutput object.

        """
        s3_uri = output_s3_uri or s3.s3_path_join(
            "s3://",
            self.sagemaker_session.default_bucket(),
            self.sagemaker_session.default_bucket_prefix,
            _MODEL_MONITOR_S3_PATH,
            _BASELINING_S3_PATH,
            self.latest_baselining_job_name,
            _RESULTS_S3_PATH,
        )

        return ProcessingOutput(
            output_name=_DEFAULT_OUTPUT_NAME,
            s3_output=ProcessingS3Output(
                s3_uri=s3_uri,
                local_path=str(pathlib.PurePosixPath(_CONTAINER_BASE_PATH, _CONTAINER_OUTPUT_PATH)),
                s3_upload_mode="EndOfJob",
            ),
        )

    def _normalize_processing_output(self, output=None):
        """Ensure that the output is a ProcessingOutput object.

        Args:
            output (str or sagemaker.processing.ProcessingOutput): An output to be normalized.

        Returns:
            sagemaker.processing.ProcessingOutput: The normalized ProcessingOutput object.

        """
        # If the output is a string, turn it into a ProcessingOutput object.
        if isinstance(output, string_types):
            s3_uri = s3.s3_path_join(
                "s3://",
                self.sagemaker_session.default_bucket(),
                self.sagemaker_session.default_bucket_prefix,
                self.latest_baselining_job_name,
                "output",
            )
            output = ProcessingOutput(
                source=output, destination=s3_uri, output_name=_DEFAULT_OUTPUT_NAME
            )

        return output

    def _normalize_monitoring_output(self, monitoring_schedule_name, output_s3_uri=None):
        """Ensure that the output is a MonitoringOutput object.

        Args:
            monitoring_schedule_name (str): Monitoring schedule name
            output_s3_uri (str): The output S3 uri to deposit the monitoring evaluation files in.

        Returns:
            sagemaker.model_monitor.MonitoringOutput: The normalized MonitoringOutput object.

        """
        s3_uri = output_s3_uri or s3.s3_path_join(
            "s3://",
            self.sagemaker_session.default_bucket(),
            self.sagemaker_session.default_bucket_prefix,
            _MODEL_MONITOR_S3_PATH,
            _MONITORING_S3_PATH,
            monitoring_schedule_name,
            _RESULTS_S3_PATH,
        )
        output = MonitoringOutput(
            source=str(pathlib.PurePosixPath(_CONTAINER_BASE_PATH, _CONTAINER_OUTPUT_PATH)),
            destination=s3_uri,
        )
        return output

    def _normalize_monitoring_output_fields(self, output=None):
        """Ensure that output has the correct fields.

        Args:
            output (sagemaker.model_monitor.MonitoringOutput): An output to be normalized.

        Returns:
            sagemaker.model_monitor.MonitoringOutput: The normalized MonitoringOutput object.

        """
        # If the output destination is missing, assign a default destination to it.
        if output.s3_output.s3_uri is None:
            output.s3_output.s3_uri = s3.s3_path_join(
                "s3://",
                self.sagemaker_session.default_bucket(),
                self.sagemaker_session.default_bucket_prefix,
                self.monitoring_schedule_name,
                "output",
            )

        return output

    def _s3_uri_from_local_path(self, path):
        """If path is local, uploads to S3 and returns S3 uri. Otherwise returns S3 uri as-is.

        Args:
            path (str): Path to file. This can be a local path or an S3 path.

        Returns:
            str: S3 uri to file.

        """
        parse_result = urlparse(path)
        if parse_result.scheme != "s3":
            s3_uri = s3.s3_path_join(
                "s3://",
                self.sagemaker_session.default_bucket(),
                self.sagemaker_session.default_bucket_prefix,
                _MODEL_MONITOR_S3_PATH,
                _MONITORING_S3_PATH,
                self.monitoring_schedule_name,
                _INPUT_S3_PATH,
                str(uuid.uuid4()),
            )
            s3.S3Uploader.upload(
                local_path=path, desired_s3_uri=s3_uri, sagemaker_session=self.sagemaker_session
            )
            path = s3.s3_path_join(s3_uri, os.path.basename(path))
        return path

    def _wait_for_schedule_changes_to_apply(self):
        """Waits for the schedule to no longer be in the 'Pending' state."""
        for _ in retries(
            max_retry_count=36,  # 36*5 = 3min
            exception_message_prefix="Waiting for schedule to leave 'Pending' status",
            seconds_to_sleep=5,
        ):
            schedule_desc = self.describe_schedule()
            if schedule_desc["MonitoringScheduleStatus"] != "Pending":
                break

    @classmethod
    def monitoring_type(cls):
        """Type of the monitoring job."""
        raise TypeError("Subclass of {} shall define this property".format(__class__.__name__))

    def _check_monitoring_schedule_cron_validity(
        self,
        schedule_cron_expression=None,
        data_analysis_start_time=None,
        data_analysis_end_time=None,
    ):
        """Checks if the schedule expression for the schedule is valid.

        Args:
            schedule_cron_expression (str): The cron expression that dictates the frequency that
                this job runs. See sagemaker.model_monitor.CronExpressionGenerator for valid
                expressions. Default: Daily.
            data_analysis_start_time (str): Start time for the data analysis window
                for the one time monitoring schedule (NOW), e.g. "-PT1H" (default: None)
            data_analysis_end_time (str): End time for the data analysis window
                for the one time monitoring schedule (NOW), e.g. "-PT1H" (default: None)
        """

        if schedule_cron_expression == CronExpressionGenerator.now() and (
            data_analysis_start_time is None or data_analysis_end_time is None
        ):
            message = (
                "Both data_analysis_start_time and data_analysis_end_time are required "
                "for one time monitoring schedule "
            )
            _LOGGER.error(message)
            raise ValueError(message)
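
    # A minimal sketch of the one-time case this validates (monitor and endpoint
    # names assumed): CronExpressionGenerator.now() requires an explicit analysis
    # window, here the hour leading up to the run:
    #
    #     my_monitor.create_monitoring_schedule(
    #         endpoint_input="my-endpoint",
    #         schedule_cron_expression=CronExpressionGenerator.now(),
    #         data_analysis_start_time="-PT1H",
    #         data_analysis_end_time="-PT0H",
    #     )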

    def _create_monitoring_schedule_from_job_definition(
        self,
        monitor_schedule_name,
        job_definition_name,
        schedule_cron_expression=None,
        data_analysis_start_time=None,
        data_analysis_end_time=None,
    ):
        """Creates a monitoring schedule.

        Args:
            monitor_schedule_name (str): Monitoring schedule name.
            job_definition_name (str): Job definition name.
            schedule_cron_expression (str): The cron expression that dictates the frequency that
                this job runs. See sagemaker.model_monitor.CronExpressionGenerator for valid
                expressions. Default: Daily.
            data_analysis_start_time (str): Start time for the data analysis window
                for the one time monitoring schedule (NOW), e.g. "-PT1H" (default: None)
            data_analysis_end_time (str): End time for the data analysis window
                for the one time monitoring schedule (NOW), e.g. "-PT1H" (default: None)
        """
        message = "Creating Monitoring Schedule with name: {}".format(monitor_schedule_name)
        logger.info(message)

        self._check_monitoring_schedule_cron_validity(
            schedule_cron_expression=schedule_cron_expression,
            data_analysis_start_time=data_analysis_start_time,
            data_analysis_end_time=data_analysis_end_time,
        )

        monitoring_schedule_config = {
            "MonitoringJobDefinitionName": job_definition_name,
            "MonitoringType": self.monitoring_type(),
        }
        if schedule_cron_expression is not None:
            monitoring_schedule_config["ScheduleConfig"] = {
                "ScheduleExpression": schedule_cron_expression,
            }
            if data_analysis_start_time is not None:
                monitoring_schedule_config["ScheduleConfig"][
                    "DataAnalysisStartTime"
                ] = data_analysis_start_time

            if data_analysis_end_time is not None:
                monitoring_schedule_config["ScheduleConfig"][
                    "DataAnalysisEndTime"
                ] = data_analysis_end_time

        all_tags = self.sagemaker_session._append_sagemaker_config_tags(
            self.tags, "{}.{}.{}".format(SAGEMAKER, MONITORING_SCHEDULE, TAGS)
        )

        # Not using value from sagemaker
        # config key MONITORING_SCHEDULE_INTER_CONTAINER_ENCRYPTION_PATH here
        # because no MonitoringJobDefinition is set for this call

        self.sagemaker_session.sagemaker_client.create_monitoring_schedule(
            MonitoringScheduleName=monitor_schedule_name,
            MonitoringScheduleConfig=monitoring_schedule_config,
            Tags=all_tags or [],
        )

    def _upload_and_convert_to_processing_input(self, source, destination, name):
        """Generates a ProcessingInput object from a source.

        Source can be a local path or an S3 uri.

        Args:
            source (str): The source of the data. This can be a local path or an S3 uri.
            destination (str): The desired container path for the data to be downloaded to.
            name (str): The name of the ProcessingInput.

        Returns:
            sagemaker.processing.ProcessingInput: The ProcessingInput object.

        """
        if source is None:
            return None

        parse_result = urlparse(url=source)

        if parse_result.scheme != "s3":
            s3_uri = s3.s3_path_join(
                "s3://",
                self.sagemaker_session.default_bucket(),
                self.sagemaker_session.default_bucket_prefix,
                _MODEL_MONITOR_S3_PATH,
                _BASELINING_S3_PATH,
                self.latest_baselining_job_name,
                _INPUT_S3_PATH,
                name,
            )
            s3.S3Uploader.upload(
                local_path=source, desired_s3_uri=s3_uri, sagemaker_session=self.sagemaker_session
            )
            source = s3_uri

        return ProcessingInput(
            input_name=name,
            s3_input=ProcessingS3Input(
                s3_uri=source,
                local_path=destination,
                s3_data_type="S3Prefix",
                s3_input_mode="File",
                s3_data_distribution_type="FullyReplicated",
            ),
        )

    # noinspection PyMethodOverriding
    def _update_monitoring_schedule(
        self,
        job_definition_name,
        schedule_cron_expression=None,
        data_analysis_start_time=None,
        data_analysis_end_time=None,
    ):
        """Updates existing monitoring schedule with new job definition and/or schedule expression.

        Args:
            job_definition_name (str): Job definition name.
            schedule_cron_expression (str or None): The cron expression that dictates the frequency
                that this job runs. See sagemaker.model_monitor.CronExpressionGenerator for valid
                expressions.
            data_analysis_start_time (str): Start time for the data analysis window
                for the one time monitoring schedule (NOW), e.g. "-PT1H" (default: None)
            data_analysis_end_time (str): End time for the data analysis window
                for the one time monitoring schedule (NOW), e.g. "-PT1H" (default: None)
        """
        if self.job_definition_name is None or self.monitoring_schedule_name is None:
            message = "Nothing to update, please create a schedule first."
            logger.error(message)
            raise ValueError(message)

        self._check_monitoring_schedule_cron_validity(
            schedule_cron_expression=schedule_cron_expression,
            data_analysis_start_time=data_analysis_start_time,
            data_analysis_end_time=data_analysis_end_time,
        )

        monitoring_schedule_config = {
            "MonitoringJobDefinitionName": job_definition_name,
            "MonitoringType": self.monitoring_type(),
        }
        if schedule_cron_expression is not None:
            monitoring_schedule_config["ScheduleConfig"] = {
                "ScheduleExpression": schedule_cron_expression
            }
            if data_analysis_start_time is not None:
                monitoring_schedule_config["ScheduleConfig"][
                    "DataAnalysisStartTime"
                ] = data_analysis_start_time
            if data_analysis_end_time is not None:
                monitoring_schedule_config["ScheduleConfig"][
                    "DataAnalysisEndTime"
                ] = data_analysis_end_time

        # Not using value from sagemaker
        # config key MONITORING_SCHEDULE_INTER_CONTAINER_ENCRYPTION_PATH here
        # because no MonitoringJobDefinition is set for this call

        self.sagemaker_session.sagemaker_client.update_monitoring_schedule(
            MonitoringScheduleName=self.monitoring_schedule_name,
            MonitoringScheduleConfig=monitoring_schedule_config,
        )
        self._wait_for_schedule_changes_to_apply()


class DefaultModelMonitor(ModelMonitor):
    """Sets up Amazon SageMaker Monitoring Schedules and baseline suggestions.

    Use this class when you want to utilize Amazon SageMaker Monitoring's plug-and-play
    solution that only requires your dataset and optional pre/postprocessing scripts.
    For a more customized experience, consider using the ModelMonitor class instead.
    """

    JOB_DEFINITION_BASE_NAME = "data-quality-job-definition"

    def __init__(
        self,
        role=None,
        instance_count=1,
        instance_type="ml.m5.xlarge",
        volume_size_in_gb=30,
        volume_kms_key=None,
        output_kms_key=None,
        max_runtime_in_seconds=None,
        base_job_name=None,
        sagemaker_session=None,
        env=None,
        tags=None,
        network_config=None,
    ):
        """Initializes a ``Monitor`` instance.

        The Monitor handles baselining datasets and creating Amazon SageMaker Monitoring
        Schedules to monitor SageMaker endpoints.

        Args:
            role (str): An AWS IAM role name or ARN. The Amazon SageMaker jobs use this role.
            instance_count (int): The number of instances to run the jobs with.
            instance_type (str): Type of EC2 instance to use for the job, for example,
                'ml.m5.xlarge'.
            volume_size_in_gb (int): Size in GB of the EBS volume
                to use for storing data during processing (default: 30).
            volume_kms_key (str): A KMS key for the processing volume.
            output_kms_key (str): The KMS key id for the job's outputs.
            max_runtime_in_seconds (int): Timeout in seconds. After this amount of
                time, Amazon SageMaker terminates the job regardless of its current status.
                Default: 3600
            base_job_name (str): Prefix for the job name. If not specified,
                a default name is generated based on the training image name and
                current timestamp.
            sagemaker_session (sagemaker.core.helper.session_helper.Session): Session object which
                manages interactions with Amazon SageMaker APIs and any other
                AWS services needed. If not specified, one is created using
                the default AWS configuration chain.
            env (dict): Environment variables to be passed to the job.
            tags (Optional[Tags]): List of tags to be passed to the job.
            network_config (sagemaker.network.NetworkConfig): A NetworkConfig
                object that configures network isolation, encryption of
                inter-container traffic, security group IDs, and subnets.

        """
        session = sagemaker_session or Session()
        super(DefaultModelMonitor, self).__init__(
            role=role,
            image_uri=DefaultModelMonitor._get_default_image_uri(session.boto_session.region_name),
            instance_count=instance_count,
            instance_type=instance_type,
            volume_size_in_gb=volume_size_in_gb,
            volume_kms_key=volume_kms_key,
            output_kms_key=output_kms_key,
            max_runtime_in_seconds=max_runtime_in_seconds,
            base_job_name=base_job_name,
            sagemaker_session=sagemaker_session,
            env=env,
            tags=format_tags(tags),
            network_config=network_config,
        )
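
    # A minimal construction sketch (role ARN assumed for illustration):
    #
    #     my_monitor = DefaultModelMonitor(
    #         role="arn:aws:iam::111122223333:role/SageMakerRole",
    #         instance_count=1,
    #         instance_type="ml.m5.xlarge",
    #     )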

    @classmethod
    def monitoring_type(cls):
        """Type of the monitoring job."""
        return "DataQuality"

    # ToDo: either support record_preprocessor_script or remove it from here. It has
    # not been removed due to backward compatibility issues
    def suggest_baseline(
        self,
        baseline_dataset,
        dataset_format,
        record_preprocessor_script=None,
        post_analytics_processor_script=None,
        output_s3_uri=None,
        wait=True,
        logs=True,
        job_name=None,
        monitoring_config_override=None,
    ):
        """Suggest baselines for use with Amazon SageMaker Model Monitoring Schedules.

        Args:
            baseline_dataset (str): The path to the baseline_dataset file. This can be a local path
                or an S3 uri.
            dataset_format (dict): The format of the baseline_dataset.
            record_preprocessor_script (str): The path to the record preprocessor script. This can
                be a local path or an S3 uri.
            post_analytics_processor_script (str): The path to the record post-analytics processor
                script. This can be a local path or an S3 uri.
            output_s3_uri (str): Desired S3 destination of the constraint_violations
                and statistics json files.
                Default: "s3://<default_session_bucket>/<job_name>/output"
            wait (bool): Whether the call should wait until the job completes (default: True).
            logs (bool): Whether to show the logs produced by the job.
                Only meaningful when wait is True (default: True).
            job_name (str): Processing job name. If not specified, the processor generates
                a default job name, based on the image name and current timestamp.
            monitoring_config_override (DataQualityMonitoringConfig): monitoring_config object to
                override the global monitoring_config parameter of constraints suggested by
                the Model Monitor container. If not specified, the values suggested by the
                container are used.
        Returns:
            sagemaker.processing.ProcessingJob: The ProcessingJob object representing the
                baselining job.

        """
        if not DataQualityMonitoringConfig.valid_monitoring_config(monitoring_config_override):
            raise RuntimeError("Invalid value for monitoring_config_override.")

        self.latest_baselining_job_name = self._generate_baselining_job_name(job_name=job_name)

        normalized_baseline_dataset_input = self._upload_and_convert_to_processing_input(
            source=baseline_dataset,
            destination=str(
                pathlib.PurePosixPath(
                    _CONTAINER_BASE_PATH, _CONTAINER_INPUT_PATH, _BASELINE_DATASET_INPUT_NAME
                )
            ),
            name=_BASELINE_DATASET_INPUT_NAME,
        )

        # Unlike other input, dataset must be a directory for the Monitoring image.
        baseline_dataset_container_path = normalized_baseline_dataset_input.s3_input.local_path

        normalized_record_preprocessor_script_input = self._upload_and_convert_to_processing_input(
            source=record_preprocessor_script,
            destination=str(
                pathlib.PurePosixPath(
                    _CONTAINER_BASE_PATH,
                    _CONTAINER_INPUT_PATH,
                    _RECORD_PREPROCESSOR_SCRIPT_INPUT_NAME,
                )
            ),
            name=_RECORD_PREPROCESSOR_SCRIPT_INPUT_NAME,
        )

        record_preprocessor_script_container_path = None
        if normalized_record_preprocessor_script_input is not None:
            record_preprocessor_script_container_path = str(
                pathlib.PurePosixPath(
                    normalized_record_preprocessor_script_input.s3_input.local_path,
                    os.path.basename(record_preprocessor_script),
                )
            )

        normalized_post_processor_script_input = self._upload_and_convert_to_processing_input(
            source=post_analytics_processor_script,
            destination=str(
                pathlib.PurePosixPath(
                    _CONTAINER_BASE_PATH,
                    _CONTAINER_INPUT_PATH,
                    _POST_ANALYTICS_PROCESSOR_SCRIPT_INPUT_NAME,
                )
            ),
            name=_POST_ANALYTICS_PROCESSOR_SCRIPT_INPUT_NAME,
        )

        post_processor_script_container_path = None
        if normalized_post_processor_script_input is not None:
            post_processor_script_container_path = str(
                pathlib.PurePosixPath(
                    normalized_post_processor_script_input.s3_input.local_path,
                    os.path.basename(post_analytics_processor_script),
                )
            )

        normalized_baseline_output = self._normalize_baseline_output(output_s3_uri=output_s3_uri)

        categorical_drift_method = None
        if monitoring_config_override and monitoring_config_override.distribution_constraints:
            distribution_constraints = monitoring_config_override.distribution_constraints
            categorical_drift_method = distribution_constraints.categorical_drift_method

        normalized_env = self._generate_env_map(
            env=self.env,
            dataset_format=dataset_format,
            output_path=normalized_baseline_output.s3_output.local_path,
            enable_cloudwatch_metrics=False,  # Only supported for monitoring schedules
            dataset_source_container_path=baseline_dataset_container_path,
            record_preprocessor_script_container_path=record_preprocessor_script_container_path,
            post_processor_script_container_path=post_processor_script_container_path,
            categorical_drift_method=categorical_drift_method,
        )

        baselining_processor = Processor(
            role=self.role,
            image_uri=self.image_uri,
            instance_count=self.instance_count,
            instance_type=self.instance_type,
            entrypoint=self.entrypoint,
            volume_size_in_gb=self.volume_size_in_gb,
            volume_kms_key=self.volume_kms_key,
            output_kms_key=self.output_kms_key,
            max_runtime_in_seconds=self.max_runtime_in_seconds,
            base_job_name=self.base_job_name,
            sagemaker_session=self.sagemaker_session,
            env=normalized_env,
            tags=self.tags,
            network_config=self.network_config,
        )

        baseline_job_inputs_with_nones = [
            normalized_baseline_dataset_input,
            normalized_record_preprocessor_script_input,
            normalized_post_processor_script_input,
        ]

        baseline_job_inputs = [
            baseline_job_input
            for baseline_job_input in baseline_job_inputs_with_nones
            if baseline_job_input is not None
        ]

        baselining_processor.run(
            inputs=baseline_job_inputs,
            outputs=[normalized_baseline_output],
            arguments=self.arguments,
            wait=wait,
            logs=logs,
            job_name=self.latest_baselining_job_name,
        )

        # Create BaseliningJob manually since SageMaker 3.0 ProcessingJob has different attributes
        self.latest_baselining_job = BaseliningJob(
            sagemaker_session=self.sagemaker_session,
            job_name=self.latest_baselining_job_name,
            inputs=baseline_job_inputs,
            outputs=[normalized_baseline_output],
            output_kms_key=None,
        )
        self.baselining_jobs.append(self.latest_baselining_job)
        return baselining_processor.latest_job
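
    # A minimal baselining sketch (bucket layout assumed; DatasetFormat is assumed
    # to expose a csv() helper as in the classic SDK):
    #
    #     my_monitor.suggest_baseline(
    #         baseline_dataset="s3://my-bucket/baseline/training-dataset.csv",
    #         dataset_format=DatasetFormat.csv(header=True),
    #         output_s3_uri="s3://my-bucket/baseline/results",
    #     )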
    def create_monitoring_schedule(
        self,
        endpoint_input=None,
        record_preprocessor_script=None,
        post_analytics_processor_script=None,
        output_s3_uri=None,
        constraints=None,
        statistics=None,
        monitor_schedule_name=None,
        schedule_cron_expression=None,
        enable_cloudwatch_metrics=True,
        batch_transform_input=None,
        data_analysis_start_time=None,
        data_analysis_end_time=None,
    ):
        """Creates a monitoring schedule to monitor an Amazon SageMaker Endpoint.

        If constraints and statistics are provided, or if they are able to be retrieved from a
        previous baselining job associated with this monitor, those will be used.
        If constraints and statistics cannot be automatically retrieved, baseline_inputs will be
        required in order to kick off a baselining job.

        Args:
            endpoint_input (str or sagemaker.model_monitor.EndpointInput): The endpoint to monitor.
                This can either be the endpoint name or an EndpointInput. (default: None)
            record_preprocessor_script (str): The path to the record preprocessor script. This can
                be a local path or an S3 uri.
            post_analytics_processor_script (str): The path to the record post-analytics processor
                script. This can be a local path or an S3 uri.
            output_s3_uri (str): Desired S3 destination of the constraint_violations and
                statistics json files.
                Default: "s3://<default_session_bucket>/<job_name>/output"
            constraints (sagemaker.model_monitor.Constraints or str): If provided alongside
                statistics, these will be used for monitoring the endpoint. This can be a
                sagemaker.model_monitor.Constraints object or an s3_uri pointing to a constraints
                JSON file.
            statistics (sagemaker.model_monitor.Statistics or str): If provided alongside
                constraints, these will be used for monitoring the endpoint. This can be a
                sagemaker.model_monitor.Statistics object or an s3_uri pointing to a statistics
                JSON file.
            monitor_schedule_name (str): Schedule name. If not specified, the processor generates
                a default job name, based on the image name and current timestamp.
            schedule_cron_expression (str): The cron expression that dictates the frequency that
                this job runs at. See sagemaker.model_monitor.CronExpressionGenerator for valid
                expressions. Default: Daily.
            enable_cloudwatch_metrics (bool): Whether to publish cloudwatch metrics as part of
                the baselining or monitoring jobs.
            batch_transform_input (sagemaker.model_monitor.BatchTransformInput): Inputs to
                run the monitoring schedule on the batch transform (default: None)
            data_analysis_start_time (str): Start time for the data analysis window
                for the one time monitoring schedule (NOW), e.g. "-PT1H" (default: None)
            data_analysis_end_time (str): End time for the data analysis window
                for the one time monitoring schedule (NOW), e.g. "-PT1H" (default: None)
        """
        if self.job_definition_name is not None or self.monitoring_schedule_name is not None:
            message = (
                "It seems that this object was already used to create an Amazon Model "
                "Monitoring Schedule. To create another, first delete the existing one "
                "using my_monitor.delete_monitoring_schedule()."
            )
            logger.error(message)
            raise ValueError(message)

        if (batch_transform_input is not None) ^ (endpoint_input is None):
            message = (
                "Need to have either batch_transform_input or endpoint_input to create an "
                "Amazon Model Monitoring Schedule. "
                "Please provide only one of the above required inputs"
            )
            logger.error(message)
            raise ValueError(message)

        self._check_monitoring_schedule_cron_validity(
            schedule_cron_expression=schedule_cron_expression,
            data_analysis_start_time=data_analysis_start_time,
            data_analysis_end_time=data_analysis_end_time,
        )

        # create job definition
        monitor_schedule_name = self._generate_monitoring_schedule_name(
            schedule_name=monitor_schedule_name
        )
        new_job_definition_name = name_from_base(self.JOB_DEFINITION_BASE_NAME)
        request_dict = self._build_create_data_quality_job_definition_request(
            monitoring_schedule_name=monitor_schedule_name,
            job_definition_name=new_job_definition_name,
            image_uri=self.image_uri,
            latest_baselining_job_name=self.latest_baselining_job_name,
            endpoint_input=endpoint_input,
            record_preprocessor_script=record_preprocessor_script,
            post_analytics_processor_script=post_analytics_processor_script,
            output_s3_uri=self._normalize_monitoring_output(
                monitor_schedule_name, output_s3_uri
            ).s3_output.s3_uri,
            constraints=constraints,
            statistics=statistics,
            enable_cloudwatch_metrics=enable_cloudwatch_metrics,
            role=self.role,
            instance_count=self.instance_count,
            instance_type=self.instance_type,
            volume_size_in_gb=self.volume_size_in_gb,
            volume_kms_key=self.volume_kms_key,
            output_kms_key=self.output_kms_key,
            max_runtime_in_seconds=self.max_runtime_in_seconds,
            env=self.env,
            tags=self.tags,
            network_config=self.network_config,
            batch_transform_input=batch_transform_input,
        )
        self.sagemaker_session.sagemaker_client.create_data_quality_job_definition(**request_dict)

        # create schedule
        try:
            self._create_monitoring_schedule_from_job_definition(
                monitor_schedule_name=monitor_schedule_name,
                job_definition_name=new_job_definition_name,
                schedule_cron_expression=schedule_cron_expression,
                data_analysis_end_time=data_analysis_end_time,
                data_analysis_start_time=data_analysis_start_time,
            )
            self.job_definition_name = new_job_definition_name
            self.monitoring_schedule_name = monitor_schedule_name
        except Exception:
            logger.exception("Failed to create monitoring schedule.")
            self.monitoring_schedule_name = None
            # noinspection PyBroadException
            try:
                self.sagemaker_session.sagemaker_client.delete_data_quality_job_definition(
                    JobDefinitionName=new_job_definition_name
                )
            except Exception:  # pylint: disable=W0703
                message = "Failed to delete job definition {}.".format(new_job_definition_name)
                logger.exception(message)
            raise

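    # A minimal usage sketch (illustrative; endpoint and bucket names are
    # hypothetical placeholders). Note the XOR check above: exactly one of
    # endpoint_input or batch_transform_input may be supplied, otherwise a
    # ValueError is raised.
    #
    #   from sagemaker.model_monitor import CronExpressionGenerator
    #
    #   my_monitor.create_monitoring_schedule(
    #       endpoint_input="my-endpoint",
    #       output_s3_uri="s3://my-bucket/monitoring/output",
    #       statistics=my_monitor.baseline_statistics(),
    #       constraints=my_monitor.suggested_constraints(),
    #       schedule_cron_expression=CronExpressionGenerator.daily(),
    #   )
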
    def update_monitoring_schedule(
        self,
        endpoint_input=None,
        record_preprocessor_script=None,
        post_analytics_processor_script=None,
        output_s3_uri=None,
        statistics=None,
        constraints=None,
        schedule_cron_expression=None,
        instance_count=None,
        instance_type=None,
        volume_size_in_gb=None,
        volume_kms_key=None,
        output_kms_key=None,
        max_runtime_in_seconds=None,
        env=None,
        network_config=None,
        enable_cloudwatch_metrics=None,
        role=None,
        batch_transform_input=None,
        data_analysis_start_time=None,
        data_analysis_end_time=None,
    ):
        """Updates the existing monitoring schedule.

        Args:
            endpoint_input (str or sagemaker.model_monitor.EndpointInput): The endpoint to monitor.
                This can either be the endpoint name or an EndpointInput.
            record_preprocessor_script (str): The path to the record preprocessor script. This can
                be a local path or an S3 uri.
            post_analytics_processor_script (str): The path to the record post-analytics processor
                script. This can be a local path or an S3 uri.
            output_s3_uri (str): Desired S3 destination of the constraint_violations and
                statistics json files.
            statistics (sagemaker.model_monitor.Statistics or str): If provided alongside
                constraints, these will be used for monitoring the endpoint. This can be a
                sagemaker.model_monitor.Statistics object or an S3 uri pointing to a statistics
                JSON file.
            constraints (sagemaker.model_monitor.Constraints or str): If provided alongside
                statistics, these will be used for monitoring the endpoint. This can be a
                sagemaker.model_monitor.Constraints object or an S3 uri pointing to a constraints
                JSON file.
            schedule_cron_expression (str): The cron expression that dictates the frequency that
                this job runs at. See sagemaker.model_monitor.CronExpressionGenerator for valid
                expressions.
            instance_count (int): The number of instances to run
                the jobs with.
            instance_type (str): Type of EC2 instance to use for
                the job, for example, 'ml.m5.xlarge'.
            volume_size_in_gb (int): Size in GB of the EBS volume
                to use for storing data during processing (default: 30).
            volume_kms_key (str): A KMS key for the job's volume.
            output_kms_key (str): The KMS key id for the job's outputs.
            max_runtime_in_seconds (int): Timeout in seconds. After this amount of
                time, Amazon SageMaker terminates the job regardless of its current status.
                Default: 3600
            env (dict): Environment variables to be passed to the job.
            network_config (sagemaker.network.NetworkConfig): A NetworkConfig
                object that configures network isolation, encryption of
                inter-container traffic, security group IDs, and subnets.
            enable_cloudwatch_metrics (bool): Whether to publish cloudwatch metrics as part of
                the baselining or monitoring jobs.
            role (str): An AWS IAM role name or ARN. The Amazon SageMaker jobs use this role.
            batch_transform_input (sagemaker.model_monitor.BatchTransformInput): Inputs to
                run the monitoring schedule on the batch transform (default: None)
            data_analysis_start_time (str): Start time for the data analysis window
                for the one time monitoring schedule (NOW), e.g. "-PT1H" (default: None)
            data_analysis_end_time (str): End time for the data analysis window
                for the one time monitoring schedule (NOW), e.g. "-PT1H" (default: None)

        """

        if (batch_transform_input is not None) and (endpoint_input is not None):
            message = (
                "Cannot update both batch_transform_input and endpoint_input to update an "
                "Amazon Model Monitoring Schedule. "
                "Please provide at most one of the above required inputs"
            )
            logger.error(message)
            raise ValueError(message)

        # check if this schedule is in v2 format and update as per v2 format if it is
        if self.job_definition_name is not None:
            self._update_data_quality_monitoring_schedule(
                endpoint_input=endpoint_input,
                record_preprocessor_script=record_preprocessor_script,
                post_analytics_processor_script=post_analytics_processor_script,
                output_s3_uri=output_s3_uri,
                statistics=statistics,
                constraints=constraints,
                schedule_cron_expression=schedule_cron_expression,
                instance_count=instance_count,
                instance_type=instance_type,
                volume_size_in_gb=volume_size_in_gb,
                volume_kms_key=volume_kms_key,
                output_kms_key=output_kms_key,
                max_runtime_in_seconds=max_runtime_in_seconds,
                env=env,
                network_config=network_config,
                enable_cloudwatch_metrics=enable_cloudwatch_metrics,
                role=role,
                batch_transform_input=batch_transform_input,
                data_analysis_start_time=data_analysis_start_time,
                data_analysis_end_time=data_analysis_end_time,
            )
            return

        monitoring_inputs = None
        if endpoint_input is not None:
            monitoring_inputs = [self._normalize_endpoint_input(endpoint_input)._to_request_dict()]

        elif batch_transform_input is not None:
            monitoring_inputs = [batch_transform_input._to_request_dict()]

        record_preprocessor_script_s3_uri = None
        if record_preprocessor_script is not None:
            record_preprocessor_script_s3_uri = self._s3_uri_from_local_path(
                path=record_preprocessor_script
            )

        post_analytics_processor_script_s3_uri = None
        if post_analytics_processor_script is not None:
            post_analytics_processor_script_s3_uri = self._s3_uri_from_local_path(
                path=post_analytics_processor_script
            )

        monitoring_output_config = None
        output_path = None
        if output_s3_uri is not None:
            normalized_monitoring_output = self._normalize_monitoring_output(
                monitoring_schedule_name=self.monitoring_schedule_name,
                output_s3_uri=output_s3_uri,
            )
            monitoring_output_config = {
                "MonitoringOutputs": [normalized_monitoring_output._to_request_dict()]
            }
            output_path = normalized_monitoring_output.s3_output.local_path

        if env is not None:
            self.env = env

        normalized_env = self._generate_env_map(
            env=env, output_path=output_path, enable_cloudwatch_metrics=enable_cloudwatch_metrics
        )

        statistics_object, constraints_object = self._get_baseline_files(
            statistics=statistics, constraints=constraints, sagemaker_session=self.sagemaker_session
        )

        statistics_s3_uri = None
        if statistics_object is not None:
            statistics_s3_uri = statistics_object.file_s3_uri

        constraints_s3_uri = None
        if constraints_object is not None:
            constraints_s3_uri = constraints_object.file_s3_uri

        if instance_type is not None:
            self.instance_type = instance_type

        if instance_count is not None:
            self.instance_count = instance_count

        if volume_size_in_gb is not None:
            self.volume_size_in_gb = volume_size_in_gb

        if volume_kms_key is not None:
            self.volume_kms_key = volume_kms_key

        if output_kms_key is not None:
            self.output_kms_key = output_kms_key
            monitoring_output_config["KmsKeyId"] = self.output_kms_key

        if max_runtime_in_seconds is not None:
            self.max_runtime_in_seconds = max_runtime_in_seconds

        if network_config is not None:
            self.network_config = network_config

        network_config_dict = None
        if self.network_config is not None:
            network_config_dict = self.network_config._to_request_dict()

        if role is not None:
            self.role = role

        boto_update_monitoring_schedule(
            sagemaker_session=self.sagemaker_session,
            monitoring_schedule_name=self.monitoring_schedule_name,
            schedule_expression=schedule_cron_expression,
            constraints_s3_uri=constraints_s3_uri,
            statistics_s3_uri=statistics_s3_uri,
            monitoring_inputs=monitoring_inputs,
            monitoring_output_config=monitoring_output_config,
            instance_count=instance_count,
            instance_type=instance_type,
            volume_size_in_gb=volume_size_in_gb,
            volume_kms_key=volume_kms_key,
            record_preprocessor_source_uri=record_preprocessor_script_s3_uri,
            post_analytics_processor_source_uri=post_analytics_processor_script_s3_uri,
            max_runtime_in_seconds=max_runtime_in_seconds,
            environment=normalized_env,
            network_config=network_config_dict,
            role_arn=expand_role(self.sagemaker_session, self.role),
            data_analysis_start_time=data_analysis_start_time,
            data_analysis_end_time=data_analysis_end_time,
        )

        self._wait_for_schedule_changes_to_apply()

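    # A sketch of the schedule-expression options accepted above (assumptions:
    # CronExpressionGenerator.now() is available for one-time schedules, as in
    # recent SDK releases; the "-PT1H"/"-PT0H" offsets follow the
    # data_analysis_* docstrings).
    #
    #   # recurring hourly schedule
    #   my_monitor.update_monitoring_schedule(
    #       schedule_cron_expression=CronExpressionGenerator.hourly()
    #   )
    #
    #   # one-time (NOW) schedule analyzing the previous hour of captured data
    #   my_monitor.update_monitoring_schedule(
    #       schedule_cron_expression=CronExpressionGenerator.now(),
    #       data_analysis_start_time="-PT1H",
    #       data_analysis_end_time="-PT0H",
    #   )
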
    def _update_data_quality_monitoring_schedule(
        self,
        endpoint_input=None,
        record_preprocessor_script=None,
        post_analytics_processor_script=None,
        output_s3_uri=None,
        constraints=None,
        statistics=None,
        schedule_cron_expression=None,
        enable_cloudwatch_metrics=None,
        role=None,
        instance_count=None,
        instance_type=None,
        volume_size_in_gb=None,
        volume_kms_key=None,
        output_kms_key=None,
        max_runtime_in_seconds=None,
        env=None,
        network_config=None,
        batch_transform_input=None,
        data_analysis_start_time=None,
        data_analysis_end_time=None,
    ):
        """Updates the existing monitoring schedule.

        Args:
            endpoint_input (str or sagemaker.model_monitor.EndpointInput): The endpoint to monitor.
                This can either be the endpoint name or an EndpointInput.
            record_preprocessor_script (str): The path to the record preprocessor script. This can
                be a local path or an S3 uri.
            post_analytics_processor_script (str): The path to the record post-analytics processor
                script. This can be a local path or an S3 uri.
            output_s3_uri (str): S3 destination of the constraint_violations and analysis result.
                Default: "s3://<default_session_bucket>/<job_name>/output"
            constraints (sagemaker.model_monitor.Constraints or str): If provided it will be used
                for monitoring the endpoint. It can be a Constraints object or an S3 uri pointing
                to a constraints JSON file.
            statistics (sagemaker.model_monitor.Statistics or str): If provided alongside
                constraints, these will be used for monitoring the endpoint. This can be a
                sagemaker.model_monitor.Statistics object or an S3 uri pointing to a statistics
                JSON file.
            schedule_cron_expression (str): The cron expression that dictates the frequency that
                this job runs at. See sagemaker.model_monitor.CronExpressionGenerator for valid
                expressions. Default: Daily.
            enable_cloudwatch_metrics (bool): Whether to publish cloudwatch metrics as part of
                the baselining or monitoring jobs.
            role (str): An AWS IAM role. The Amazon SageMaker jobs use this role.
            instance_count (int): The number of instances to run
                the jobs with.
            instance_type (str): Type of EC2 instance to use for
                the job, for example, 'ml.m5.xlarge'.
            volume_size_in_gb (int): Size in GB of the EBS volume
                to use for storing data during processing (default: 30).
            volume_kms_key (str): A KMS key for the job's volume.
            output_kms_key (str): The KMS key id for the job's outputs.
            max_runtime_in_seconds (int): Timeout in seconds. After this amount of
                time, Amazon SageMaker terminates the job regardless of its current status.
                Default: 3600
            env (dict): Environment variables to be passed to the job.
            network_config (sagemaker.network.NetworkConfig): A NetworkConfig
                object that configures network isolation, encryption of
                inter-container traffic, security group IDs, and subnets.
            batch_transform_input (sagemaker.model_monitor.BatchTransformInput): Inputs to
                run the monitoring schedule on the batch transform (default: None)
            data_analysis_start_time (str): Start time for the data analysis window
                for the one time monitoring schedule (NOW), e.g. "-PT1H" (default: None)
            data_analysis_end_time (str): End time for the data analysis window
                for the one time monitoring schedule (NOW), e.g. "-PT1H" (default: None)
        """
        valid_args = {
            arg: value for arg, value in locals().items() if arg != "self" and value is not None
        }

        # Nothing to update
        if len(valid_args) <= 0:
            return

        # Only need to update schedule expression
        if len(valid_args) == 1 and schedule_cron_expression is not None:
            self._update_monitoring_schedule(
                self.job_definition_name,
                schedule_cron_expression,
                data_analysis_start_time,
                data_analysis_end_time,
            )
            return

        existing_desc = boto_describe_monitoring_schedule(
            sagemaker_session=self.sagemaker_session,
            monitoring_schedule_name=self.monitoring_schedule_name,
        )

        if (
            existing_desc.get("MonitoringScheduleConfig") is not None
            and existing_desc["MonitoringScheduleConfig"].get("ScheduleConfig") is not None
            and existing_desc["MonitoringScheduleConfig"]["ScheduleConfig"]["ScheduleExpression"]
            is not None
            and schedule_cron_expression is None
        ):
            schedule_cron_expression = existing_desc["MonitoringScheduleConfig"]["ScheduleConfig"][
                "ScheduleExpression"
            ]

        # Need to update schedule with a new job definition
        job_desc = self.sagemaker_session.sagemaker_client.describe_data_quality_job_definition(
            JobDefinitionName=self.job_definition_name
        )
        new_job_definition_name = name_from_base(self.JOB_DEFINITION_BASE_NAME)
        request_dict = self._build_create_data_quality_job_definition_request(
            monitoring_schedule_name=self.monitoring_schedule_name,
            job_definition_name=new_job_definition_name,
            image_uri=self.image_uri,
            existing_job_desc=job_desc,
            endpoint_input=endpoint_input,
            record_preprocessor_script=record_preprocessor_script,
            post_analytics_processor_script=post_analytics_processor_script,
            output_s3_uri=output_s3_uri,
            statistics=statistics,
            constraints=constraints,
            enable_cloudwatch_metrics=enable_cloudwatch_metrics,
            role=role,
            instance_count=instance_count,
            instance_type=instance_type,
            volume_size_in_gb=volume_size_in_gb,
            volume_kms_key=volume_kms_key,
            output_kms_key=output_kms_key,
            max_runtime_in_seconds=max_runtime_in_seconds,
            env=env,
            tags=self.tags,
            network_config=network_config,
            batch_transform_input=batch_transform_input,
        )
        self.sagemaker_session.sagemaker_client.create_data_quality_job_definition(**request_dict)
        try:
            self._update_monitoring_schedule(
                job_definition_name=new_job_definition_name,
                schedule_cron_expression=schedule_cron_expression,
                data_analysis_start_time=data_analysis_start_time,
                data_analysis_end_time=data_analysis_end_time,
            )
            self.job_definition_name = new_job_definition_name
            if role is not None:
                self.role = role
            if instance_count is not None:
                self.instance_count = instance_count
            if instance_type is not None:
                self.instance_type = instance_type
            if volume_size_in_gb is not None:
                self.volume_size_in_gb = volume_size_in_gb
            if volume_kms_key is not None:
                self.volume_kms_key = volume_kms_key
            if output_kms_key is not None:
                self.output_kms_key = output_kms_key
            if max_runtime_in_seconds is not None:
                self.max_runtime_in_seconds = max_runtime_in_seconds
            if env is not None:
                self.env = env
            if network_config is not None:
                self.network_config = network_config
        except Exception:
            logger.exception("Failed to update monitoring schedule.")
            # noinspection PyBroadException
            try:
                self.sagemaker_session.sagemaker_client.delete_data_quality_job_definition(
                    JobDefinitionName=new_job_definition_name
                )
            except Exception:  # pylint: disable=W0703
                message = "Failed to delete job definition {}.".format(new_job_definition_name)
                logger.exception(message)
            raise

    def delete_monitoring_schedule(self):
        """Deletes the monitoring schedule and its job definition."""
        super(DefaultModelMonitor, self).delete_monitoring_schedule()
        if self.job_definition_name is not None:
            # Delete job definition.
            message = "Deleting Data Quality Job Definition with name: {}".format(
                self.job_definition_name
            )
            logger.info(message)
            self.sagemaker_session.sagemaker_client.delete_data_quality_job_definition(
                JobDefinitionName=self.job_definition_name
            )
            self.job_definition_name = None

    def run_baseline(self):
        """Not implemented.

        '.run_baseline()' is only allowed for ModelMonitor objects. Please use
        `suggest_baseline` for DefaultModelMonitor objects, instead.

        Raises:
            NotImplementedError
        """
        raise NotImplementedError(
            "'.run_baseline()' is only allowed for ModelMonitor objects. "
            "Please use suggest_baseline for DefaultModelMonitor objects, instead."
        )

    @classmethod
    def attach(cls, monitor_schedule_name, sagemaker_session=None):
        """Sets this object's schedule name to the name provided.

        This allows subsequent describe_schedule or list_executions calls to point
        to the given schedule.

        Args:
            monitor_schedule_name (str): The name of the schedule to attach to.
            sagemaker_session (sagemaker.core.helper.session.Session): Session object which
                manages interactions with Amazon SageMaker APIs and any other
                AWS services needed. If not specified, one is created using
                the default AWS configuration chain.
        """
        sagemaker_session = sagemaker_session or Session()
        schedule_desc = boto_describe_monitoring_schedule(
            sagemaker_session=sagemaker_session, monitoring_schedule_name=monitor_schedule_name
        )

        job_definition_name = schedule_desc["MonitoringScheduleConfig"].get(
            "MonitoringJobDefinitionName"
        )
        if job_definition_name:
            monitoring_type = schedule_desc["MonitoringScheduleConfig"].get("MonitoringType")
            if monitoring_type != cls.monitoring_type():
                raise TypeError(
                    "{} can only attach to Data quality monitoring schedule.".format(
                        __class__.__name__
                    )
                )
            job_desc = sagemaker_session.sagemaker_client.describe_data_quality_job_definition(
                JobDefinitionName=job_definition_name
            )
            tags = list_tags(
                sagemaker_session=sagemaker_session,
                resource_arn=schedule_desc["MonitoringScheduleArn"],
            )

            return ModelMonitor._attach(
                clazz=cls,
                sagemaker_session=sagemaker_session,
                schedule_desc=schedule_desc,
                job_desc=job_desc,
                tags=tags,
            )

        job_definition = schedule_desc["MonitoringScheduleConfig"]["MonitoringJobDefinition"]
        role = job_definition["RoleArn"]
        cluster_config = job_definition["MonitoringResources"]["ClusterConfig"]
        instance_count = cluster_config["InstanceCount"]
        instance_type = cluster_config["InstanceType"]
        volume_size_in_gb = cluster_config["VolumeSizeInGB"]
        volume_kms_key = cluster_config.get("VolumeKmsKeyId")
        output_kms_key = job_definition["MonitoringOutputConfig"].get("KmsKeyId")
        max_runtime_in_seconds = job_definition.get("StoppingCondition", {}).get(
            "MaxRuntimeInSeconds"
        )
        env = job_definition["Environment"]

        network_config_dict = job_definition.get("NetworkConfig", {})
        network_config = None
        if network_config_dict:
            vpc_config = network_config_dict.get("VpcConfig", {})
            security_group_ids = vpc_config.get("SecurityGroupIds")
            subnets = vpc_config.get("Subnets")
            network_config = NetworkConfig(
                enable_network_isolation=network_config_dict["EnableNetworkIsolation"],
                encrypt_inter_container_traffic=network_config_dict[
                    "EnableInterContainerTrafficEncryption"
                ],
                security_group_ids=security_group_ids,
                subnets=subnets,
            )

        tags = list_tags(
            sagemaker_session=sagemaker_session, resource_arn=schedule_desc["MonitoringScheduleArn"]
        )

        attached_monitor = cls(
            role=role,
            instance_count=instance_count,
            instance_type=instance_type,
            volume_size_in_gb=volume_size_in_gb,
            volume_kms_key=volume_kms_key,
            output_kms_key=output_kms_key,
            max_runtime_in_seconds=max_runtime_in_seconds,
            sagemaker_session=sagemaker_session,
            env=env,
            tags=tags,
            network_config=network_config,
        )
        attached_monitor.monitoring_schedule_name = monitor_schedule_name
        return attached_monitor

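    # A re-attachment sketch (the schedule name is a hypothetical placeholder):
    # a monitor created elsewhere can be rehydrated from its schedule name
    # alone, after which describe/list calls point at that schedule.
    #
    #   my_monitor = DefaultModelMonitor.attach("my-monitoring-schedule")
    #   print(my_monitor.describe_schedule())
    #   executions = my_monitor.list_executions()
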
    def latest_monitoring_statistics(self):
        """Returns the sagemaker.model_monitor.Statistics.

        These are the statistics generated by the latest monitoring execution.

        Returns:
            sagemaker.model_monitoring.Statistics: The Statistics object representing the file
                generated by the latest monitoring execution.

        """
        executions = self.list_executions()
        if len(executions) == 0:
            logger.warning(
                "No executions found for schedule. monitoring_schedule_name: %s",
                self.monitoring_schedule_name,
            )
            return None

        latest_monitoring_execution = executions[-1]

        try:
            return latest_monitoring_execution.statistics()
        except ClientError:
            status = latest_monitoring_execution.describe()["ProcessingJobStatus"]
            logger.warning(
                "Unable to retrieve statistics as job is in status '%s'. Latest statistics only "
                "available for completed executions.",
                status,
            )

    def latest_monitoring_constraint_violations(self):
        """Returns the sagemaker.model_monitor.ConstraintViolations.

        These are the constraint violations generated by the latest monitoring execution.

        Returns:
            sagemaker.model_monitoring.ConstraintViolations: The ConstraintViolations object
                representing the file generated by the latest monitoring execution.

        """
        executions = self.list_executions()
        if len(executions) == 0:
            logger.warning(
                "No executions found for schedule. monitoring_schedule_name: %s",
                self.monitoring_schedule_name,
            )
            return None

        latest_monitoring_execution = executions[-1]
        try:
            return latest_monitoring_execution.constraint_violations()
        except ClientError:
            status = latest_monitoring_execution.describe()["ProcessingJobStatus"]
            logger.warning(
                "Unable to retrieve constraint violations as job is in status '%s'. Latest "
                "violations only available for completed executions.",
                status,
            )

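    # A retrieval sketch for the two helpers above (assumes at least one
    # completed execution; both log a warning and return None otherwise, and
    # body_dict exposes the parsed JSON of the generated files).
    #
    #   stats = my_monitor.latest_monitoring_statistics()
    #   if stats is not None:
    #       print(stats.body_dict["dataset"]["item_count"])
    #
    #   violations = my_monitor.latest_monitoring_constraint_violations()
    #   if violations is not None:
    #       for violation in violations.body_dict["violations"]:
    #           print(violation["feature_name"], violation["constraint_check_type"])
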
    @staticmethod
    def _get_default_image_uri(region):
        """Returns the Default Model Monitoring image uri based on the region.

        Args:
            region (str): The AWS region.

        Returns:
            str: The Default Model Monitoring image uri based on the region.
        """
        return image_uris.retrieve(framework=framework_name, region=region)

    def _build_create_data_quality_job_definition_request(
        self,
        monitoring_schedule_name,
        job_definition_name,
        image_uri,
        latest_baselining_job_name=None,
        existing_job_desc=None,
        endpoint_input=None,
        record_preprocessor_script=None,
        post_analytics_processor_script=None,
        output_s3_uri=None,
        statistics=None,
        constraints=None,
        enable_cloudwatch_metrics=None,
        role=None,
        instance_count=None,
        instance_type=None,
        volume_size_in_gb=None,
        volume_kms_key=None,
        output_kms_key=None,
        max_runtime_in_seconds=None,
        env=None,
        tags=None,
        network_config=None,
        batch_transform_input=None,
    ):
        """Build the request for the job definition creation API.

        Args:
            monitoring_schedule_name (str): Monitoring schedule name.
            job_definition_name (str): Job definition name.
                If not specified then a default one will be generated.
            image_uri (str): The uri of the image to use for the jobs started by the Monitor.
            latest_baselining_job_name (str): Name of the last baselining job.
            existing_job_desc (dict): Description of the existing job definition. It will be
                updated by the values that were passed in, and then used to create the new
                job definition.
            endpoint_input (str or sagemaker.model_monitor.EndpointInput): The endpoint to monitor.
                This can either be the endpoint name or an EndpointInput.
            output_s3_uri (str): S3 destination of the constraint_violations and analysis result.
                Default: "s3://<default_session_bucket>/<job_name>/output"
            constraints (sagemaker.model_monitor.Constraints or str): If provided it will be used
                for monitoring the endpoint. It can be a Constraints object or an S3 uri pointing
                to a constraints JSON file.
            enable_cloudwatch_metrics (bool): Whether to publish cloudwatch metrics as part of
                the baselining or monitoring jobs.
            role (str): An AWS IAM role. The Amazon SageMaker jobs use this role.
            instance_count (int): The number of instances to run
                the jobs with.
            instance_type (str): Type of EC2 instance to use for
                the job, for example, 'ml.m5.xlarge'.
            volume_size_in_gb (int): Size in GB of the EBS volume
                to use for storing data during processing (default: 30).
            volume_kms_key (str): A KMS key for the job's volume.
            output_kms_key (str): KMS key id for output.
            max_runtime_in_seconds (int): Timeout in seconds. After this amount of
                time, Amazon SageMaker terminates the job regardless of its current status.
                Default: 3600
            env (dict): Environment variables to be passed to the job.
            tags (Optional[Tags]): List of tags to be passed to the job.
            network_config (sagemaker.network.NetworkConfig): A NetworkConfig
                object that configures network isolation, encryption of
                inter-container traffic, security group IDs, and subnets.
            batch_transform_input (sagemaker.model_monitor.BatchTransformInput): Inputs to
                run the monitoring schedule on the batch transform.

        Returns:
            dict: Request parameters to create the job definition.
        """
        if existing_job_desc is not None:
            app_specification = existing_job_desc[
                "{}AppSpecification".format(self.monitoring_type())
            ]
            baseline_config = existing_job_desc.get(
                "{}BaselineConfig".format(self.monitoring_type()), {}
            )
            job_input = existing_job_desc["{}JobInput".format(self.monitoring_type())]
            job_output = existing_job_desc["{}JobOutputConfig".format(self.monitoring_type())]
            cluster_config = existing_job_desc["JobResources"]["ClusterConfig"]
            if role is None:
                role = existing_job_desc["RoleArn"]
            existing_network_config = existing_job_desc.get("NetworkConfig")
            stop_condition = existing_job_desc.get("StoppingCondition", {})
        else:
            app_specification = {}
            baseline_config = {}
            job_input = {}
            job_output = {}
            cluster_config = {}
            existing_network_config = None
            stop_condition = {}

        # app specification
        record_preprocessor_script_s3_uri = None
        if record_preprocessor_script is not None:
            record_preprocessor_script_s3_uri = self._s3_uri_from_local_path(
                path=record_preprocessor_script
            )

        post_analytics_processor_script_s3_uri = None
        if post_analytics_processor_script is not None:
            post_analytics_processor_script_s3_uri = self._s3_uri_from_local_path(
                path=post_analytics_processor_script
            )

        app_specification["ImageUri"] = image_uri
        if post_analytics_processor_script_s3_uri:
            app_specification["PostAnalyticsProcessorSourceUri"] = (
                post_analytics_processor_script_s3_uri
            )
        if record_preprocessor_script_s3_uri:
            app_specification["RecordPreprocessorSourceUri"] = record_preprocessor_script_s3_uri

        normalized_env = self._generate_env_map(
            env=env,
            enable_cloudwatch_metrics=enable_cloudwatch_metrics,
        )
        if normalized_env:
            app_specification["Environment"] = normalized_env

        # baseline config
        # noinspection PyTypeChecker
        statistics_object, constraints_object = self._get_baseline_files(
            statistics=statistics,
            constraints=constraints,
            sagemaker_session=self.sagemaker_session,
        )
        if constraints_object is not None:
            constraints_s3_uri = constraints_object.file_s3_uri
            baseline_config["ConstraintsResource"] = dict(S3Uri=constraints_s3_uri)
        if statistics_object is not None:
            statistics_s3_uri = statistics_object.file_s3_uri
            baseline_config["StatisticsResource"] = dict(S3Uri=statistics_s3_uri)
        # ConstraintsResource and BaseliningJobName can co-exist in BYOC case
        if latest_baselining_job_name:
            baseline_config["BaseliningJobName"] = latest_baselining_job_name

        # job input
        if endpoint_input is not None:
            normalized_endpoint_input = self._normalize_endpoint_input(
                endpoint_input=endpoint_input
            )
            job_input = normalized_endpoint_input._to_request_dict()
        elif batch_transform_input is not None:
            job_input = batch_transform_input._to_request_dict()

        # job output
        if output_s3_uri is not None:
            normalized_monitoring_output = self._normalize_monitoring_output(
                monitoring_schedule_name, output_s3_uri
            )
            job_output["MonitoringOutputs"] = [normalized_monitoring_output._to_request_dict()]
        if output_kms_key is not None:
            job_output["KmsKeyId"] = output_kms_key

        # cluster config
        if instance_count is not None:
            cluster_config["InstanceCount"] = instance_count
        if instance_type is not None:
            cluster_config["InstanceType"] = instance_type
        if volume_size_in_gb is not None:
            cluster_config["VolumeSizeInGB"] = volume_size_in_gb
        if volume_kms_key is not None:
            cluster_config["VolumeKmsKeyId"] = volume_kms_key

        # stop condition
        if max_runtime_in_seconds is not None:
            stop_condition["MaxRuntimeInSeconds"] = max_runtime_in_seconds

        request_dict = {
            "JobDefinitionName": job_definition_name,
            "{}AppSpecification".format(self.monitoring_type()): app_specification,
            "{}JobInput".format(self.monitoring_type()): job_input,
            "{}JobOutputConfig".format(self.monitoring_type()): job_output,
            "JobResources": dict(ClusterConfig=cluster_config),
            "RoleArn": expand_role(self.sagemaker_session, role),
        }

        if baseline_config:
            request_dict["{}BaselineConfig".format(self.monitoring_type())] = baseline_config

        if network_config is not None:
            network_config_dict = network_config._to_request_dict()
            request_dict["NetworkConfig"] = network_config_dict
        elif existing_network_config is not None:
            request_dict["NetworkConfig"] = existing_network_config

        if stop_condition:
            request_dict["StoppingCondition"] = stop_condition

        if tags is not None:
            request_dict["Tags"] = format_tags(tags)

        return request_dict


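# An illustrative shape of the request assembled by
# _build_create_data_quality_job_definition_request above (values are
# hypothetical; the keys follow the CreateDataQualityJobDefinition API, with
# monitoring_type() == "DataQuality" filling the {} placeholders):
#
#   {
#       "JobDefinitionName": "data-quality-job-definition-<timestamp>",
#       "DataQualityAppSpecification": {"ImageUri": "<model-monitor image uri>"},
#       "DataQualityBaselineConfig": {"ConstraintsResource": {"S3Uri": "s3://..."}},
#       "DataQualityJobInput": {"EndpointInput": {"EndpointName": "my-endpoint"}},
#       "DataQualityJobOutputConfig": {"MonitoringOutputs": [...]},
#       "JobResources": {"ClusterConfig": {"InstanceCount": 1, "InstanceType": "ml.m5.xlarge"}},
#       "RoleArn": "arn:aws:iam::<account>:role/<role>",
#       "StoppingCondition": {"MaxRuntimeInSeconds": 3600},
#   }
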
class ModelQualityMonitor(ModelMonitor):
    """Amazon SageMaker model monitor to monitor quality metrics for an endpoint.

    Please see the __init__ method of its base class for how to instantiate it.
    """

    JOB_DEFINITION_BASE_NAME = "model-quality-job-definition"

    def __init__(
        self,
        role=None,
        instance_count=1,
        instance_type="ml.m5.xlarge",
        volume_size_in_gb=30,
        volume_kms_key=None,
        output_kms_key=None,
        max_runtime_in_seconds=None,
        base_job_name=None,
        sagemaker_session=None,
        env=None,
        tags=None,
        network_config=None,
    ):
        """Initializes a monitor instance.

        The monitor handles baselining datasets and creating Amazon SageMaker
        Monitoring Schedules to monitor SageMaker endpoints.

        Args:
            role (str): An AWS IAM role. The Amazon SageMaker jobs use this role.
            instance_count (int): The number of instances to run
                the jobs with.
            instance_type (str): Type of EC2 instance to use for
                the job, for example, 'ml.m5.xlarge'.
            volume_size_in_gb (int): Size in GB of the EBS volume
                to use for storing data during processing (default: 30).
            volume_kms_key (str): A KMS key for the job's volume.
            output_kms_key (str): The KMS key id for the job's outputs.
            max_runtime_in_seconds (int): Timeout in seconds. After this amount of
                time, Amazon SageMaker terminates the job regardless of its current status.
                Default: 3600
            base_job_name (str): Prefix for the job name. If not specified,
                a default name is generated based on the training image name and
                current timestamp.
            sagemaker_session (sagemaker.core.helper.session_helper.Session): Session object which
                manages interactions with Amazon SageMaker APIs and any other
                AWS services needed. If not specified, one is created using
                the default AWS configuration chain.
            env (dict): Environment variables to be passed to the job.
            tags (Optional[Tags]): List of tags to be passed to the job.
            network_config (sagemaker.network.NetworkConfig): A NetworkConfig
                object that configures network isolation, encryption of
                inter-container traffic, security group IDs, and subnets.
        """

        session = sagemaker_session or Session()
        super(ModelQualityMonitor, self).__init__(
            role=role,
            image_uri=ModelQualityMonitor._get_default_image_uri(session.boto_session.region_name),
            instance_count=instance_count,
            instance_type=instance_type,
            volume_size_in_gb=volume_size_in_gb,
            volume_kms_key=volume_kms_key,
            output_kms_key=output_kms_key,
            max_runtime_in_seconds=max_runtime_in_seconds,
            base_job_name=base_job_name,
            sagemaker_session=session,
            env=env,
            tags=format_tags(tags),
            network_config=network_config,
        )

    @classmethod
    def monitoring_type(cls):
        """Type of the monitoring job."""
        return "ModelQuality"

    def suggest_baseline(
        self,
        baseline_dataset,
        dataset_format,
        problem_type,
        inference_attribute=None,
        probability_attribute=None,
        ground_truth_attribute=None,
        probability_threshold_attribute=None,
        post_analytics_processor_script=None,
        output_s3_uri=None,
        wait=False,
        logs=False,
        job_name=None,
    ):
        """Suggest baselines for use with Amazon SageMaker Model Monitoring Schedules.

        Args:
            baseline_dataset (str): The path to the baseline_dataset file. This can be a local
                path or an S3 uri.
            dataset_format (dict): The format of the baseline_dataset.
            problem_type (str): The type of problem of this model quality monitoring. Valid
                values are "Regression", "BinaryClassification", "MulticlassClassification".
            inference_attribute (str): Index or JSONpath to locate predicted label(s).
                Only used for ModelQualityMonitor.
            probability_attribute (str or int): Index or JSONpath to locate probabilities.
                Only used for ModelQualityMonitor.
            ground_truth_attribute (str): Index to locate actual label(s).
                Only used for ModelQualityMonitor.
            probability_threshold_attribute (float): Threshold to convert probabilities to
                binaries. Only used for ModelQualityMonitor.
            post_analytics_processor_script (str): The path to the record post-analytics processor
                script. This can be a local path or an S3 uri.
            output_s3_uri (str): Desired S3 destination of the constraint_violations
                and statistics json files.
                Default: "s3://<default_session_bucket>/<job_name>/output"
            wait (bool): Whether the call should wait until the job completes (default: False).
            logs (bool): Whether to show the logs produced by the job.
                Only meaningful when wait is True (default: False).
            job_name (str): Processing job name. If not specified, the processor generates
                a default job name, based on the image name and current timestamp.

        Returns:
            sagemaker.processing.ProcessingJob: The ProcessingJob object representing the
                baselining job.

        """
        self.latest_baselining_job_name = self._generate_baselining_job_name(job_name=job_name)

        normalized_baseline_dataset_input = self._upload_and_convert_to_processing_input(
            source=baseline_dataset,
            destination=str(
                pathlib.PurePosixPath(
                    _CONTAINER_BASE_PATH, _CONTAINER_INPUT_PATH, _BASELINE_DATASET_INPUT_NAME
                )
            ),
            name=_BASELINE_DATASET_INPUT_NAME,
        )

        # Unlike other input, dataset must be a directory for the Monitoring image.
        baseline_dataset_container_path = normalized_baseline_dataset_input.s3_input.local_path

        normalized_post_processor_script_input = self._upload_and_convert_to_processing_input(
            source=post_analytics_processor_script,
            destination=str(
                pathlib.PurePosixPath(
                    _CONTAINER_BASE_PATH,
                    _CONTAINER_INPUT_PATH,
                    _POST_ANALYTICS_PROCESSOR_SCRIPT_INPUT_NAME,
                )
            ),
            name=_POST_ANALYTICS_PROCESSOR_SCRIPT_INPUT_NAME,
        )

        post_processor_script_container_path = None
        if normalized_post_processor_script_input is not None:
            post_processor_script_container_path = str(
                pathlib.PurePosixPath(
                    normalized_post_processor_script_input.s3_input.local_path,
                    os.path.basename(post_analytics_processor_script),
                )
            )

        normalized_baseline_output = self._normalize_baseline_output(output_s3_uri=output_s3_uri)

        normalized_env = self._generate_env_map(
            env=self.env,
            dataset_format=dataset_format,
            output_path=normalized_baseline_output.s3_output.local_path,
            enable_cloudwatch_metrics=False,  # Only supported for monitoring schedules
            dataset_source_container_path=baseline_dataset_container_path,
            post_processor_script_container_path=post_processor_script_container_path,
            analysis_type="MODEL_QUALITY",
            problem_type=problem_type,
            inference_attribute=inference_attribute,
            probability_attribute=probability_attribute,
            ground_truth_attribute=ground_truth_attribute,
            probability_threshold_attribute=probability_threshold_attribute,
        )

        baselining_processor = Processor(
            role=self.role,
            image_uri=self.image_uri,
            instance_count=self.instance_count,
            instance_type=self.instance_type,
            entrypoint=self.entrypoint,
            volume_size_in_gb=self.volume_size_in_gb,
            volume_kms_key=self.volume_kms_key,
            output_kms_key=self.output_kms_key,
            max_runtime_in_seconds=self.max_runtime_in_seconds,
            base_job_name=self.base_job_name,
            sagemaker_session=self.sagemaker_session,
            env=normalized_env,
            tags=self.tags,
            network_config=self.network_config,
        )

        baseline_job_inputs_with_nones = [
            normalized_baseline_dataset_input,
            normalized_post_processor_script_input,
        ]

        baseline_job_inputs = [
            baseline_job_input
            for baseline_job_input in baseline_job_inputs_with_nones
            if baseline_job_input is not None
        ]

        baselining_processor.run(
            inputs=baseline_job_inputs,
            outputs=[normalized_baseline_output],
            arguments=self.arguments,
            wait=wait,
            logs=logs,
            job_name=self.latest_baselining_job_name,
        )

        # Create BaseliningJob manually since SageMaker 3.0 ProcessingJob has different attributes
        self.latest_baselining_job = BaseliningJob(
            sagemaker_session=self.sagemaker_session,
            job_name=self.latest_baselining_job_name,
            inputs=baseline_job_inputs,
            outputs=[normalized_baseline_output],
            output_kms_key=None,
        )
        self.baselining_jobs.append(self.latest_baselining_job)
        return baselining_processor.latest_job

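    # A minimal usage sketch (dataset URI and attribute names are hypothetical
    # placeholders): suggesting a model-quality baseline from a validation set
    # that already contains predictions and ground-truth labels.
    #
    #   my_monitor = ModelQualityMonitor(
    #       role=role, instance_count=1, instance_type="ml.m5.xlarge"
    #   )
    #   my_monitor.suggest_baseline(
    #       baseline_dataset="s3://my-bucket/validation-with-predictions.csv",
    #       dataset_format=DatasetFormat.csv(header=True),
    #       problem_type="BinaryClassification",
    #       inference_attribute="prediction",
    #       probability_attribute="probability",
    #       ground_truth_attribute="label",
    #       output_s3_uri="s3://my-bucket/model-quality-baseline",
    #       wait=True,
    #   )
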
# noinspection PyMethodOverriding
|
|
3124
|
+
def create_monitoring_schedule(
|
|
3125
|
+
self,
|
|
3126
|
+
endpoint_input=None,
|
|
3127
|
+
ground_truth_input=None,
|
|
3128
|
+
problem_type=None,
|
|
3129
|
+
record_preprocessor_script=None,
|
|
3130
|
+
post_analytics_processor_script=None,
|
|
3131
|
+
output_s3_uri=None,
|
|
3132
|
+
constraints=None,
|
|
3133
|
+
        monitor_schedule_name=None,
        schedule_cron_expression=None,
        enable_cloudwatch_metrics=True,
        batch_transform_input=None,
        data_analysis_start_time=None,
        data_analysis_end_time=None,
    ):
        """Creates a monitoring schedule.

        Args:
            endpoint_input (str or sagemaker.model_monitor.EndpointInput): The endpoint to
                monitor. This can either be the endpoint name or an EndpointInput.
                (default: None)
            ground_truth_input (str): S3 URI to ground truth dataset.
                (default: None)
            problem_type (str): The type of problem of this model quality monitoring. Valid
                values are "Regression", "BinaryClassification", "MulticlassClassification".
                (default: None)
            record_preprocessor_script (str): The path to the record preprocessor script. This can
                be a local path or an S3 uri.
            post_analytics_processor_script (str): The path to the record post-analytics processor
                script. This can be a local path or an S3 uri.
            output_s3_uri (str): S3 destination of the constraint_violations and analysis result.
                Default: "s3://<default_session_bucket>/<job_name>/output"
            constraints (sagemaker.model_monitor.Constraints or str): If provided it will be used
                for monitoring the endpoint. It can be a Constraints object or an S3 uri pointing
                to a constraints JSON file.
            monitor_schedule_name (str): Schedule name. If not specified, the processor generates
                a default job name, based on the image name and current timestamp.
            schedule_cron_expression (str): The cron expression that dictates the frequency at
                which this job runs. See sagemaker.model_monitor.CronExpressionGenerator for valid
                expressions. Default: Daily.
            enable_cloudwatch_metrics (bool): Whether to publish cloudwatch metrics as part of
                the baselining or monitoring jobs.
            batch_transform_input (sagemaker.model_monitor.BatchTransformInput): Inputs to
                run the monitoring schedule on the batch transform.
            data_analysis_start_time (str): Start time for the data analysis window
                for the one time monitoring schedule (NOW), e.g. "-PT1H" (default: None)
            data_analysis_end_time (str): End time for the data analysis window
                for the one time monitoring schedule (NOW), e.g. "-PT1H" (default: None)
        """
        # These two parameters default to None in the signature only to preserve
        # backward-compatible positional-argument ordering; they are required,
        # so verify that they were actually provided.
        if not ground_truth_input:
            raise ValueError("ground_truth_input cannot be None.")
        if not problem_type:
            raise ValueError("problem_type cannot be None.")

        if self.job_definition_name is not None or self.monitoring_schedule_name is not None:
            message = (
                "It seems that this object was already used to create an Amazon Model "
                "Monitoring Schedule. To create another, first delete the existing one "
                "using my_monitor.delete_monitoring_schedule()."
            )
            logger.error(message)
            raise ValueError(message)

        if (batch_transform_input is not None) ^ (endpoint_input is None):
            message = (
                "Need to have either batch_transform_input or endpoint_input to create an "
                "Amazon Model Monitoring Schedule. "
                "Please provide only one of the above required inputs"
            )
            logger.error(message)
            raise ValueError(message)

        self._check_monitoring_schedule_cron_validity(
            schedule_cron_expression=schedule_cron_expression,
            data_analysis_start_time=data_analysis_start_time,
            data_analysis_end_time=data_analysis_end_time,
        )

        # create job definition
        monitor_schedule_name = self._generate_monitoring_schedule_name(
            schedule_name=monitor_schedule_name
        )
        new_job_definition_name = name_from_base(self.JOB_DEFINITION_BASE_NAME)
        request_dict = self._build_create_model_quality_job_definition_request(
            monitoring_schedule_name=monitor_schedule_name,
            job_definition_name=new_job_definition_name,
            image_uri=self.image_uri,
            latest_baselining_job_name=self.latest_baselining_job_name,
            endpoint_input=endpoint_input,
            ground_truth_input=ground_truth_input,
            problem_type=problem_type,
            record_preprocessor_script=record_preprocessor_script,
            post_analytics_processor_script=post_analytics_processor_script,
            output_s3_uri=self._normalize_monitoring_output(
                monitor_schedule_name, output_s3_uri
            ).s3_output.s3_uri,
            constraints=constraints,
            enable_cloudwatch_metrics=enable_cloudwatch_metrics,
            role=self.role,
            instance_count=self.instance_count,
            instance_type=self.instance_type,
            volume_size_in_gb=self.volume_size_in_gb,
            volume_kms_key=self.volume_kms_key,
            output_kms_key=self.output_kms_key,
            max_runtime_in_seconds=self.max_runtime_in_seconds,
            env=self.env,
            tags=self.tags,
            network_config=self.network_config,
            batch_transform_input=batch_transform_input,
        )
        self.sagemaker_session.sagemaker_client.create_model_quality_job_definition(**request_dict)

        # create schedule
        try:
            self._create_monitoring_schedule_from_job_definition(
                monitor_schedule_name=monitor_schedule_name,
                job_definition_name=new_job_definition_name,
                schedule_cron_expression=schedule_cron_expression,
                data_analysis_end_time=data_analysis_end_time,
                data_analysis_start_time=data_analysis_start_time,
            )
            self.job_definition_name = new_job_definition_name
            self.monitoring_schedule_name = monitor_schedule_name
        except Exception:
            logger.exception("Failed to create monitoring schedule.")
            self.monitoring_schedule_name = None
            # noinspection PyBroadException
            try:
                self.sagemaker_session.sagemaker_client.delete_model_quality_job_definition(
                    JobDefinitionName=new_job_definition_name
                )
            except Exception:  # pylint: disable=W0703
                message = "Failed to delete job definition {}.".format(new_job_definition_name)
                logger.exception(message)
            raise

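    # Editor's sketch (not part of the package source): minimal usage of the
    # method above. The role ARN, endpoint name, and S3 URIs are placeholders,
    # and the constructor arguments assume the usual ModelQualityMonitor
    # signature:
    #
    #     monitor = ModelQualityMonitor(
    #         role="arn:aws:iam::111122223333:role/SageMakerRole",
    #         instance_count=1,
    #         instance_type="ml.m5.xlarge",
    #     )
    #     monitor.create_monitoring_schedule(
    #         endpoint_input="my-endpoint",
    #         ground_truth_input="s3://my-bucket/ground-truth/",
    #         problem_type="BinaryClassification",
    #         output_s3_uri="s3://my-bucket/monitoring-output/",
    #     )
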
    def update_monitoring_schedule(
        self,
        endpoint_input=None,
        ground_truth_input=None,
        problem_type=None,
        record_preprocessor_script=None,
        post_analytics_processor_script=None,
        output_s3_uri=None,
        constraints=None,
        schedule_cron_expression=None,
        enable_cloudwatch_metrics=None,
        role=None,
        instance_count=None,
        instance_type=None,
        volume_size_in_gb=None,
        volume_kms_key=None,
        output_kms_key=None,
        max_runtime_in_seconds=None,
        env=None,
        network_config=None,
        batch_transform_input=None,
        data_analysis_start_time=None,
        data_analysis_end_time=None,
    ):
        """Updates the existing monitoring schedule.

        If anything other than schedule_cron_expression is to be updated, a new job
        definition is created to hold the updated settings. The old job definition
        is not deleted.

        Args:
            endpoint_input (str or sagemaker.model_monitor.EndpointInput): The endpoint
                to monitor. This can either be the endpoint name or an EndpointInput.
            ground_truth_input (str): S3 URI to ground truth dataset.
            problem_type (str): The type of problem of this model quality monitoring. Valid values
                are "Regression", "BinaryClassification", "MulticlassClassification".
            record_preprocessor_script (str): The path to the record preprocessor script. This can
                be a local path or an S3 uri.
            post_analytics_processor_script (str): The path to the record post-analytics processor
                script. This can be a local path or an S3 uri.
            output_s3_uri (str): S3 destination of the constraint_violations and analysis result.
                Default: "s3://<default_session_bucket>/<job_name>/output"
            constraints (sagemaker.model_monitor.Constraints or str): If provided it will be used
                for monitoring the endpoint. It can be a Constraints object or an S3 uri pointing
                to a constraints JSON file.
            schedule_cron_expression (str): The cron expression that dictates the frequency at
                which this job runs. See sagemaker.model_monitor.CronExpressionGenerator for valid
                expressions. Default: Daily.
            enable_cloudwatch_metrics (bool): Whether to publish cloudwatch metrics as part of
                the baselining or monitoring jobs.
            role (str): An AWS IAM role. The Amazon SageMaker jobs use this role.
            instance_count (int): The number of instances to run
                the jobs with.
            instance_type (str): Type of EC2 instance to use for
                the job, for example, 'ml.m5.xlarge'.
            volume_size_in_gb (int): Size in GB of the EBS volume
                to use for storing data during processing (default: 30).
            volume_kms_key (str): A KMS key for the job's volume.
            output_kms_key (str): The KMS key id for the job's outputs.
            max_runtime_in_seconds (int): Timeout in seconds. After this amount of
                time, Amazon SageMaker terminates the job regardless of its current status.
                Default: 3600
            env (dict): Environment variables to be passed to the job.
            network_config (sagemaker.network.NetworkConfig): A NetworkConfig
                object that configures network isolation, encryption of
                inter-container traffic, security group IDs, and subnets.
            batch_transform_input (sagemaker.model_monitor.BatchTransformInput): Inputs to
                run the monitoring schedule on the batch transform.
            data_analysis_start_time (str): Start time for the data analysis window
                for the one time monitoring schedule (NOW), e.g. "-PT1H" (default: None)
            data_analysis_end_time (str): End time for the data analysis window
                for the one time monitoring schedule (NOW), e.g. "-PT1H" (default: None)
        """
        valid_args = {
            arg: value for arg, value in locals().items() if arg != "self" and value is not None
        }

        # Nothing to update
        if len(valid_args) <= 0:
            return

        # Only the schedule expression needs to be updated
        if (
            len(valid_args) == 1
            and schedule_cron_expression is not None
            and schedule_cron_expression != CronExpressionGenerator.now()
        ):
            self._update_monitoring_schedule(self.job_definition_name, schedule_cron_expression)
            return

        if (batch_transform_input is not None) and (endpoint_input is not None):
            message = (
                "Cannot update both batch_transform_input and endpoint_input to update an "
                "Amazon Model Monitoring Schedule. "
                "Please provide at most one of the above inputs"
            )
            logger.error(message)
            raise ValueError(message)

        # Need to update the schedule with a new job definition
        job_desc = self.sagemaker_session.sagemaker_client.describe_model_quality_job_definition(
            JobDefinitionName=self.job_definition_name
        )
        new_job_definition_name = name_from_base(self.JOB_DEFINITION_BASE_NAME)
        request_dict = self._build_create_model_quality_job_definition_request(
            monitoring_schedule_name=self.monitoring_schedule_name,
            job_definition_name=new_job_definition_name,
            image_uri=self.image_uri,
            existing_job_desc=job_desc,
            endpoint_input=endpoint_input,
            ground_truth_input=ground_truth_input,
            problem_type=problem_type,
            record_preprocessor_script=record_preprocessor_script,
            post_analytics_processor_script=post_analytics_processor_script,
            output_s3_uri=output_s3_uri,
            constraints=constraints,
            enable_cloudwatch_metrics=enable_cloudwatch_metrics,
            role=role,
            instance_count=instance_count,
            instance_type=instance_type,
            volume_size_in_gb=volume_size_in_gb,
            volume_kms_key=volume_kms_key,
            output_kms_key=output_kms_key,
            max_runtime_in_seconds=max_runtime_in_seconds,
            env=env,
            tags=self.tags,
            network_config=network_config,
            batch_transform_input=batch_transform_input,
        )
        self.sagemaker_session.sagemaker_client.create_model_quality_job_definition(**request_dict)
        try:
            self._update_monitoring_schedule(
                new_job_definition_name,
                schedule_cron_expression,
                data_analysis_start_time,
                data_analysis_end_time,
            )
            self.job_definition_name = new_job_definition_name
            if role is not None:
                self.role = role
            if instance_count is not None:
                self.instance_count = instance_count
            if instance_type is not None:
                self.instance_type = instance_type
            if volume_size_in_gb is not None:
                self.volume_size_in_gb = volume_size_in_gb
            if volume_kms_key is not None:
                self.volume_kms_key = volume_kms_key
            if output_kms_key is not None:
                self.output_kms_key = output_kms_key
            if max_runtime_in_seconds is not None:
                self.max_runtime_in_seconds = max_runtime_in_seconds
            if env is not None:
                self.env = env
            if network_config is not None:
                self.network_config = network_config
        except Exception:
            logger.exception("Failed to update monitoring schedule.")
            # noinspection PyBroadException
            try:
                self.sagemaker_session.sagemaker_client.delete_model_quality_job_definition(
                    JobDefinitionName=new_job_definition_name
                )
            except Exception:  # pylint: disable=W0703
                message = "Failed to delete job definition {}.".format(new_job_definition_name)
                logger.exception(message)
            raise

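    # Editor's sketch (not part of the package source): switching an existing
    # schedule to an hourly cadence. Because only schedule_cron_expression is
    # passed, the fast path above is taken and no new job definition is
    # created. CronExpressionGenerator.hourly() is assumed to be available:
    #
    #     monitor.update_monitoring_schedule(
    #         schedule_cron_expression=CronExpressionGenerator.hourly(),
    #     )
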
    def delete_monitoring_schedule(self):
        """Deletes the monitoring schedule and its job definition."""
        super(ModelQualityMonitor, self).delete_monitoring_schedule()
        # Delete job definition.
        message = "Deleting Model Quality Job Definition with name: {}".format(
            self.job_definition_name
        )
        logger.info(message)
        self.sagemaker_session.sagemaker_client.delete_model_quality_job_definition(
            JobDefinitionName=self.job_definition_name
        )
        self.job_definition_name = None

    @classmethod
    def attach(cls, monitor_schedule_name, sagemaker_session=None):
        """Sets this object's schedule name to the name provided.

        This allows subsequent describe_schedule or list_executions calls to point
        to the given schedule.

        Args:
            monitor_schedule_name (str): The name of the schedule to attach to.
            sagemaker_session (sagemaker.core.helper.session_helper.Session): Session object which
                manages interactions with Amazon SageMaker APIs and any other
                AWS services needed. If not specified, one is created using
                the default AWS configuration chain.
        """
        sagemaker_session = sagemaker_session or Session()
        schedule_desc = boto_describe_monitoring_schedule(
            sagemaker_session=sagemaker_session, monitoring_schedule_name=monitor_schedule_name
        )
        monitoring_type = schedule_desc["MonitoringScheduleConfig"].get("MonitoringType")
        if monitoring_type != cls.monitoring_type():
            raise TypeError(
                "{} can only attach to ModelQuality monitoring schedule.".format(__class__.__name__)
            )
        job_definition_name = schedule_desc["MonitoringScheduleConfig"][
            "MonitoringJobDefinitionName"
        ]
        job_desc = sagemaker_session.sagemaker_client.describe_model_quality_job_definition(
            JobDefinitionName=job_definition_name
        )
        tags = list_tags(
            sagemaker_session=sagemaker_session, resource_arn=schedule_desc["MonitoringScheduleArn"]
        )
        return ModelMonitor._attach(
            clazz=cls,
            sagemaker_session=sagemaker_session,
            schedule_desc=schedule_desc,
            job_desc=job_desc,
            tags=tags,
        )

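    # Editor's sketch (not part of the package source): re-attaching to a
    # schedule created elsewhere, then tearing it down. The schedule name is a
    # placeholder:
    #
    #     monitor = ModelQualityMonitor.attach("my-monitoring-schedule")
    #     monitor.delete_monitoring_schedule()
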
    def _build_create_model_quality_job_definition_request(
        self,
        monitoring_schedule_name,
        job_definition_name,
        image_uri,
        latest_baselining_job_name=None,
        existing_job_desc=None,
        endpoint_input=None,
        ground_truth_input=None,
        problem_type=None,
        record_preprocessor_script=None,
        post_analytics_processor_script=None,
        output_s3_uri=None,
        constraints=None,
        enable_cloudwatch_metrics=None,
        role=None,
        instance_count=None,
        instance_type=None,
        volume_size_in_gb=None,
        volume_kms_key=None,
        output_kms_key=None,
        max_runtime_in_seconds=None,
        env=None,
        tags=None,
        network_config=None,
        batch_transform_input=None,
    ):
        """Build the request for the job definition creation API.

        Args:
            monitoring_schedule_name (str): Monitoring schedule name.
            job_definition_name (str): Job definition name.
                If not specified then a default one will be generated.
            image_uri (str): The uri of the image to use for the jobs started by the Monitor.
            latest_baselining_job_name (str): Name of the latest baselining job.
            existing_job_desc (dict): Description of an existing job definition. It will be
                updated by the values that were passed in, and then used to create the new
                job definition.
            endpoint_input (str or sagemaker.model_monitor.EndpointInput): The endpoint to monitor.
                This can either be the endpoint name or an EndpointInput.
            ground_truth_input (str): S3 URI to ground truth dataset.
            problem_type (str): The type of problem of this model quality monitoring. Valid
                values are "Regression", "BinaryClassification", "MulticlassClassification".
            record_preprocessor_script (str): The path to the record preprocessor script. This can
                be a local path or an S3 uri.
            post_analytics_processor_script (str): The path to the record post-analytics processor
                script. This can be a local path or an S3 uri.
            output_s3_uri (str): S3 destination of the constraint_violations and analysis result.
                Default: "s3://<default_session_bucket>/<job_name>/output"
            constraints (sagemaker.model_monitor.Constraints or str): If provided it will be used
                for monitoring the endpoint. It can be a Constraints object or an S3 uri pointing
                to a constraints JSON file.
            enable_cloudwatch_metrics (bool): Whether to publish cloudwatch metrics as part of
                the baselining or monitoring jobs.
            role (str): An AWS IAM role. The Amazon SageMaker jobs use this role.
            instance_count (int): The number of instances to run
                the jobs with.
            instance_type (str): Type of EC2 instance to use for
                the job, for example, 'ml.m5.xlarge'.
            volume_size_in_gb (int): Size in GB of the EBS volume
                to use for storing data during processing (default: 30).
            volume_kms_key (str): A KMS key for the job's volume.
            output_kms_key (str): KMS key id for output.
            max_runtime_in_seconds (int): Timeout in seconds. After this amount of
                time, Amazon SageMaker terminates the job regardless of its current status.
                Default: 3600
            env (dict): Environment variables to be passed to the job.
            tags (Optional[Tags]): List of tags to be passed to the job.
            network_config (sagemaker.network.NetworkConfig): A NetworkConfig
                object that configures network isolation, encryption of
                inter-container traffic, security group IDs, and subnets.
            batch_transform_input (sagemaker.model_monitor.BatchTransformInput): Inputs to
                run the monitoring schedule on the batch transform.

        Returns:
            dict: request parameters to create job definition.
        """
        if existing_job_desc is not None:
            app_specification = existing_job_desc[
                "{}AppSpecification".format(self.monitoring_type())
            ]
            baseline_config = existing_job_desc.get(
                "{}BaselineConfig".format(self.monitoring_type()), {}
            )
            job_input = existing_job_desc["{}JobInput".format(self.monitoring_type())]
            job_output = existing_job_desc["{}JobOutputConfig".format(self.monitoring_type())]
            cluster_config = existing_job_desc["JobResources"]["ClusterConfig"]
            if role is None:
                role = existing_job_desc["RoleArn"]
            existing_network_config = existing_job_desc.get("NetworkConfig")
            stop_condition = existing_job_desc.get("StoppingCondition", {})
        else:
            app_specification = {}
            baseline_config = {}
            job_input = {}
            job_output = {}
            cluster_config = {}
            existing_network_config = None
            stop_condition = {}

        # app specification
        app_specification["ImageUri"] = image_uri
        if problem_type is not None:
            app_specification["ProblemType"] = problem_type
        record_preprocessor_script_s3_uri = None
        if record_preprocessor_script is not None:
            record_preprocessor_script_s3_uri = self._s3_uri_from_local_path(
                path=record_preprocessor_script
            )

        post_analytics_processor_script_s3_uri = None
        if post_analytics_processor_script is not None:
            post_analytics_processor_script_s3_uri = self._s3_uri_from_local_path(
                path=post_analytics_processor_script
            )

        if post_analytics_processor_script_s3_uri:
            app_specification["PostAnalyticsProcessorSourceUri"] = (
                post_analytics_processor_script_s3_uri
            )
        if record_preprocessor_script_s3_uri:
            app_specification["RecordPreprocessorSourceUri"] = record_preprocessor_script_s3_uri

        normalized_env = self._generate_env_map(
            env=env, enable_cloudwatch_metrics=enable_cloudwatch_metrics
        )
        if normalized_env:
            app_specification["Environment"] = normalized_env

        # baseline config
        if constraints:
            # noinspection PyTypeChecker
            _, constraints_object = self._get_baseline_files(
                statistics=None, constraints=constraints, sagemaker_session=self.sagemaker_session
            )
            constraints_s3_uri = None
            if constraints_object is not None:
                constraints_s3_uri = constraints_object.file_s3_uri
            baseline_config["ConstraintsResource"] = dict(S3Uri=constraints_s3_uri)
        if latest_baselining_job_name:
            baseline_config["BaseliningJobName"] = latest_baselining_job_name

        # job input
        if endpoint_input is not None:
            normalized_endpoint_input = self._normalize_endpoint_input(
                endpoint_input=endpoint_input
            )
            job_input = normalized_endpoint_input._to_request_dict()
        elif batch_transform_input is not None:
            job_input = batch_transform_input._to_request_dict()

        if ground_truth_input is not None:
            job_input["GroundTruthS3Input"] = dict(S3Uri=ground_truth_input)

        # job output
        if output_s3_uri is not None:
            normalized_monitoring_output = self._normalize_monitoring_output(
                monitoring_schedule_name, output_s3_uri
            )
            job_output["MonitoringOutputs"] = [normalized_monitoring_output._to_request_dict()]
        if output_kms_key is not None:
            job_output["KmsKeyId"] = output_kms_key

        # cluster config
        if instance_count is not None:
            cluster_config["InstanceCount"] = instance_count
        if instance_type is not None:
            cluster_config["InstanceType"] = instance_type
        if volume_size_in_gb is not None:
            cluster_config["VolumeSizeInGB"] = volume_size_in_gb
        if volume_kms_key is not None:
            cluster_config["VolumeKmsKeyId"] = volume_kms_key

        # stop condition
        if max_runtime_in_seconds is not None:
            stop_condition["MaxRuntimeInSeconds"] = max_runtime_in_seconds

        request_dict = {
            "JobDefinitionName": job_definition_name,
            "{}AppSpecification".format(self.monitoring_type()): app_specification,
            "{}JobInput".format(self.monitoring_type()): job_input,
            "{}JobOutputConfig".format(self.monitoring_type()): job_output,
            "JobResources": dict(ClusterConfig=cluster_config),
            "RoleArn": expand_role(self.sagemaker_session, role),
        }

        if baseline_config:
            request_dict["{}BaselineConfig".format(self.monitoring_type())] = baseline_config

        if network_config is not None:
            network_config_dict = network_config._to_request_dict()
            request_dict["NetworkConfig"] = network_config_dict
        elif existing_network_config is not None:
            request_dict["NetworkConfig"] = existing_network_config

        if stop_condition:
            request_dict["StoppingCondition"] = stop_condition

        if tags is not None:
            request_dict["Tags"] = format_tags(tags)

        return request_dict

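    # For orientation (editor's sketch, not part of the package source): with
    # monitoring_type() == "ModelQuality", the request built above takes
    # roughly this shape before it is sent to
    # create_model_quality_job_definition:
    #
    #     {
    #         "JobDefinitionName": "model-quality-job-definition-...",
    #         "ModelQualityAppSpecification": {"ImageUri": "...", "ProblemType": "..."},
    #         "ModelQualityJobInput": {"EndpointInput": {...}, "GroundTruthS3Input": {...}},
    #         "ModelQualityJobOutputConfig": {"MonitoringOutputs": [...]},
    #         "JobResources": {"ClusterConfig": {...}},
    #         "RoleArn": "arn:aws:iam::...",
    #     }
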
    @staticmethod
    def _get_default_image_uri(region):
        """Returns the default Model Monitoring image uri based on the region.

        Args:
            region (str): The AWS region.

        Returns:
            str: The default Model Monitoring image uri based on the region.
        """
        return image_uris.retrieve(framework=framework_name, region=region)


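# Editor's sketch (not part of the package source): a one-time schedule that
# analyzes only the past hour of data, using the NOW cron expression together
# with a data analysis window. Endpoint name and S3 URI are placeholders:
#
#     monitor.create_monitoring_schedule(
#         endpoint_input="my-endpoint",
#         ground_truth_input="s3://my-bucket/ground-truth/",
#         problem_type="Regression",
#         schedule_cron_expression=CronExpressionGenerator.now(),
#         data_analysis_start_time="-PT1H",
#         data_analysis_end_time="-PT0H",
#     )

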
class BaseliningJob:
    """Provides functionality to retrieve baseline-specific files output from baselining job."""

    def __init__(self, sagemaker_session, job_name, inputs, outputs, output_kms_key=None):
        """Initializes a Baselining job.

        It tracks a baselining job kicked off by the suggest workflow.

        Args:
            sagemaker_session (sagemaker.core.helper.session_helper.Session): Session object which
                manages interactions with Amazon SageMaker APIs and any other
                AWS services needed. If not specified, one is created using
                the default AWS configuration chain.
            job_name (str): Name of the Amazon SageMaker Model Monitoring Baselining Job.
            inputs ([sagemaker.processing.ProcessingInput]): A list of ProcessingInput objects.
            outputs ([sagemaker.processing.ProcessingOutput]): A list of ProcessingOutput objects.
            output_kms_key (str): The output kms key associated with the job. Defaults to None
                if not provided.
        """
        self.sagemaker_session = sagemaker_session
        self.job_name = job_name
        self.inputs = inputs
        self.outputs = outputs
        self.output_kms_key = output_kms_key

    def describe(self):
        """Describe the processing job."""
        return self.sagemaker_session.sagemaker_client.describe_processing_job(
            ProcessingJobName=self.job_name
        )

    @classmethod
    def from_processing_job(cls, processing_job):
        """Initializes a Baselining job from a processing job.

        Args:
            processing_job (sagemaker.processing.ProcessingJob): The ProcessingJob to build
                the baselining instance from.

        Returns:
            sagemaker.model_monitor.BaseliningJob: The BaseliningJob instance created from
                the processing job.
        """
        return cls(
            processing_job.sagemaker_session,
            processing_job.job_name,
            processing_job.inputs,
            processing_job.outputs,
            processing_job.output_kms_key,
        )

    def baseline_statistics(self, file_name=STATISTICS_JSON_DEFAULT_FILE_NAME, kms_key=None):
        """Returns a sagemaker.model_monitor.Statistics object.

        It represents the statistics JSON file generated by this baselining job.

        Args:
            file_name (str): The name of the json-formatted statistics file.
            kms_key (str): The kms key to use when retrieving the file.

        Returns:
            sagemaker.model_monitor.Statistics: The Statistics object representing the file that
                was generated by the job.

        Raises:
            UnexpectedStatusException: This is thrown if the job is not in a 'Completed' state.
        """
        try:
            baselining_job_output_s3_path = self.outputs[0].s3_output.s3_uri
            return Statistics.from_s3_uri(
                statistics_file_s3_uri=s3.s3_path_join(baselining_job_output_s3_path, file_name),
                kms_key=kms_key,
                sagemaker_session=self.sagemaker_session,
            )
        except ClientError as client_error:
            if client_error.response["Error"]["Code"] == "NoSuchKey":
                status = self.sagemaker_session.sagemaker_client.describe_processing_job(
                    ProcessingJobName=self.job_name
                )["ProcessingJobStatus"]
                if status != "Completed":
                    raise UnexpectedStatusException(
                        message="The underlying job is not in 'Completed' state. You may only "
                        "retrieve files for a job that has completed successfully.",
                        allowed_statuses="Completed",
                        actual_status=status,
                    )
            else:
                raise client_error

    def suggested_constraints(self, file_name=CONSTRAINTS_JSON_DEFAULT_FILE_NAME, kms_key=None):
        """Returns a sagemaker.model_monitor.Constraints object.

        It represents the constraints JSON file generated by this baselining job.

        Args:
            file_name (str): The name of the json-formatted constraints file.
            kms_key (str): The kms key to use when retrieving the file.

        Returns:
            sagemaker.model_monitor.Constraints: The Constraints object representing the file that
                was generated by the job.

        Raises:
            UnexpectedStatusException: This is thrown if the job is not in a 'Completed' state.
        """
        try:
            baselining_job_output_s3_path = self.outputs[0].s3_output.s3_uri
            return Constraints.from_s3_uri(
                constraints_file_s3_uri=s3.s3_path_join(baselining_job_output_s3_path, file_name),
                kms_key=kms_key,
                sagemaker_session=self.sagemaker_session,
            )
        except ClientError as client_error:
            if client_error.response["Error"]["Code"] == "NoSuchKey":
                status = self.sagemaker_session.sagemaker_client.describe_processing_job(
                    ProcessingJobName=self.job_name
                )["ProcessingJobStatus"]
                if status != "Completed":
                    raise UnexpectedStatusException(
                        message="The underlying job is not in 'Completed' state. You may only "
                        "retrieve files for a job that has completed successfully.",
                        allowed_statuses="Completed",
                        actual_status=status,
                    )
            else:
                raise client_error


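# Editor's sketch (not part of the package source): pulling the artifacts of a
# finished baselining job, assuming `processing_job` is a completed
# sagemaker.processing.ProcessingJob:
#
#     baselining_job = BaseliningJob.from_processing_job(processing_job)
#     constraints = baselining_job.suggested_constraints()
#     statistics = baselining_job.baseline_statistics()

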
class MonitoringExecution(ProcessingJob):
    """Provides functionality to retrieve monitoring-specific files from monitoring executions."""

    def __init__(self, sagemaker_session, job_name, inputs, output, output_kms_key=None):
        """Initializes a MonitoringExecution job that tracks a monitoring execution.

        It is kicked off by an Amazon SageMaker Model Monitoring Schedule.

        Args:
            sagemaker_session (sagemaker.core.helper.session_helper.Session): Session object which
                manages interactions with Amazon SageMaker APIs and any other
                AWS services needed. If not specified, one is created using
                the default AWS configuration chain.
            job_name (str): The name of the monitoring execution job.
            inputs ([sagemaker.processing.ProcessingInput]): A list of ProcessingInput objects.
            output (sagemaker.processing.ProcessingOutput): The output associated with the
                monitoring execution.
            output_kms_key (str): The output kms key associated with the job. Defaults to None
                if not provided.
        """
        from sagemaker.core.shapes import ProcessingOutputConfig

        super(MonitoringExecution, self).__init__(
            processing_job_name=job_name,
            processing_inputs=inputs,
            processing_output_config=(
                ProcessingOutputConfig(outputs=[output], kms_key_id=output_kms_key)
                if output_kms_key
                else ProcessingOutputConfig(outputs=[output])
            ),
        )
        object.__setattr__(self, "sagemaker_session", sagemaker_session)

    @property
    def output(self):
        """Get the first output from processing_output_config."""
        return self.processing_output_config.outputs[0]

    @property
    def outputs(self):
        """Get all outputs from processing_output_config."""
        return self.processing_output_config.outputs

    def describe(self):
        """Describe the processing job."""
        return self.sagemaker_session.sagemaker_client.describe_processing_job(
            ProcessingJobName=self.processing_job_name
        )

    @classmethod
    def from_processing_arn(cls, sagemaker_session, processing_job_arn):
        """Initializes a MonitoringExecution from a processing job arn.

        Args:
            processing_job_arn (str): ARN of the processing job to create a MonitoringExecution
                out of.
            sagemaker_session (sagemaker.core.helper.session_helper.Session): Session object which
                manages interactions with Amazon SageMaker APIs and any other
                AWS services needed. If not specified, one is created using
                the default AWS configuration chain.

        Returns:
            sagemaker.model_monitor.MonitoringExecution: The MonitoringExecution instance
                created from the processing job.
        """
        processing_job_name = processing_job_arn.split(":")[5][
            len("processing-job/") :
        ]  # This is necessary while the API only vends an arn.
        job_desc = sagemaker_session.sagemaker_client.describe_processing_job(
            ProcessingJobName=processing_job_name
        )

        output_config = job_desc["ProcessingOutputConfig"]["Outputs"][0]
        return cls(
            sagemaker_session=sagemaker_session,
            job_name=processing_job_name,
            inputs=[
                ProcessingInput(
                    input_name=processing_input["InputName"],
                    s3_input=ProcessingS3Input(
                        s3_uri=processing_input["S3Input"]["S3Uri"],
                        local_path=processing_input["S3Input"]["LocalPath"],
                        s3_data_type=processing_input["S3Input"].get("S3DataType"),
                        s3_input_mode=processing_input["S3Input"].get("S3InputMode"),
                        s3_data_distribution_type=processing_input["S3Input"].get(
                            "S3DataDistributionType"
                        ),
                        s3_compression_type=processing_input["S3Input"].get("S3CompressionType"),
                    ),
                )
                for processing_input in job_desc["ProcessingInputs"]
            ],
            output=ProcessingOutput(
                output_name=output_config["OutputName"],
                s3_output=ProcessingS3Output(
                    local_path=output_config["S3Output"]["LocalPath"],
                    s3_uri=output_config["S3Output"]["S3Uri"],
                    s3_upload_mode=output_config["S3Output"].get("S3UploadMode", "EndOfJob"),
                ),
            ),
            output_kms_key=job_desc["ProcessingOutputConfig"].get("KmsKeyId"),
        )

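    # Editor's sketch (not part of the package source): rehydrating an
    # execution from the ARN of a monitoring execution's processing job, then
    # inspecting its results. The ARN is a placeholder:
    #
    #     execution = MonitoringExecution.from_processing_arn(
    #         sagemaker_session=session,
    #         processing_job_arn=(
    #             "arn:aws:sagemaker:us-west-2:111122223333:processing-job/my-execution"
    #         ),
    #     )
    #     violations = execution.constraint_violations()
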
    def statistics(self, file_name=STATISTICS_JSON_DEFAULT_FILE_NAME, kms_key=None):
        """Returns a sagemaker.model_monitor.Statistics object.

        It represents the statistics JSON file generated by this monitoring execution.

        Args:
            file_name (str): The name of the json-formatted statistics file.
            kms_key (str): The kms key to use when retrieving the file.

        Returns:
            sagemaker.model_monitor.Statistics: The Statistics object representing the file that
                was generated by the execution.

        Raises:
            UnexpectedStatusException: This is thrown if the job is not in a 'Completed' state.
        """
        try:
            baselining_job_output_s3_path = self.outputs[0].s3_output.s3_uri
            return Statistics.from_s3_uri(
                statistics_file_s3_uri=s3.s3_path_join(baselining_job_output_s3_path, file_name),
                kms_key=kms_key,
                sagemaker_session=self.sagemaker_session,
            )
        except ClientError as client_error:
            if client_error.response["Error"]["Code"] == "NoSuchKey":
                status = self.sagemaker_session.sagemaker_client.describe_processing_job(
                    ProcessingJobName=self.processing_job_name
                )["ProcessingJobStatus"]
                if status != "Completed":
                    raise UnexpectedStatusException(
                        message="The underlying job is not in 'Completed' state. You may only "
                        "retrieve files for a job that has completed successfully.",
                        allowed_statuses="Completed",
                        actual_status=status,
                    )
            else:
                raise client_error

    def constraint_violations(
        self, file_name=CONSTRAINT_VIOLATIONS_JSON_DEFAULT_FILE_NAME, kms_key=None
    ):
        """Returns a sagemaker.model_monitor.ConstraintViolations object.

        It represents the constraint violations JSON file generated by this monitoring execution.

        Args:
            file_name (str): The name of the json-formatted constraint violations file.
            kms_key (str): The kms key to use when retrieving the file.

        Returns:
            sagemaker.model_monitor.ConstraintViolations: The ConstraintViolations object
                representing the file that was generated by the monitoring execution.

        Raises:
            UnexpectedStatusException: This is thrown if the job is not in a 'Completed' state.
        """
        try:
            baselining_job_output_s3_path = self.outputs[0].s3_output.s3_uri
            return ConstraintViolations.from_s3_uri(
                constraint_violations_file_s3_uri=s3.s3_path_join(
                    baselining_job_output_s3_path, file_name
                ),
                kms_key=kms_key,
                sagemaker_session=self.sagemaker_session,
            )
        except ClientError as client_error:
            if client_error.response["Error"]["Code"] == "NoSuchKey":
                status = self.sagemaker_session.sagemaker_client.describe_processing_job(
                    ProcessingJobName=self.processing_job_name
                )["ProcessingJobStatus"]
                if status != "Completed":
                    raise UnexpectedStatusException(
                        message="The underlying job is not in 'Completed' state. You may only "
                        "retrieve files for a job that has completed successfully.",
                        allowed_statuses="Completed",
                        actual_status=status,
                    )
            else:
                raise client_error


class EndpointInput(object):
    """Accepts parameters that specify an endpoint input for monitoring execution.

    It also provides a method to turn those parameters into a dictionary.
    """

    def __init__(
        self,
        endpoint_name,
        destination,
        s3_input_mode="File",
        s3_data_distribution_type="FullyReplicated",
        start_time_offset=None,
        end_time_offset=None,
        features_attribute=None,
        inference_attribute=None,
        probability_attribute=None,
        probability_threshold_attribute=None,
        exclude_features_attribute=None,
    ):
        """Initialize an ``EndpointInput`` instance.

        EndpointInput accepts parameters that specify an endpoint input for a monitoring
        job and provides a method to turn those parameters into a dictionary.

        Args:
            endpoint_name (str): The name of the endpoint.
            destination (str): The destination of the input.
            s3_input_mode (str): The S3 input mode. Can be one of: "File", "Pipe" or "FastFile".
                Default: "File".
            s3_data_distribution_type (str): The S3 Data Distribution Type. Can be one of:
                "FullyReplicated", "ShardedByS3Key".
            start_time_offset (str): Monitoring start time offset, e.g. "-PT1H".
            end_time_offset (str): Monitoring end time offset, e.g. "-PT0H".
            features_attribute (str): JSONpath to locate features in JSONlines dataset.
                Only used for ModelBiasMonitor and ModelExplainabilityMonitor.
            inference_attribute (str): Index or JSONpath to locate predicted label(s).
                Only used for ModelQualityMonitor, ModelBiasMonitor, and ModelExplainabilityMonitor.
            probability_attribute (str or int): Index or JSONpath to locate probabilities.
                Only used for ModelQualityMonitor, ModelBiasMonitor and ModelExplainabilityMonitor.
            probability_threshold_attribute (float): Threshold to convert probabilities to binaries.
                Only used for ModelQualityMonitor, ModelBiasMonitor and ModelExplainabilityMonitor.
            exclude_features_attribute (str): Comma separated column indices of features or
                actual feature names that need to be excluded. (default: None)
        """
        self.endpoint_name = endpoint_name
        self.local_path = destination
        self.s3_input_mode = s3_input_mode
        self.s3_data_distribution_type = s3_data_distribution_type
        self.start_time_offset = start_time_offset
        self.end_time_offset = end_time_offset
        self.features_attribute = features_attribute
        self.inference_attribute = inference_attribute
        self.probability_attribute = probability_attribute
        self.probability_threshold_attribute = probability_threshold_attribute
        self.exclude_features_attribute = exclude_features_attribute

    def _to_request_dict(self):
        """Generates a request dictionary using the parameters provided to the class."""
        endpoint_input = {
            "EndpointName": self.endpoint_name,
            "LocalPath": self.local_path,
            "S3InputMode": self.s3_input_mode,
            "S3DataDistributionType": self.s3_data_distribution_type,
        }

        if self.start_time_offset is not None:
            endpoint_input["StartTimeOffset"] = self.start_time_offset
        if self.end_time_offset is not None:
            endpoint_input["EndTimeOffset"] = self.end_time_offset
        if self.features_attribute is not None:
            endpoint_input["FeaturesAttribute"] = self.features_attribute
        if self.inference_attribute is not None:
            endpoint_input["InferenceAttribute"] = self.inference_attribute
        if self.probability_attribute is not None:
            endpoint_input["ProbabilityAttribute"] = self.probability_attribute
        if self.probability_threshold_attribute is not None:
            endpoint_input["ProbabilityThresholdAttribute"] = self.probability_threshold_attribute
        if self.exclude_features_attribute is not None:
            endpoint_input["ExcludeFeaturesAttribute"] = self.exclude_features_attribute
        endpoint_input_request = {"EndpointInput": endpoint_input}
        return endpoint_input_request


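# Editor's sketch (not part of the package source): an EndpointInput for a
# model quality job that analyzes the last hour of captured data. The endpoint
# name and container path are placeholders:
#
#     endpoint_input = EndpointInput(
#         endpoint_name="my-endpoint",
#         destination="/opt/ml/processing/input_data",
#         start_time_offset="-PT1H",
#         end_time_offset="-PT0H",
#         inference_attribute="0",
#     )
#     request_fragment = endpoint_input._to_request_dict()

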
@attr.s
class MonitoringInput(object):
    """Accepts parameters specifying batch transform or endpoint inputs for monitoring execution.

    MonitoringInput accepts additional parameters that configure monitoring jobs.
    It also provides a method to turn those parameters into a dictionary.

    Args:
        start_time_offset (str): Monitoring start time offset, e.g. "-PT1H".
        end_time_offset (str): Monitoring end time offset, e.g. "-PT0H".
        features_attribute (str): JSONpath to locate features in JSONlines dataset.
            Only used for ModelBiasMonitor and ModelExplainabilityMonitor.
        inference_attribute (str): Index or JSONpath to locate predicted label(s).
            Only used for ModelQualityMonitor, ModelBiasMonitor, and ModelExplainabilityMonitor.
        probability_attribute (str): Index or JSONpath to locate probabilities.
            Only used for ModelQualityMonitor, ModelBiasMonitor and ModelExplainabilityMonitor.
        probability_threshold_attribute (float): Threshold to convert probabilities to binaries.
            Only used for ModelQualityMonitor, ModelBiasMonitor and ModelExplainabilityMonitor.
    """

    start_time_offset: str = attr.ib()
    end_time_offset: str = attr.ib()
    features_attribute: str = attr.ib()
    inference_attribute: str = attr.ib()
    probability_attribute: Union[str, int] = attr.ib()
    probability_threshold_attribute: float = attr.ib()


class BatchTransformInput(MonitoringInput):
    """Accepts parameters that specify a batch transform input for monitoring schedule.

    It also provides a method to turn those parameters into a dictionary.
    """

    def __init__(
        self,
        data_captured_destination_s3_uri: str,
        destination: str,
        dataset_format: MonitoringDatasetFormat,
        s3_input_mode: str = "File",
        s3_data_distribution_type: str = "FullyReplicated",
        start_time_offset: str = None,
        end_time_offset: str = None,
        features_attribute: str = None,
        inference_attribute: str = None,
        probability_attribute: str = None,
        probability_threshold_attribute: str = None,
        exclude_features_attribute: str = None,
    ):
        """Initialize a `BatchTransformInput` instance.

        Args:
            data_captured_destination_s3_uri (str): Location of the batch transform captured data
                file that needs to be analysed.
            destination (str): The destination of the input.
            dataset_format (sagemaker.model_monitor.MonitoringDatasetFormat): The format of the
                captured dataset.
            s3_input_mode (str): The S3 input mode. Can be one of: "File", "Pipe" or
                "FastFile". (default: File)
            s3_data_distribution_type (str): The S3 Data Distribution Type. Can be one of:
                "FullyReplicated", "ShardedByS3Key" (default: FullyReplicated)
            start_time_offset (str): Monitoring start time offset, e.g. "-PT1H" (default: None)
            end_time_offset (str): Monitoring end time offset, e.g. "-PT0H". (default: None)
            features_attribute (str): JSONpath to locate features in JSONlines dataset.
                Only used for ModelBiasMonitor and ModelExplainabilityMonitor (default: None)
            inference_attribute (str): Index or JSONpath to locate predicted label(s).
                Only used for ModelQualityMonitor, ModelBiasMonitor, and ModelExplainabilityMonitor
                (default: None)
            probability_attribute (str): Index or JSONpath to locate probabilities.
                Only used for ModelQualityMonitor, ModelBiasMonitor and ModelExplainabilityMonitor
                (default: None)
            probability_threshold_attribute (float): Threshold to convert probabilities to binaries.
                Only used for ModelQualityMonitor, ModelBiasMonitor and ModelExplainabilityMonitor
                (default: None)
            exclude_features_attribute (str): Comma separated column indices of features or
                actual feature names that need to be excluded. (default: None)
        """
        self.data_captured_destination_s3_uri = data_captured_destination_s3_uri
        self.destination = destination
        self.s3_input_mode = s3_input_mode
        self.s3_data_distribution_type = s3_data_distribution_type
        self.dataset_format = dataset_format
        self.exclude_features_attribute = exclude_features_attribute

        super(BatchTransformInput, self).__init__(
            start_time_offset=start_time_offset,
            end_time_offset=end_time_offset,
            features_attribute=features_attribute,
            inference_attribute=inference_attribute,
            probability_attribute=probability_attribute,
            probability_threshold_attribute=probability_threshold_attribute,
        )

    def _to_request_dict(self):
        """Generates a request dictionary using the parameters provided to the class."""
        batch_transform_input_data = {
            "DataCapturedDestinationS3Uri": self.data_captured_destination_s3_uri,
            "LocalPath": self.destination,
            "S3InputMode": self.s3_input_mode,
            "S3DataDistributionType": self.s3_data_distribution_type,
            "DatasetFormat": self.dataset_format,
        }

        if self.start_time_offset is not None:
            batch_transform_input_data["StartTimeOffset"] = self.start_time_offset
        if self.end_time_offset is not None:
            batch_transform_input_data["EndTimeOffset"] = self.end_time_offset
        if self.features_attribute is not None:
            batch_transform_input_data["FeaturesAttribute"] = self.features_attribute
        if self.inference_attribute is not None:
            batch_transform_input_data["InferenceAttribute"] = self.inference_attribute
        if self.probability_attribute is not None:
            batch_transform_input_data["ProbabilityAttribute"] = self.probability_attribute
        if self.probability_threshold_attribute is not None:
            batch_transform_input_data["ProbabilityThresholdAttribute"] = (
                self.probability_threshold_attribute
            )
        if self.exclude_features_attribute is not None:
            batch_transform_input_data["ExcludeFeaturesAttribute"] = self.exclude_features_attribute

        batch_transform_input_request = {"BatchTransformInput": batch_transform_input_data}

        return batch_transform_input_request


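# Editor's sketch (not part of the package source): monitoring captured batch
# transform data instead of a live endpoint. The S3 URI is a placeholder, and
# MonitoringDatasetFormat.csv(header=True) assumes the usual dataset-format
# helper:
#
#     batch_input = BatchTransformInput(
#         data_captured_destination_s3_uri="s3://my-bucket/data-capture/",
#         destination="/opt/ml/processing/input_data",
#         dataset_format=MonitoringDatasetFormat.csv(header=True),
#     )
#     monitor.create_monitoring_schedule(batch_transform_input=batch_input, ...)

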
class MonitoringOutput(object):
    """Accepts parameters that specify an S3 output for a monitoring job.

    It also provides a method to turn those parameters into a dictionary.
    """

    def __init__(self, source, destination=None, s3_upload_mode="Continuous"):
        """Initialize a ``MonitoringOutput`` instance.

        MonitoringOutput accepts parameters that specify an S3 output for a monitoring
        job and provides a method to turn those parameters into a dictionary.

        Args:
            source (str): The source for the output.
            destination (str): The destination of the output. Optional.
                Default: s3://<default-session-bucket>/<schedule_name>/output
            s3_upload_mode (str): The S3 upload mode. Default: "Continuous".
        """
        from sagemaker.core.shapes import MonitoringS3Output

        self.source = source
        self.s3_output = MonitoringS3Output(
            s3_uri=destination, local_path=source, s3_upload_mode=s3_upload_mode
        )
        self.s3_upload_mode = s3_upload_mode

    def _to_request_dict(self):
        """Generates a request dictionary using the parameters provided to the class.

        Returns:
            dict: The request dictionary.
        """
        s3_output_request = {
            "S3Output": {
                "S3Uri": self.s3_output.s3_uri,
                "LocalPath": self.source,
                "S3UploadMode": self.s3_upload_mode,
            }
        }

        return s3_output_request