sagemaker-core 1.0.9__tar.gz → 1.0.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sagemaker-core might be problematic. Click here for more details.
- {sagemaker_core-1.0.9/src/sagemaker_core.egg-info → sagemaker_core-1.0.10}/PKG-INFO +1 -1
- sagemaker_core-1.0.10/VERSION +1 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/main/code_injection/shape_dag.py +15 -0
- sagemaker_core-1.0.10/src/sagemaker_core/main/logs.py +167 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/main/resources.py +195 -51
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/main/shapes.py +4 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/main/utils.py +9 -3
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/tools/constants.py +2 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/tools/resources_codegen.py +47 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/tools/templates.py +42 -6
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10/src/sagemaker_core.egg-info}/PKG-INFO +1 -1
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core.egg-info/SOURCES.txt +1 -0
- sagemaker_core-1.0.9/VERSION +0 -1
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/LICENSE +0 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/MANIFEST.in +0 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/README.rst +0 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/pyproject.toml +0 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/setup.cfg +0 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/__init__.py +0 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/_version.py +0 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/helper/__init__.py +0 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/helper/session_helper.py +0 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/main/__init__.py +0 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/main/code_injection/__init__.py +0 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/main/code_injection/base.py +0 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/main/code_injection/codec.py +0 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/main/code_injection/constants.py +0 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/main/config_schema.py +0 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/main/exceptions.py +0 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/main/intelligent_defaults_helper.py +0 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/main/user_agent.py +0 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/resources/__init__.py +0 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/shapes/__init__.py +0 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/tools/__init__.py +0 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/tools/codegen.py +0 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/tools/data_extractor.py +0 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/tools/method.py +0 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/tools/resources_extractor.py +0 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/tools/shapes_codegen.py +0 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/tools/shapes_extractor.py +0 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core.egg-info/dependency_links.txt +0 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core.egg-info/requires.txt +0 -0
- {sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
1.0.10
|
{sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/main/code_injection/shape_dag.py
RENAMED
|
@@ -1380,6 +1380,11 @@ SHAPE_DAG = {
|
|
|
1380
1380
|
"shape": "AppLifecycleManagement",
|
|
1381
1381
|
"type": "structure",
|
|
1382
1382
|
},
|
|
1383
|
+
{
|
|
1384
|
+
"name": "BuiltInLifecycleConfigArn",
|
|
1385
|
+
"shape": "StudioLifecycleConfigArn",
|
|
1386
|
+
"type": "string",
|
|
1387
|
+
},
|
|
1383
1388
|
],
|
|
1384
1389
|
"type": "structure",
|
|
1385
1390
|
},
|
|
@@ -3793,6 +3798,11 @@ SHAPE_DAG = {
|
|
|
3793
3798
|
{"name": "CreationTime", "shape": "Timestamp", "type": "timestamp"},
|
|
3794
3799
|
{"name": "FailureReason", "shape": "FailureReason", "type": "string"},
|
|
3795
3800
|
{"name": "ResourceSpec", "shape": "ResourceSpec", "type": "structure"},
|
|
3801
|
+
{
|
|
3802
|
+
"name": "BuiltInLifecycleConfigArn",
|
|
3803
|
+
"shape": "StudioLifecycleConfigArn",
|
|
3804
|
+
"type": "string",
|
|
3805
|
+
},
|
|
3796
3806
|
],
|
|
3797
3807
|
"type": "structure",
|
|
3798
3808
|
},
|
|
@@ -7699,6 +7709,11 @@ SHAPE_DAG = {
|
|
|
7699
7709
|
"type": "structure",
|
|
7700
7710
|
},
|
|
7701
7711
|
{"name": "EmrSettings", "shape": "EmrSettings", "type": "structure"},
|
|
7712
|
+
{
|
|
7713
|
+
"name": "BuiltInLifecycleConfigArn",
|
|
7714
|
+
"shape": "StudioLifecycleConfigArn",
|
|
7715
|
+
"type": "string",
|
|
7716
|
+
},
|
|
7702
7717
|
],
|
|
7703
7718
|
"type": "structure",
|
|
7704
7719
|
},
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
import boto3
|
|
2
|
+
import botocore
|
|
3
|
+
|
|
4
|
+
from boto3.session import Session
|
|
5
|
+
import botocore.client
|
|
6
|
+
from botocore.config import Config
|
|
7
|
+
from typing import Generator, Tuple, List
|
|
8
|
+
from sagemaker_core.main.utils import SingletonMeta
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class CloudWatchLogsClient(metaclass=SingletonMeta):
    """
    Holder for a boto3 CloudWatch Logs ("logs") client.

    The class is created with ``SingletonMeta`` as its metaclass. The client is
    built from a default boto3 ``Session``, in that session's region, with the
    botocore "standard" retry mode and ``max_attempts`` set to 10.
    """

    # Low-level "logs" client; stays None until __init__ creates it.
    client: botocore.client.BaseClient = None

    def __init__(self):
        # Only build a client when this instance does not already hold one.
        if self.client is None:
            session = Session()
            self.client = session.client(
                "logs",
                region_name=session.region_name,
                config=Config(retries={"max_attempts": 10, "mode": "standard"}),
            )
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class LogStreamHandler:
    """
    Reads the events of one CloudWatch Logs stream, remembering its position.

    The forward token returned by each ``get_log_events`` call is stored on the
    instance, so a later call to ``get_latest_log_events`` continues from where
    the previous one stopped.
    """

    log_group_name: str = None
    log_stream_name: str = None
    stream_id: int = None
    next_token: str = None
    cw_client = None

    def __init__(self, log_group_name: str, log_stream_name: str, stream_id: int):
        """
        Parameters:
            log_group_name: Name of the log group that contains the stream.
            log_stream_name: Name of the stream to read.
            stream_id: Identifier stored on the handler for this stream.
        """
        self.cw_client = CloudWatchLogsClient().client
        self.stream_id = stream_id
        self.log_stream_name = log_stream_name
        self.log_group_name = log_group_name

    def get_latest_log_events(self) -> Generator[Tuple[str, dict], None, None]:
        """
        Yield every log event currently available on this stream.

        cw_client.get_log_events() always returns a nextForwardToken, even when the
        batch of events is empty, so the stored token can be re-sent on a later call
        until new events show up.

        API Reference: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/logs/client/get_log_events.html

        Returns:
            Generator[tuple[str, dict], None, None]: Generator that yields tuples of two values
                str: stream_name,
                dict: event dict in format
                    {
                        "ingestionTime": number,
                        "message": "string",
                        "timestamp": number
                    }
        """
        while True:
            request = {
                "logGroupName": self.log_group_name,
                "logStreamName": self.log_stream_name,
                "startFromHead": True,
            }
            # The very first request carries no token at all.
            if self.next_token:
                request["nextToken"] = self.next_token

            page = self.cw_client.get_log_events(**request)

            # Persist the position before yielding so it survives an abandoned generator.
            self.next_token = page["nextForwardToken"]

            batch = page["events"]
            if not batch:
                # An empty batch means we have caught up with the stream.
                return

            for record in batch:
                yield self.log_stream_name, record
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class MultiLogStreamHandler:
    """
    Reads log events from every stream in a log group that shares a name prefix.

    The handler only starts serving events once at least
    ``expected_stream_count`` streams exist under
    ``<log_stream_name_prefix>/``; see ``ready``.
    """

    log_group_name: str = None
    log_stream_name_prefix: str = None
    expected_stream_count: int = None
    # Deliberately no class-level value: a list default here would be one object
    # shared by every instance. Each instance gets its own list in __init__.
    streams: List["LogStreamHandler"]
    cw_client = None

    def __init__(
        self, log_group_name: str, log_stream_name_prefix: str, expected_stream_count: int
    ):
        """
        Parameters:
            log_group_name: Name of the log group to read from.
            log_stream_name_prefix: Prefix shared by the streams; "/" is appended
                to it when listing streams.
            expected_stream_count: Number of streams that must exist before the
                handler reports itself as ready.
        """
        self.log_group_name = log_group_name
        self.log_stream_name_prefix = log_stream_name_prefix
        self.expected_stream_count = expected_stream_count
        self.streams = []
        self.cw_client = CloudWatchLogsClient().client

    def get_latest_log_events(self) -> Generator[Tuple[str, dict], None, None]:
        """
        This method gets all the latest log events from each stream that exist at this moment.

        Nothing is yielded while the handler is not ready.

        Returns:
            Generator[tuple[str, dict], None, None]: Generator that yields a tuple that consists of two values
                str: stream_name,
                dict: event dict in format -
                    {
                        "ingestionTime": number,
                        "message": "string",
                        "timestamp": number
                    }
        """
        if not self.ready():
            # Bare return: a value returned from a generator is never seen by a for-loop.
            return

        for stream in self.streams:
            yield from stream.get_latest_log_events()

    def ready(self) -> bool:
        """
        Checks whether or not MultiLogStreamHandler is ready to serve new log events at this moment.

        If self.streams already holds at least the expected number of streams, return True.
        Otherwise, check if the current number of log streams in the log group matches
        the expected stream count, and create one LogStreamHandler per stream if it does.

        Returns:
            bool: Whether or not MultiLogStreamHandler is ready to serve new log events.

        Raises:
            botocore.exceptions.ClientError: Any client error other than
                ResourceNotFoundException raised while listing the streams.
        """
        if len(self.streams) >= self.expected_stream_count:
            return True

        try:
            stream_names = self._list_stream_names()
        except botocore.exceptions.ClientError as e:
            # On the very first training job run on an account, there's no log group until
            # the container starts logging, so ignore any errors thrown about that
            if e.response["Error"]["Code"] == "ResourceNotFoundException":
                return False
            raise

        if len(stream_names) < self.expected_stream_count:
            # Log streams are created whenever a container starts writing to stdout/err,
            # so if the stream count is less than the expected number, return False
            return False

        self.streams = [
            LogStreamHandler(self.log_group_name, log_stream_name, index)
            for index, log_stream_name in enumerate(stream_names)
        ]
        return True

    def _list_stream_names(self) -> List[str]:
        """Return the names of all streams under the prefix, following pagination."""
        request = {
            "logGroupName": self.log_group_name,
            "logStreamNamePrefix": self.log_stream_name_prefix + "/",
            "orderBy": "LogStreamName",
        }
        stream_names = []
        while True:
            response = self.cw_client.describe_log_streams(**request)
            stream_names.extend(stream["logStreamName"] for stream in response["logStreams"])

            next_token = response.get("nextToken")
            if not next_token:
                return stream_names
            # Same request again, continued from where the previous page ended.
            request["nextToken"] = next_token
|
|
@@ -41,6 +41,7 @@ from sagemaker_core.main.intelligent_defaults_helper import (
|
|
|
41
41
|
load_default_configs_for_resource_name,
|
|
42
42
|
get_config_value,
|
|
43
43
|
)
|
|
44
|
+
from sagemaker_core.main.logs import MultiLogStreamHandler
|
|
44
45
|
from sagemaker_core.main.shapes import *
|
|
45
46
|
from sagemaker_core.main.exceptions import *
|
|
46
47
|
|
|
@@ -812,7 +813,8 @@ class Algorithm(Base):
|
|
|
812
813
|
Group(progress, status),
|
|
813
814
|
title="Wait Log Panel",
|
|
814
815
|
border_style=Style(color=Color.BLUE.value),
|
|
815
|
-
)
|
|
816
|
+
),
|
|
817
|
+
transient=True,
|
|
816
818
|
):
|
|
817
819
|
while True:
|
|
818
820
|
self.refresh()
|
|
@@ -977,6 +979,7 @@ class App(Base):
|
|
|
977
979
|
creation_time: The creation time of the application. After an application has been shut down for 24 hours, SageMaker deletes all metadata for the application. To be considered an update and retain application metadata, applications must be restarted within 24 hours after the previous application has been shut down. After this time window, creation of an application is considered a new application rather than an update of the previous application.
|
|
978
980
|
failure_reason: The failure reason.
|
|
979
981
|
resource_spec: The instance type and the Amazon Resource Name (ARN) of the SageMaker image created on the instance.
|
|
982
|
+
built_in_lifecycle_config_arn: The lifecycle configuration that runs before the default lifecycle configuration
|
|
980
983
|
|
|
981
984
|
"""
|
|
982
985
|
|
|
@@ -992,6 +995,7 @@ class App(Base):
|
|
|
992
995
|
creation_time: Optional[datetime.datetime] = Unassigned()
|
|
993
996
|
failure_reason: Optional[str] = Unassigned()
|
|
994
997
|
resource_spec: Optional[ResourceSpec] = Unassigned()
|
|
998
|
+
built_in_lifecycle_config_arn: Optional[str] = Unassigned()
|
|
995
999
|
|
|
996
1000
|
def get_name(self) -> str:
|
|
997
1001
|
attributes = vars(self)
|
|
@@ -1270,7 +1274,8 @@ class App(Base):
|
|
|
1270
1274
|
Group(progress, status),
|
|
1271
1275
|
title="Wait Log Panel",
|
|
1272
1276
|
border_style=Style(color=Color.BLUE.value),
|
|
1273
|
-
)
|
|
1277
|
+
),
|
|
1278
|
+
transient=True,
|
|
1274
1279
|
):
|
|
1275
1280
|
while True:
|
|
1276
1281
|
self.refresh()
|
|
@@ -2652,7 +2657,11 @@ class AutoMLJob(Base):
|
|
|
2652
2657
|
logger.info(f"Stopping {self.__class__.__name__} - {self.get_name()}")
|
|
2653
2658
|
|
|
2654
2659
|
@Base.add_validate_call
|
|
2655
|
-
def wait(
|
|
2660
|
+
def wait(
|
|
2661
|
+
self,
|
|
2662
|
+
poll: int = 5,
|
|
2663
|
+
timeout: Optional[int] = None,
|
|
2664
|
+
) -> None:
|
|
2656
2665
|
"""
|
|
2657
2666
|
Wait for a AutoMLJob resource.
|
|
2658
2667
|
|
|
@@ -2682,7 +2691,8 @@ class AutoMLJob(Base):
|
|
|
2682
2691
|
Group(progress, status),
|
|
2683
2692
|
title="Wait Log Panel",
|
|
2684
2693
|
border_style=Style(color=Color.BLUE.value),
|
|
2685
|
-
)
|
|
2694
|
+
),
|
|
2695
|
+
transient=True,
|
|
2686
2696
|
):
|
|
2687
2697
|
while True:
|
|
2688
2698
|
self.refresh()
|
|
@@ -3130,7 +3140,11 @@ class AutoMLJobV2(Base):
|
|
|
3130
3140
|
return self
|
|
3131
3141
|
|
|
3132
3142
|
@Base.add_validate_call
|
|
3133
|
-
def wait(
|
|
3143
|
+
def wait(
|
|
3144
|
+
self,
|
|
3145
|
+
poll: int = 5,
|
|
3146
|
+
timeout: Optional[int] = None,
|
|
3147
|
+
) -> None:
|
|
3134
3148
|
"""
|
|
3135
3149
|
Wait for a AutoMLJobV2 resource.
|
|
3136
3150
|
|
|
@@ -3160,7 +3174,8 @@ class AutoMLJobV2(Base):
|
|
|
3160
3174
|
Group(progress, status),
|
|
3161
3175
|
title="Wait Log Panel",
|
|
3162
3176
|
border_style=Style(color=Color.BLUE.value),
|
|
3163
|
-
)
|
|
3177
|
+
),
|
|
3178
|
+
transient=True,
|
|
3164
3179
|
):
|
|
3165
3180
|
while True:
|
|
3166
3181
|
self.refresh()
|
|
@@ -3534,7 +3549,8 @@ class Cluster(Base):
|
|
|
3534
3549
|
Group(progress, status),
|
|
3535
3550
|
title="Wait Log Panel",
|
|
3536
3551
|
border_style=Style(color=Color.BLUE.value),
|
|
3537
|
-
)
|
|
3552
|
+
),
|
|
3553
|
+
transient=True,
|
|
3538
3554
|
):
|
|
3539
3555
|
while True:
|
|
3540
3556
|
self.refresh()
|
|
@@ -4493,7 +4509,11 @@ class CompilationJob(Base):
|
|
|
4493
4509
|
logger.info(f"Stopping {self.__class__.__name__} - {self.get_name()}")
|
|
4494
4510
|
|
|
4495
4511
|
@Base.add_validate_call
|
|
4496
|
-
def wait(
|
|
4512
|
+
def wait(
|
|
4513
|
+
self,
|
|
4514
|
+
poll: int = 5,
|
|
4515
|
+
timeout: Optional[int] = None,
|
|
4516
|
+
) -> None:
|
|
4497
4517
|
"""
|
|
4498
4518
|
Wait for a CompilationJob resource.
|
|
4499
4519
|
|
|
@@ -4523,7 +4543,8 @@ class CompilationJob(Base):
|
|
|
4523
4543
|
Group(progress, status),
|
|
4524
4544
|
title="Wait Log Panel",
|
|
4525
4545
|
border_style=Style(color=Color.BLUE.value),
|
|
4526
|
-
)
|
|
4546
|
+
),
|
|
4547
|
+
transient=True,
|
|
4527
4548
|
):
|
|
4528
4549
|
while True:
|
|
4529
4550
|
self.refresh()
|
|
@@ -6581,7 +6602,8 @@ class Domain(Base):
|
|
|
6581
6602
|
Group(progress, status),
|
|
6582
6603
|
title="Wait Log Panel",
|
|
6583
6604
|
border_style=Style(color=Color.BLUE.value),
|
|
6584
|
-
)
|
|
6605
|
+
),
|
|
6606
|
+
transient=True,
|
|
6585
6607
|
):
|
|
6586
6608
|
while True:
|
|
6587
6609
|
self.refresh()
|
|
@@ -7530,7 +7552,11 @@ class EdgePackagingJob(Base):
|
|
|
7530
7552
|
logger.info(f"Stopping {self.__class__.__name__} - {self.get_name()}")
|
|
7531
7553
|
|
|
7532
7554
|
@Base.add_validate_call
|
|
7533
|
-
def wait(
|
|
7555
|
+
def wait(
|
|
7556
|
+
self,
|
|
7557
|
+
poll: int = 5,
|
|
7558
|
+
timeout: Optional[int] = None,
|
|
7559
|
+
) -> None:
|
|
7534
7560
|
"""
|
|
7535
7561
|
Wait for a EdgePackagingJob resource.
|
|
7536
7562
|
|
|
@@ -7560,7 +7586,8 @@ class EdgePackagingJob(Base):
|
|
|
7560
7586
|
Group(progress, status),
|
|
7561
7587
|
title="Wait Log Panel",
|
|
7562
7588
|
border_style=Style(color=Color.BLUE.value),
|
|
7563
|
-
)
|
|
7589
|
+
),
|
|
7590
|
+
transient=True,
|
|
7564
7591
|
):
|
|
7565
7592
|
while True:
|
|
7566
7593
|
self.refresh()
|
|
@@ -8031,7 +8058,8 @@ class Endpoint(Base):
|
|
|
8031
8058
|
Group(progress, status),
|
|
8032
8059
|
title="Wait Log Panel",
|
|
8033
8060
|
border_style=Style(color=Color.BLUE.value),
|
|
8034
|
-
)
|
|
8061
|
+
),
|
|
8062
|
+
transient=True,
|
|
8035
8063
|
):
|
|
8036
8064
|
while True:
|
|
8037
8065
|
self.refresh()
|
|
@@ -9547,7 +9575,8 @@ class FeatureGroup(Base):
|
|
|
9547
9575
|
Group(progress, status),
|
|
9548
9576
|
title="Wait Log Panel",
|
|
9549
9577
|
border_style=Style(color=Color.BLUE.value),
|
|
9550
|
-
)
|
|
9578
|
+
),
|
|
9579
|
+
transient=True,
|
|
9551
9580
|
):
|
|
9552
9581
|
while True:
|
|
9553
9582
|
self.refresh()
|
|
@@ -10415,7 +10444,8 @@ class FlowDefinition(Base):
|
|
|
10415
10444
|
Group(progress, status),
|
|
10416
10445
|
title="Wait Log Panel",
|
|
10417
10446
|
border_style=Style(color=Color.BLUE.value),
|
|
10418
|
-
)
|
|
10447
|
+
),
|
|
10448
|
+
transient=True,
|
|
10419
10449
|
):
|
|
10420
10450
|
while True:
|
|
10421
10451
|
self.refresh()
|
|
@@ -10910,7 +10940,8 @@ class Hub(Base):
|
|
|
10910
10940
|
Group(progress, status),
|
|
10911
10941
|
title="Wait Log Panel",
|
|
10912
10942
|
border_style=Style(color=Color.BLUE.value),
|
|
10913
|
-
)
|
|
10943
|
+
),
|
|
10944
|
+
transient=True,
|
|
10914
10945
|
):
|
|
10915
10946
|
while True:
|
|
10916
10947
|
self.refresh()
|
|
@@ -11299,7 +11330,8 @@ class HubContent(Base):
|
|
|
11299
11330
|
Group(progress, status),
|
|
11300
11331
|
title="Wait Log Panel",
|
|
11301
11332
|
border_style=Style(color=Color.BLUE.value),
|
|
11302
|
-
)
|
|
11333
|
+
),
|
|
11334
|
+
transient=True,
|
|
11303
11335
|
):
|
|
11304
11336
|
while True:
|
|
11305
11337
|
self.refresh()
|
|
@@ -11874,7 +11906,8 @@ class HumanTaskUi(Base):
|
|
|
11874
11906
|
Group(progress, status),
|
|
11875
11907
|
title="Wait Log Panel",
|
|
11876
11908
|
border_style=Style(color=Color.BLUE.value),
|
|
11877
|
-
)
|
|
11909
|
+
),
|
|
11910
|
+
transient=True,
|
|
11878
11911
|
):
|
|
11879
11912
|
while True:
|
|
11880
11913
|
self.refresh()
|
|
@@ -12341,7 +12374,11 @@ class HyperParameterTuningJob(Base):
|
|
|
12341
12374
|
logger.info(f"Stopping {self.__class__.__name__} - {self.get_name()}")
|
|
12342
12375
|
|
|
12343
12376
|
@Base.add_validate_call
|
|
12344
|
-
def wait(
|
|
12377
|
+
def wait(
|
|
12378
|
+
self,
|
|
12379
|
+
poll: int = 5,
|
|
12380
|
+
timeout: Optional[int] = None,
|
|
12381
|
+
) -> None:
|
|
12345
12382
|
"""
|
|
12346
12383
|
Wait for a HyperParameterTuningJob resource.
|
|
12347
12384
|
|
|
@@ -12371,7 +12408,8 @@ class HyperParameterTuningJob(Base):
|
|
|
12371
12408
|
Group(progress, status),
|
|
12372
12409
|
title="Wait Log Panel",
|
|
12373
12410
|
border_style=Style(color=Color.BLUE.value),
|
|
12374
|
-
)
|
|
12411
|
+
),
|
|
12412
|
+
transient=True,
|
|
12375
12413
|
):
|
|
12376
12414
|
while True:
|
|
12377
12415
|
self.refresh()
|
|
@@ -12943,7 +12981,8 @@ class Image(Base):
|
|
|
12943
12981
|
Group(progress, status),
|
|
12944
12982
|
title="Wait Log Panel",
|
|
12945
12983
|
border_style=Style(color=Color.BLUE.value),
|
|
12946
|
-
)
|
|
12984
|
+
),
|
|
12985
|
+
transient=True,
|
|
12947
12986
|
):
|
|
12948
12987
|
while True:
|
|
12949
12988
|
self.refresh()
|
|
@@ -13549,7 +13588,8 @@ class ImageVersion(Base):
|
|
|
13549
13588
|
Group(progress, status),
|
|
13550
13589
|
title="Wait Log Panel",
|
|
13551
13590
|
border_style=Style(color=Color.BLUE.value),
|
|
13552
|
-
)
|
|
13591
|
+
),
|
|
13592
|
+
transient=True,
|
|
13553
13593
|
):
|
|
13554
13594
|
while True:
|
|
13555
13595
|
self.refresh()
|
|
@@ -13966,7 +14006,8 @@ class InferenceComponent(Base):
|
|
|
13966
14006
|
Group(progress, status),
|
|
13967
14007
|
title="Wait Log Panel",
|
|
13968
14008
|
border_style=Style(color=Color.BLUE.value),
|
|
13969
|
-
)
|
|
14009
|
+
),
|
|
14010
|
+
transient=True,
|
|
13970
14011
|
):
|
|
13971
14012
|
while True:
|
|
13972
14013
|
self.refresh()
|
|
@@ -14601,7 +14642,8 @@ class InferenceExperiment(Base):
|
|
|
14601
14642
|
Group(progress, status),
|
|
14602
14643
|
title="Wait Log Panel",
|
|
14603
14644
|
border_style=Style(color=Color.BLUE.value),
|
|
14604
|
-
)
|
|
14645
|
+
),
|
|
14646
|
+
transient=True,
|
|
14605
14647
|
):
|
|
14606
14648
|
while True:
|
|
14607
14649
|
self.refresh()
|
|
@@ -14974,7 +15016,11 @@ class InferenceRecommendationsJob(Base):
|
|
|
14974
15016
|
logger.info(f"Stopping {self.__class__.__name__} - {self.get_name()}")
|
|
14975
15017
|
|
|
14976
15018
|
@Base.add_validate_call
|
|
14977
|
-
def wait(
|
|
15019
|
+
def wait(
|
|
15020
|
+
self,
|
|
15021
|
+
poll: int = 5,
|
|
15022
|
+
timeout: Optional[int] = None,
|
|
15023
|
+
) -> None:
|
|
14978
15024
|
"""
|
|
14979
15025
|
Wait for a InferenceRecommendationsJob resource.
|
|
14980
15026
|
|
|
@@ -15004,7 +15050,8 @@ class InferenceRecommendationsJob(Base):
|
|
|
15004
15050
|
Group(progress, status),
|
|
15005
15051
|
title="Wait Log Panel",
|
|
15006
15052
|
border_style=Style(color=Color.BLUE.value),
|
|
15007
|
-
)
|
|
15053
|
+
),
|
|
15054
|
+
transient=True,
|
|
15008
15055
|
):
|
|
15009
15056
|
while True:
|
|
15010
15057
|
self.refresh()
|
|
@@ -15536,7 +15583,11 @@ class LabelingJob(Base):
|
|
|
15536
15583
|
logger.info(f"Stopping {self.__class__.__name__} - {self.get_name()}")
|
|
15537
15584
|
|
|
15538
15585
|
@Base.add_validate_call
|
|
15539
|
-
def wait(
|
|
15586
|
+
def wait(
|
|
15587
|
+
self,
|
|
15588
|
+
poll: int = 5,
|
|
15589
|
+
timeout: Optional[int] = None,
|
|
15590
|
+
) -> None:
|
|
15540
15591
|
"""
|
|
15541
15592
|
Wait for a LabelingJob resource.
|
|
15542
15593
|
|
|
@@ -15566,7 +15617,8 @@ class LabelingJob(Base):
|
|
|
15566
15617
|
Group(progress, status),
|
|
15567
15618
|
title="Wait Log Panel",
|
|
15568
15619
|
border_style=Style(color=Color.BLUE.value),
|
|
15569
|
-
)
|
|
15620
|
+
),
|
|
15621
|
+
transient=True,
|
|
15570
15622
|
):
|
|
15571
15623
|
while True:
|
|
15572
15624
|
self.refresh()
|
|
@@ -16314,7 +16366,8 @@ class MlflowTrackingServer(Base):
|
|
|
16314
16366
|
Group(progress, status),
|
|
16315
16367
|
title="Wait Log Panel",
|
|
16316
16368
|
border_style=Style(color=Color.BLUE.value),
|
|
16317
|
-
)
|
|
16369
|
+
),
|
|
16370
|
+
transient=True,
|
|
16318
16371
|
):
|
|
16319
16372
|
while True:
|
|
16320
16373
|
self.refresh()
|
|
@@ -17572,7 +17625,8 @@ class ModelCard(Base):
|
|
|
17572
17625
|
Group(progress, status),
|
|
17573
17626
|
title="Wait Log Panel",
|
|
17574
17627
|
border_style=Style(color=Color.BLUE.value),
|
|
17575
|
-
)
|
|
17628
|
+
),
|
|
17629
|
+
transient=True,
|
|
17576
17630
|
):
|
|
17577
17631
|
while True:
|
|
17578
17632
|
self.refresh()
|
|
@@ -17946,7 +18000,11 @@ class ModelCardExportJob(Base):
|
|
|
17946
18000
|
return self
|
|
17947
18001
|
|
|
17948
18002
|
@Base.add_validate_call
|
|
17949
|
-
def wait(
|
|
18003
|
+
def wait(
|
|
18004
|
+
self,
|
|
18005
|
+
poll: int = 5,
|
|
18006
|
+
timeout: Optional[int] = None,
|
|
18007
|
+
) -> None:
|
|
17950
18008
|
"""
|
|
17951
18009
|
Wait for a ModelCardExportJob resource.
|
|
17952
18010
|
|
|
@@ -17976,7 +18034,8 @@ class ModelCardExportJob(Base):
|
|
|
17976
18034
|
Group(progress, status),
|
|
17977
18035
|
title="Wait Log Panel",
|
|
17978
18036
|
border_style=Style(color=Color.BLUE.value),
|
|
17979
|
-
)
|
|
18037
|
+
),
|
|
18038
|
+
transient=True,
|
|
17980
18039
|
):
|
|
17981
18040
|
while True:
|
|
17982
18041
|
self.refresh()
|
|
@@ -18933,7 +18992,8 @@ class ModelPackage(Base):
|
|
|
18933
18992
|
Group(progress, status),
|
|
18934
18993
|
title="Wait Log Panel",
|
|
18935
18994
|
border_style=Style(color=Color.BLUE.value),
|
|
18936
|
-
)
|
|
18995
|
+
),
|
|
18996
|
+
transient=True,
|
|
18937
18997
|
):
|
|
18938
18998
|
while True:
|
|
18939
18999
|
self.refresh()
|
|
@@ -19400,7 +19460,8 @@ class ModelPackageGroup(Base):
|
|
|
19400
19460
|
Group(progress, status),
|
|
19401
19461
|
title="Wait Log Panel",
|
|
19402
19462
|
border_style=Style(color=Color.BLUE.value),
|
|
19403
|
-
)
|
|
19463
|
+
),
|
|
19464
|
+
transient=True,
|
|
19404
19465
|
):
|
|
19405
19466
|
while True:
|
|
19406
19467
|
self.refresh()
|
|
@@ -20789,7 +20850,8 @@ class MonitoringSchedule(Base):
|
|
|
20789
20850
|
Group(progress, status),
|
|
20790
20851
|
title="Wait Log Panel",
|
|
20791
20852
|
border_style=Style(color=Color.BLUE.value),
|
|
20792
|
-
)
|
|
20853
|
+
),
|
|
20854
|
+
transient=True,
|
|
20793
20855
|
):
|
|
20794
20856
|
while True:
|
|
20795
20857
|
self.refresh()
|
|
@@ -21358,7 +21420,8 @@ class NotebookInstance(Base):
|
|
|
21358
21420
|
Group(progress, status),
|
|
21359
21421
|
title="Wait Log Panel",
|
|
21360
21422
|
border_style=Style(color=Color.BLUE.value),
|
|
21361
|
-
)
|
|
21423
|
+
),
|
|
21424
|
+
transient=True,
|
|
21362
21425
|
):
|
|
21363
21426
|
while True:
|
|
21364
21427
|
self.refresh()
|
|
@@ -22197,7 +22260,11 @@ class OptimizationJob(Base):
|
|
|
22197
22260
|
logger.info(f"Stopping {self.__class__.__name__} - {self.get_name()}")
|
|
22198
22261
|
|
|
22199
22262
|
@Base.add_validate_call
|
|
22200
|
-
def wait(
|
|
22263
|
+
def wait(
|
|
22264
|
+
self,
|
|
22265
|
+
poll: int = 5,
|
|
22266
|
+
timeout: Optional[int] = None,
|
|
22267
|
+
) -> None:
|
|
22201
22268
|
"""
|
|
22202
22269
|
Wait for a OptimizationJob resource.
|
|
22203
22270
|
|
|
@@ -22227,7 +22294,8 @@ class OptimizationJob(Base):
|
|
|
22227
22294
|
Group(progress, status),
|
|
22228
22295
|
title="Wait Log Panel",
|
|
22229
22296
|
border_style=Style(color=Color.BLUE.value),
|
|
22230
|
-
)
|
|
22297
|
+
),
|
|
22298
|
+
transient=True,
|
|
22231
22299
|
):
|
|
22232
22300
|
while True:
|
|
22233
22301
|
self.refresh()
|
|
@@ -22698,7 +22766,8 @@ class Pipeline(Base):
|
|
|
22698
22766
|
Group(progress, status),
|
|
22699
22767
|
title="Wait Log Panel",
|
|
22700
22768
|
border_style=Style(color=Color.BLUE.value),
|
|
22701
|
-
)
|
|
22769
|
+
),
|
|
22770
|
+
transient=True,
|
|
22702
22771
|
):
|
|
22703
22772
|
while True:
|
|
22704
22773
|
self.refresh()
|
|
@@ -23096,7 +23165,8 @@ class PipelineExecution(Base):
|
|
|
23096
23165
|
Group(progress, status),
|
|
23097
23166
|
title="Wait Log Panel",
|
|
23098
23167
|
border_style=Style(color=Color.BLUE.value),
|
|
23099
|
-
)
|
|
23168
|
+
),
|
|
23169
|
+
transient=True,
|
|
23100
23170
|
):
|
|
23101
23171
|
while True:
|
|
23102
23172
|
self.refresh()
|
|
@@ -24073,13 +24143,19 @@ class ProcessingJob(Base):
|
|
|
24073
24143
|
logger.info(f"Stopping {self.__class__.__name__} - {self.get_name()}")
|
|
24074
24144
|
|
|
24075
24145
|
@Base.add_validate_call
|
|
24076
|
-
def wait(
|
|
24146
|
+
def wait(
|
|
24147
|
+
self,
|
|
24148
|
+
poll: int = 5,
|
|
24149
|
+
timeout: Optional[int] = None,
|
|
24150
|
+
logs: Optional[bool] = False,
|
|
24151
|
+
) -> None:
|
|
24077
24152
|
"""
|
|
24078
24153
|
Wait for a ProcessingJob resource.
|
|
24079
24154
|
|
|
24080
24155
|
Parameters:
|
|
24081
24156
|
poll: The number of seconds to wait between each poll.
|
|
24082
24157
|
timeout: The maximum number of seconds to wait before timing out.
|
|
24158
|
+
logs: Whether to print logs while waiting.
|
|
24083
24159
|
|
|
24084
24160
|
Raises:
|
|
24085
24161
|
TimeoutExceededError: If the resource does not reach a terminal state before the timeout.
|
|
@@ -24098,18 +24174,32 @@ class ProcessingJob(Base):
|
|
|
24098
24174
|
progress.add_task("Waiting for ProcessingJob...")
|
|
24099
24175
|
status = Status("Current status:")
|
|
24100
24176
|
|
|
24177
|
+
instance_count = self.processing_resources.cluster_config.instance_count
|
|
24178
|
+
if logs:
|
|
24179
|
+
multi_stream_logger = MultiLogStreamHandler(
|
|
24180
|
+
log_group_name=f"/aws/sagemaker/ProcessingJobs",
|
|
24181
|
+
log_stream_name_prefix=self.get_name(),
|
|
24182
|
+
expected_stream_count=instance_count,
|
|
24183
|
+
)
|
|
24184
|
+
|
|
24101
24185
|
with Live(
|
|
24102
24186
|
Panel(
|
|
24103
24187
|
Group(progress, status),
|
|
24104
24188
|
title="Wait Log Panel",
|
|
24105
24189
|
border_style=Style(color=Color.BLUE.value),
|
|
24106
|
-
)
|
|
24190
|
+
),
|
|
24191
|
+
transient=True,
|
|
24107
24192
|
):
|
|
24108
24193
|
while True:
|
|
24109
24194
|
self.refresh()
|
|
24110
24195
|
current_status = self.processing_job_status
|
|
24111
24196
|
status.update(f"Current status: [bold]{current_status}")
|
|
24112
24197
|
|
|
24198
|
+
if logs and multi_stream_logger.ready():
|
|
24199
|
+
stream_log_events = multi_stream_logger.get_latest_log_events()
|
|
24200
|
+
for stream_id, event in stream_log_events:
|
|
24201
|
+
logger.info(f"{stream_id}:\n{event['message']}")
|
|
24202
|
+
|
|
24113
24203
|
if current_status in terminal_states:
|
|
24114
24204
|
logger.info(f"Final Resource Status: [bold]{current_status}")
|
|
24115
24205
|
|
|
@@ -24537,7 +24627,8 @@ class Project(Base):
|
|
|
24537
24627
|
Group(progress, status),
|
|
24538
24628
|
title="Wait Log Panel",
|
|
24539
24629
|
border_style=Style(color=Color.BLUE.value),
|
|
24540
|
-
)
|
|
24630
|
+
),
|
|
24631
|
+
transient=True,
|
|
24541
24632
|
):
|
|
24542
24633
|
while True:
|
|
24543
24634
|
self.refresh()
|
|
@@ -25182,7 +25273,8 @@ class Space(Base):
|
|
|
25182
25273
|
Group(progress, status),
|
|
25183
25274
|
title="Wait Log Panel",
|
|
25184
25275
|
border_style=Style(color=Color.BLUE.value),
|
|
25185
|
-
)
|
|
25276
|
+
),
|
|
25277
|
+
transient=True,
|
|
25186
25278
|
):
|
|
25187
25279
|
while True:
|
|
25188
25280
|
self.refresh()
|
|
@@ -26433,13 +26525,19 @@ class TrainingJob(Base):
|
|
|
26433
26525
|
logger.info(f"Stopping {self.__class__.__name__} - {self.get_name()}")
|
|
26434
26526
|
|
|
26435
26527
|
@Base.add_validate_call
|
|
26436
|
-
def wait(
|
|
26528
|
+
def wait(
|
|
26529
|
+
self,
|
|
26530
|
+
poll: int = 5,
|
|
26531
|
+
timeout: Optional[int] = None,
|
|
26532
|
+
logs: Optional[bool] = False,
|
|
26533
|
+
) -> None:
|
|
26437
26534
|
"""
|
|
26438
26535
|
Wait for a TrainingJob resource.
|
|
26439
26536
|
|
|
26440
26537
|
Parameters:
|
|
26441
26538
|
poll: The number of seconds to wait between each poll.
|
|
26442
26539
|
timeout: The maximum number of seconds to wait before timing out.
|
|
26540
|
+
logs: Whether to print logs while waiting.
|
|
26443
26541
|
|
|
26444
26542
|
Raises:
|
|
26445
26543
|
TimeoutExceededError: If the resource does not reach a terminal state before the timeout.
|
|
@@ -26458,18 +26556,41 @@ class TrainingJob(Base):
|
|
|
26458
26556
|
progress.add_task("Waiting for TrainingJob...")
|
|
26459
26557
|
status = Status("Current status:")
|
|
26460
26558
|
|
|
26559
|
+
instance_count = (
|
|
26560
|
+
sum(
|
|
26561
|
+
instance_group.instance_count
|
|
26562
|
+
for instance_group in self.resource_config.instance_groups
|
|
26563
|
+
)
|
|
26564
|
+
if self.resource_config.instance_groups
|
|
26565
|
+
and not isinstance(self.resource_config.instance_groups, Unassigned)
|
|
26566
|
+
else self.resource_config.instance_count
|
|
26567
|
+
)
|
|
26568
|
+
|
|
26569
|
+
if logs:
|
|
26570
|
+
multi_stream_logger = MultiLogStreamHandler(
|
|
26571
|
+
log_group_name=f"/aws/sagemaker/TrainingJobs",
|
|
26572
|
+
log_stream_name_prefix=self.get_name(),
|
|
26573
|
+
expected_stream_count=instance_count,
|
|
26574
|
+
)
|
|
26575
|
+
|
|
26461
26576
|
with Live(
|
|
26462
26577
|
Panel(
|
|
26463
26578
|
Group(progress, status),
|
|
26464
26579
|
title="Wait Log Panel",
|
|
26465
26580
|
border_style=Style(color=Color.BLUE.value),
|
|
26466
|
-
)
|
|
26581
|
+
),
|
|
26582
|
+
transient=True,
|
|
26467
26583
|
):
|
|
26468
26584
|
while True:
|
|
26469
26585
|
self.refresh()
|
|
26470
26586
|
current_status = self.training_job_status
|
|
26471
26587
|
status.update(f"Current status: [bold]{current_status}")
|
|
26472
26588
|
|
|
26589
|
+
if logs and multi_stream_logger.ready():
|
|
26590
|
+
stream_log_events = multi_stream_logger.get_latest_log_events()
|
|
26591
|
+
for stream_id, event in stream_log_events:
|
|
26592
|
+
logger.info(f"{stream_id}:\n{event['message']}")
|
|
26593
|
+
|
|
26473
26594
|
if current_status in terminal_states:
|
|
26474
26595
|
logger.info(f"Final Resource Status: [bold]{current_status}")
|
|
26475
26596
|
|
|
@@ -26884,13 +27005,19 @@ class TransformJob(Base):
|
|
|
26884
27005
|
logger.info(f"Stopping {self.__class__.__name__} - {self.get_name()}")
|
|
26885
27006
|
|
|
26886
27007
|
@Base.add_validate_call
|
|
26887
|
-
def wait(
|
|
27008
|
+
def wait(
|
|
27009
|
+
self,
|
|
27010
|
+
poll: int = 5,
|
|
27011
|
+
timeout: Optional[int] = None,
|
|
27012
|
+
logs: Optional[bool] = False,
|
|
27013
|
+
) -> None:
|
|
26888
27014
|
"""
|
|
26889
27015
|
Wait for a TransformJob resource.
|
|
26890
27016
|
|
|
26891
27017
|
Parameters:
|
|
26892
27018
|
poll: The number of seconds to wait between each poll.
|
|
26893
27019
|
timeout: The maximum number of seconds to wait before timing out.
|
|
27020
|
+
logs: Whether to print logs while waiting.
|
|
26894
27021
|
|
|
26895
27022
|
Raises:
|
|
26896
27023
|
TimeoutExceededError: If the resource does not reach a terminal state before the timeout.
|
|
@@ -26909,18 +27036,32 @@ class TransformJob(Base):
|
|
|
26909
27036
|
progress.add_task("Waiting for TransformJob...")
|
|
26910
27037
|
status = Status("Current status:")
|
|
26911
27038
|
|
|
27039
|
+
instance_count = self.transform_resources.instance_count
|
|
27040
|
+
if logs:
|
|
27041
|
+
multi_stream_logger = MultiLogStreamHandler(
|
|
27042
|
+
log_group_name=f"/aws/sagemaker/TransformJobs",
|
|
27043
|
+
log_stream_name_prefix=self.get_name(),
|
|
27044
|
+
expected_stream_count=instance_count,
|
|
27045
|
+
)
|
|
27046
|
+
|
|
26912
27047
|
with Live(
|
|
26913
27048
|
Panel(
|
|
26914
27049
|
Group(progress, status),
|
|
26915
27050
|
title="Wait Log Panel",
|
|
26916
27051
|
border_style=Style(color=Color.BLUE.value),
|
|
26917
|
-
)
|
|
27052
|
+
),
|
|
27053
|
+
transient=True,
|
|
26918
27054
|
):
|
|
26919
27055
|
while True:
|
|
26920
27056
|
self.refresh()
|
|
26921
27057
|
current_status = self.transform_job_status
|
|
26922
27058
|
status.update(f"Current status: [bold]{current_status}")
|
|
26923
27059
|
|
|
27060
|
+
if logs and multi_stream_logger.ready():
|
|
27061
|
+
stream_log_events = multi_stream_logger.get_latest_log_events()
|
|
27062
|
+
for stream_id, event in stream_log_events:
|
|
27063
|
+
logger.info(f"{stream_id}:\n{event['message']}")
|
|
27064
|
+
|
|
26924
27065
|
if current_status in terminal_states:
|
|
26925
27066
|
logger.info(f"Final Resource Status: [bold]{current_status}")
|
|
26926
27067
|
|
|
@@ -27736,7 +27877,8 @@ class TrialComponent(Base):
|
|
|
27736
27877
|
Group(progress, status),
|
|
27737
27878
|
title="Wait Log Panel",
|
|
27738
27879
|
border_style=Style(color=Color.BLUE.value),
|
|
27739
|
-
)
|
|
27880
|
+
),
|
|
27881
|
+
transient=True,
|
|
27740
27882
|
):
|
|
27741
27883
|
while True:
|
|
27742
27884
|
self.refresh()
|
|
@@ -28397,7 +28539,8 @@ class UserProfile(Base):
|
|
|
28397
28539
|
Group(progress, status),
|
|
28398
28540
|
title="Wait Log Panel",
|
|
28399
28541
|
border_style=Style(color=Color.BLUE.value),
|
|
28400
|
-
)
|
|
28542
|
+
),
|
|
28543
|
+
transient=True,
|
|
28401
28544
|
):
|
|
28402
28545
|
while True:
|
|
28403
28546
|
self.refresh()
|
|
@@ -28882,7 +29025,8 @@ class Workforce(Base):
|
|
|
28882
29025
|
Group(progress, status),
|
|
28883
29026
|
title="Wait Log Panel",
|
|
28884
29027
|
border_style=Style(color=Color.BLUE.value),
|
|
28885
|
-
)
|
|
29028
|
+
),
|
|
29029
|
+
transient=True,
|
|
28886
29030
|
):
|
|
28887
29031
|
while True:
|
|
28888
29032
|
self.refresh()
|
|
@@ -3306,12 +3306,14 @@ class CodeEditorAppSettings(Base):
|
|
|
3306
3306
|
custom_images: A list of custom SageMaker images that are configured to run as a Code Editor app.
|
|
3307
3307
|
lifecycle_config_arns: The Amazon Resource Name (ARN) of the Code Editor application lifecycle configuration.
|
|
3308
3308
|
app_lifecycle_management: Settings that are used to configure and manage the lifecycle of CodeEditor applications.
|
|
3309
|
+
built_in_lifecycle_config_arn: The lifecycle configuration that runs before the default lifecycle configuration. It can override changes made in the default lifecycle configuration.
|
|
3309
3310
|
"""
|
|
3310
3311
|
|
|
3311
3312
|
default_resource_spec: Optional[ResourceSpec] = Unassigned()
|
|
3312
3313
|
custom_images: Optional[List[CustomImage]] = Unassigned()
|
|
3313
3314
|
lifecycle_config_arns: Optional[List[str]] = Unassigned()
|
|
3314
3315
|
app_lifecycle_management: Optional[AppLifecycleManagement] = Unassigned()
|
|
3316
|
+
built_in_lifecycle_config_arn: Optional[str] = Unassigned()
|
|
3315
3317
|
|
|
3316
3318
|
|
|
3317
3319
|
class CodeRepository(Base):
|
|
@@ -4235,6 +4237,7 @@ class JupyterLabAppSettings(Base):
|
|
|
4235
4237
|
code_repositories: A list of Git repositories that SageMaker automatically displays to users for cloning in the JupyterLab application.
|
|
4236
4238
|
app_lifecycle_management: Indicates whether idle shutdown is activated for JupyterLab applications.
|
|
4237
4239
|
emr_settings: The configuration parameters that specify the IAM roles assumed by the execution role of SageMaker (assumable roles) and the cluster instances or job execution environments (execution roles or runtime roles) to manage and access resources required for running Amazon EMR clusters or Amazon EMR Serverless applications.
|
|
4240
|
+
built_in_lifecycle_config_arn: The lifecycle configuration that runs before the default lifecycle configuration. It can override changes made in the default lifecycle configuration.
|
|
4238
4241
|
"""
|
|
4239
4242
|
|
|
4240
4243
|
default_resource_spec: Optional[ResourceSpec] = Unassigned()
|
|
@@ -4243,6 +4246,7 @@ class JupyterLabAppSettings(Base):
|
|
|
4243
4246
|
code_repositories: Optional[List[CodeRepository]] = Unassigned()
|
|
4244
4247
|
app_lifecycle_management: Optional[AppLifecycleManagement] = Unassigned()
|
|
4245
4248
|
emr_settings: Optional[EmrSettings] = Unassigned()
|
|
4249
|
+
built_in_lifecycle_config_arn: Optional[str] = Unassigned()
|
|
4246
4250
|
|
|
4247
4251
|
|
|
4248
4252
|
class DefaultEbsStorageSettings(Base):
|
|
@@ -160,6 +160,12 @@ def enable_textual_rich_console_and_traceback():
|
|
|
160
160
|
textual_rich_console_and_traceback_enabled = True
|
|
161
161
|
|
|
162
162
|
|
|
163
|
+
def get_rich_handler():
|
|
164
|
+
handler = RichHandler(markup=True)
|
|
165
|
+
handler.setFormatter(logging.Formatter("%(message)s"))
|
|
166
|
+
return handler
|
|
167
|
+
|
|
168
|
+
|
|
163
169
|
def get_textual_rich_logger(name: str, log_level: str = "INFO") -> logging.Logger:
|
|
164
170
|
"""
|
|
165
171
|
Get a logger with textual rich handler.
|
|
@@ -175,7 +181,7 @@ def get_textual_rich_logger(name: str, log_level: str = "INFO") -> logging.Logge
|
|
|
175
181
|
|
|
176
182
|
"""
|
|
177
183
|
enable_textual_rich_console_and_traceback()
|
|
178
|
-
handler =
|
|
184
|
+
handler = get_rich_handler()
|
|
179
185
|
logging.basicConfig(level=getattr(logging, log_level), handlers=[handler])
|
|
180
186
|
logger = logging.getLogger(name)
|
|
181
187
|
|
|
@@ -217,8 +223,8 @@ def configure_logging(log_level=None):
|
|
|
217
223
|
# reset any currently associated handlers with log level
|
|
218
224
|
for handler in _logger.handlers:
|
|
219
225
|
_logger.removeHandler(handler)
|
|
220
|
-
|
|
221
|
-
_logger.addHandler(
|
|
226
|
+
rich_handler = get_rich_handler()
|
|
227
|
+
_logger.addHandler(rich_handler)
|
|
222
228
|
|
|
223
229
|
|
|
224
230
|
def is_snake_case(s: str):
|
|
@@ -20,6 +20,8 @@ OBJECT_METHODS = set(
|
|
|
20
20
|
|
|
21
21
|
TERMINAL_STATES = set(["Completed", "Stopped", "Deleted", "Failed", "Succeeded", "Cancelled"])
|
|
22
22
|
|
|
23
|
+
RESOURCE_WITH_LOGS = set(["TrainingJob", "ProcessingJob", "TransformJob"])
|
|
24
|
+
|
|
23
25
|
CONFIGURABLE_ATTRIBUTE_SUBSTRINGS = [
|
|
24
26
|
"kms",
|
|
25
27
|
"s3",
|
{sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/tools/resources_codegen.py
RENAMED
|
@@ -29,6 +29,7 @@ from sagemaker_core.tools.constants import (
|
|
|
29
29
|
CONFIG_SCHEMA_FILE_NAME,
|
|
30
30
|
PYTHON_TYPES_TO_BASIC_JSON_TYPES,
|
|
31
31
|
CONFIGURABLE_ATTRIBUTE_SUBSTRINGS,
|
|
32
|
+
RESOURCE_WITH_LOGS,
|
|
32
33
|
)
|
|
33
34
|
from sagemaker_core.tools.method import Method, MethodType
|
|
34
35
|
from sagemaker_core.main.utils import (
|
|
@@ -71,6 +72,8 @@ from sagemaker_core.tools.templates import (
|
|
|
71
72
|
GET_ALL_METHOD_WITH_ARGS_TEMPLATE,
|
|
72
73
|
UPDATE_METHOD_TEMPLATE_WITHOUT_DECORATOR,
|
|
73
74
|
RESOURCE_METHOD_EXCEPTION_DOCSTRING,
|
|
75
|
+
INIT_WAIT_LOGS_TEMPLATE,
|
|
76
|
+
PRINT_WAIT_LOGS,
|
|
74
77
|
)
|
|
75
78
|
from sagemaker_core.tools.data_extractor import (
|
|
76
79
|
load_combined_shapes_data,
|
|
@@ -188,6 +191,7 @@ class ResourcesCodeGen:
|
|
|
188
191
|
"from sagemaker_core.main.utils import SageMakerClient, ResourceIterator, Unassigned, get_textual_rich_logger, "
|
|
189
192
|
"snake_to_pascal, pascal_to_snake, is_not_primitive, is_not_str_dict, is_primitive_list, serialize",
|
|
190
193
|
"from sagemaker_core.main.intelligent_defaults_helper import load_default_configs_for_resource_name, get_config_value",
|
|
194
|
+
"from sagemaker_core.main.logs import MultiLogStreamHandler",
|
|
191
195
|
"from sagemaker_core.main.shapes import *",
|
|
192
196
|
"from sagemaker_core.main.exceptions import *",
|
|
193
197
|
]
|
|
@@ -1541,6 +1545,28 @@ class ResourcesCodeGen:
|
|
|
1541
1545
|
|
|
1542
1546
|
return "'(Unknown)'"
|
|
1543
1547
|
|
|
1548
|
+
def _get_instance_count_ref(self, resource_name: str) -> str:
|
|
1549
|
+
"""Get the instance count reference for a resource object.
|
|
1550
|
+
Args:
|
|
1551
|
+
resource_name (str): The resource name.
|
|
1552
|
+
Returns:
|
|
1553
|
+
str: The instance count reference for resource object
|
|
1554
|
+
"""
|
|
1555
|
+
|
|
1556
|
+
if resource_name == "TrainingJob":
|
|
1557
|
+
return """(
|
|
1558
|
+
sum(instance_group.instance_count for instance_group in self.resource_config.instance_groups)
|
|
1559
|
+
if self.resource_config.instance_groups and not isinstance(self.resource_config.instance_groups, Unassigned)
|
|
1560
|
+
else self.resource_config.instance_count
|
|
1561
|
+
)
|
|
1562
|
+
"""
|
|
1563
|
+
elif resource_name == "TransformJob":
|
|
1564
|
+
return "self.transform_resources.instance_count"
|
|
1565
|
+
elif resource_name == "ProcessingJob":
|
|
1566
|
+
return "self.processing_resources.cluster_config.instance_count"
|
|
1567
|
+
|
|
1568
|
+
raise ValueError(f"Instance count reference not found for resource {resource_name}")
|
|
1569
|
+
|
|
1544
1570
|
def generate_wait_method(self, resource_name: str) -> str:
|
|
1545
1571
|
"""Auto-Generate WAIT Method for a waitable resource.
|
|
1546
1572
|
|
|
@@ -1573,11 +1599,32 @@ class ResourcesCodeGen:
|
|
|
1573
1599
|
)
|
|
1574
1600
|
formatted_failed_block = add_indent(formatted_failed_block, 16)
|
|
1575
1601
|
|
|
1602
|
+
logs_arg = ""
|
|
1603
|
+
logs_arg_doc = ""
|
|
1604
|
+
init_wait_logs = ""
|
|
1605
|
+
print_wait_logs = ""
|
|
1606
|
+
if resource_name in RESOURCE_WITH_LOGS:
|
|
1607
|
+
logs_arg = "logs: Optional[bool] = False,"
|
|
1608
|
+
logs_arg_doc = "logs: Whether to print logs while waiting.\n"
|
|
1609
|
+
|
|
1610
|
+
instance_count = self._get_instance_count_ref(resource_name)
|
|
1611
|
+
init_wait_logs = add_indent(
|
|
1612
|
+
INIT_WAIT_LOGS_TEMPLATE.format(
|
|
1613
|
+
get_instance_count=instance_count,
|
|
1614
|
+
job_type=resource_name,
|
|
1615
|
+
)
|
|
1616
|
+
)
|
|
1617
|
+
print_wait_logs = add_indent(PRINT_WAIT_LOGS, 12)
|
|
1618
|
+
|
|
1576
1619
|
formatted_method = WAIT_METHOD_TEMPLATE.format(
|
|
1577
1620
|
terminal_resource_states=terminal_resource_states,
|
|
1578
1621
|
status_key_path=status_key_path,
|
|
1579
1622
|
failed_error_block=formatted_failed_block,
|
|
1580
1623
|
resource_name=resource_name,
|
|
1624
|
+
logs_arg=logs_arg,
|
|
1625
|
+
logs_arg_doc=logs_arg_doc,
|
|
1626
|
+
init_wait_logs=init_wait_logs,
|
|
1627
|
+
print_wait_logs=print_wait_logs,
|
|
1581
1628
|
)
|
|
1582
1629
|
return formatted_method
|
|
1583
1630
|
|
|
@@ -262,12 +262,31 @@ if "failed" in current_status.lower():
|
|
|
262
262
|
raise FailedStatusError(resource_type="{resource_name}", status=current_status, reason={reason})
|
|
263
263
|
"""
|
|
264
264
|
|
|
265
|
+
INIT_WAIT_LOGS_TEMPLATE = """
|
|
266
|
+
instance_count = {get_instance_count}
|
|
267
|
+
if logs:
|
|
268
|
+
multi_stream_logger = MultiLogStreamHandler(
|
|
269
|
+
log_group_name=f"/aws/sagemaker/{job_type}s",
|
|
270
|
+
log_stream_name_prefix=self.get_name(),
|
|
271
|
+
expected_stream_count=instance_count
|
|
272
|
+
)
|
|
273
|
+
"""
|
|
274
|
+
|
|
275
|
+
PRINT_WAIT_LOGS = """
|
|
276
|
+
if logs and multi_stream_logger.ready():
|
|
277
|
+
stream_log_events = multi_stream_logger.get_latest_log_events()
|
|
278
|
+
for stream_id, event in stream_log_events:
|
|
279
|
+
logger.info(f"{stream_id}:\\n{event['message']}")
|
|
280
|
+
"""
|
|
281
|
+
|
|
282
|
+
|
|
265
283
|
WAIT_METHOD_TEMPLATE = '''
|
|
266
284
|
@Base.add_validate_call
|
|
267
285
|
def wait(
|
|
268
286
|
self,
|
|
269
287
|
poll: int = 5,
|
|
270
|
-
timeout: Optional[int] = None
|
|
288
|
+
timeout: Optional[int] = None,
|
|
289
|
+
{logs_arg}
|
|
271
290
|
) -> None:
|
|
272
291
|
"""
|
|
273
292
|
Wait for a {resource_name} resource.
|
|
@@ -275,7 +294,7 @@ def wait(
|
|
|
275
294
|
Parameters:
|
|
276
295
|
poll: The number of seconds to wait between each poll.
|
|
277
296
|
timeout: The maximum number of seconds to wait before timing out.
|
|
278
|
-
|
|
297
|
+
{logs_arg_doc}
|
|
279
298
|
Raises:
|
|
280
299
|
TimeoutExceededError: If the resource does not reach a terminal state before the timeout.
|
|
281
300
|
FailedStatusError: If the resource reaches a failed state.
|
|
@@ -291,13 +310,22 @@ def wait(
|
|
|
291
310
|
)
|
|
292
311
|
progress.add_task("Waiting for {resource_name}...")
|
|
293
312
|
status = Status("Current status:")
|
|
294
|
-
|
|
295
|
-
|
|
313
|
+
{init_wait_logs}
|
|
314
|
+
|
|
315
|
+
with Live(
|
|
316
|
+
Panel(
|
|
317
|
+
Group(progress, status),
|
|
318
|
+
title="Wait Log Panel",
|
|
319
|
+
border_style=Style(color=Color.BLUE.value
|
|
320
|
+
)
|
|
321
|
+
),
|
|
322
|
+
transient=True
|
|
323
|
+
):
|
|
296
324
|
while True:
|
|
297
325
|
self.refresh()
|
|
298
326
|
current_status = self{status_key_path}
|
|
299
327
|
status.update(f"Current status: [bold]{{current_status}}")
|
|
300
|
-
|
|
328
|
+
{print_wait_logs}
|
|
301
329
|
if current_status in terminal_states:
|
|
302
330
|
logger.info(f"Final Resource Status: [bold]{{current_status}}")
|
|
303
331
|
{failed_error_block}
|
|
@@ -338,7 +366,15 @@ def wait_for_status(
|
|
|
338
366
|
progress.add_task(f"Waiting for {resource_name} to reach [bold]{{target_status}} status...")
|
|
339
367
|
status = Status("Current status:")
|
|
340
368
|
|
|
341
|
-
with Live(
|
|
369
|
+
with Live(
|
|
370
|
+
Panel(
|
|
371
|
+
Group(progress, status),
|
|
372
|
+
title="Wait Log Panel",
|
|
373
|
+
border_style=Style(color=Color.BLUE.value
|
|
374
|
+
)
|
|
375
|
+
),
|
|
376
|
+
transient=True
|
|
377
|
+
):
|
|
342
378
|
while True:
|
|
343
379
|
self.refresh()
|
|
344
380
|
current_status = self{status_key_path}
|
|
@@ -17,6 +17,7 @@ src/sagemaker_core/main/__init__.py
|
|
|
17
17
|
src/sagemaker_core/main/config_schema.py
|
|
18
18
|
src/sagemaker_core/main/exceptions.py
|
|
19
19
|
src/sagemaker_core/main/intelligent_defaults_helper.py
|
|
20
|
+
src/sagemaker_core/main/logs.py
|
|
20
21
|
src/sagemaker_core/main/resources.py
|
|
21
22
|
src/sagemaker_core/main/shapes.py
|
|
22
23
|
src/sagemaker_core/main/user_agent.py
|
sagemaker_core-1.0.9/VERSION
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
1.0.9
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/main/code_injection/__init__.py
RENAMED
|
File without changes
|
{sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/main/code_injection/base.py
RENAMED
|
File without changes
|
{sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/main/code_injection/codec.py
RENAMED
|
File without changes
|
{sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/main/code_injection/constants.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core/tools/resources_extractor.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sagemaker_core-1.0.9 → sagemaker_core-1.0.10}/src/sagemaker_core.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|