sagemaker-ops-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sagemaker_ops/__init__.py +4 -0
- sagemaker_ops/aws.py +502 -0
- sagemaker_ops/cli.py +262 -0
- sagemaker_ops/tui.py +458 -0
- sagemaker_ops_cli-0.1.0.dist-info/METADATA +241 -0
- sagemaker_ops_cli-0.1.0.dist-info/RECORD +9 -0
- sagemaker_ops_cli-0.1.0.dist-info/WHEEL +5 -0
- sagemaker_ops_cli-0.1.0.dist-info/entry_points.txt +2 -0
- sagemaker_ops_cli-0.1.0.dist-info/top_level.txt +1 -0
sagemaker_ops/cli.py
ADDED
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Annotated
|
|
6
|
+
|
|
7
|
+
import typer
|
|
8
|
+
from rich.console import Console
|
|
9
|
+
from rich.table import Table
|
|
10
|
+
|
|
11
|
+
from sagemaker_ops import __version__
|
|
12
|
+
from sagemaker_ops.aws import (
|
|
13
|
+
AwsCliError,
|
|
14
|
+
build_contexts,
|
|
15
|
+
describe_pipeline_execution,
|
|
16
|
+
format_dt,
|
|
17
|
+
format_duration,
|
|
18
|
+
list_active_pipeline_executions,
|
|
19
|
+
list_pipeline_executions_page,
|
|
20
|
+
list_pipeline_steps,
|
|
21
|
+
list_processing_jobs,
|
|
22
|
+
list_processing_jobs_page,
|
|
23
|
+
load_job_spec,
|
|
24
|
+
parse_parameters,
|
|
25
|
+
start_pipeline_execution,
|
|
26
|
+
submit_processing_job,
|
|
27
|
+
)
|
|
28
|
+
from sagemaker_ops.tui import ProcessingJobsApp, PipelineExecutionsApp
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# Rich console shared by the commands in this module for terminal output.
console = Console()

# Root application plus one Typer group per area. The help strings are
# user-facing (shown by --help) and are kept verbatim.
app = typer.Typer(help="SageMaker Processing Job 与 Pipeline 运维 CLI。", no_args_is_help=True)
processing_app = typer.Typer(help="提交和查看 SageMaker Processing Job。", no_args_is_help=True)
pipeline_app = typer.Typer(help="启动和查看 SageMaker Pipeline。", no_args_is_help=True)
tui_app = typer.Typer(help="交互式 TUI。", no_args_is_help=True)

# Mount the groups on the root app; the tuple order is the registration order.
for _group_name, _group in (
    ("processing", processing_app),
    ("pipeline", pipeline_app),
    ("tui", tui_app),
):
    app.add_typer(_group, name=_group_name)
del _group_name, _group
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def version_callback(value: bool) -> None:
    """Print ``smops <version>`` and stop the CLI when *value* is truthy.

    Does nothing when *value* is falsy.

    Raises:
        typer.Exit: After the version line has been printed.
    """
    if not value:
        return
    console.print(f"smops {__version__}")
    raise typer.Exit()
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# Root callback of the CLI. It only declares the global --version flag; the
# flag's work is done by version_callback, which is attached to the option.
@app.callback()
def main(
    version: Annotated[bool, typer.Option("--version", callback=version_callback, help="显示版本。")] = False,
) -> None:
    # The value has already been consumed by the option callback; drop it so
    # the parameter is not reported as unused.
    del version
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
# `processing submit`: load the job spec from --config, then either print it
# (--dry-run) or submit it using the first context built from --profile and
# --region, and print the response as JSON. An AwsCliError is reported in red
# and turned into exit status 1.
#
# NOTE: Typer uses the docstring as this command's --help text, so it is kept
# verbatim; the English documentation lives in these comments.
@processing_app.command("submit")
def processing_submit(
    config: Annotated[Path, typer.Option("--config", "-c", exists=True, help="create_processing_job 的 JSON/YAML 参数文件。")],
    profile: Annotated[str | None, typer.Option("--profile", "-p", help="AWS profile。")] = None,
    region: Annotated[str | None, typer.Option("--region", "-r", help="AWS region。")] = None,
    dry_run: Annotated[bool, typer.Option("--dry-run", help="只打印请求,不提交。")] = False,
) -> None:
    """提交 SageMaker Processing Job。"""
    try:
        spec = load_job_spec(config)
        if dry_run:
            # default=str stringifies values json cannot serialise natively.
            console.print_json(json.dumps(spec, default=str))
            return
        ctx = build_contexts((profile,) if profile else (), region)[0]
        response = submit_processing_job(ctx, spec)
    except AwsCliError as exc:
        # Print the message as plain text. Interpolating it into Rich markup
        # would let any "[...]" fragment in the message be parsed as a style
        # tag (dropping text or raising MarkupError).
        console.print(str(exc), style="red", markup=False)
        raise typer.Exit(1) from exc
    console.print("[green]Processing job 已提交[/green]")
    console.print_json(json.dumps(response, default=str))
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
# `processing list`: fetch one page of jobs per selected context and render
# them as a table. --next-token is only accepted when exactly one context is
# selected, and a continuation token is only printed in that same case.
# An AwsCliError is reported in red and turned into exit status 1.
#
# NOTE: Typer uses the docstring as this command's --help text, so it is kept
# verbatim; the English documentation lives in these comments.
@processing_app.command("list")
def processing_list(
    profile: Annotated[list[str] | None, typer.Option("--profile", "-p", help="AWS profile,可重复传。")] = None,
    region: Annotated[str | None, typer.Option("--region", "-r", help="AWS region。")] = None,
    all_profiles: Annotated[bool, typer.Option("--all-profiles", help="查看本机所有 AWS profiles。")] = False,
    max_results: Annotated[int, typer.Option("--max-results", min=1, max=100, help="每页最多读取多少个 running job。")] = 20,
    next_token: Annotated[str | None, typer.Option("--next-token", help="上一页输出的 Next token。")] = None,
) -> None:
    """按页列出正在运行的 Processing Jobs。"""
    try:
        with console.status("正在查询 SageMaker Processing Jobs..."):
            contexts = build_contexts(tuple(profile or ()), region, all_profiles=all_profiles)
            if next_token and len(contexts) != 1:
                raise AwsCliError("--next-token 只支持单个 AWS profile 查询")
            pages = [list_processing_jobs_page(ctx, page_size=max_results, next_token=next_token) for ctx in contexts]
            jobs = [job for page in pages for job in page.jobs]
    except AwsCliError as exc:
        # Plain-text print: the message must not be parsed as Rich markup.
        console.print(str(exc), style="red", markup=False)
        raise typer.Exit(1) from exc

    # One context yields exactly one page, so its token (if any) is the one
    # the user can pass back through --next-token.
    continuation = pages[0].next_token if len(contexts) == 1 else None

    if jobs:
        table = Table("Profile", "Region", "Job", "Status", "Runtime", "Instance", "Created")
        for job in jobs:
            instance = job.instance_type
            if job.instance_count:
                instance = f"{job.instance_count}x {instance}"
            table.add_row(
                job.profile,
                job.region,
                job.name,
                job.status,
                # Runtime is measured from the start time when there is one,
                # otherwise from creation.
                format_duration(job.started_time or job.creation_time),
                instance,
                format_dt(job.creation_time),
            )
        console.print(table)
    else:
        console.print("[yellow]当前查询范围没有正在运行的 processing jobs。[/yellow]")

    if continuation:
        # soft_wrap keeps the token on a single line; Rich would otherwise
        # hard-wrap it at the console width and break copy/paste. Markup and
        # highlighting are disabled so the token is emitted untouched.
        console.print(f"Next token: {continuation}", markup=False, highlight=False, soft_wrap=True)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
# `pipeline start`: start an execution of the pipeline named by --name using
# the first context built from --profile and --region, then print the response
# as JSON. Repeated --parameter values are passed through parse_parameters.
# An AwsCliError is reported in red and turned into exit status 1.
#
# NOTE: Typer uses the docstring as this command's --help text, so it is kept
# verbatim; the English documentation lives in these comments.
@pipeline_app.command("start")
def pipeline_start(
    name: Annotated[str, typer.Option("--name", "-n", help="SageMaker Pipeline 名称。")],
    profile: Annotated[str | None, typer.Option("--profile", "-p", help="AWS profile。")] = None,
    region: Annotated[str | None, typer.Option("--region", "-r", help="AWS region。")] = None,
    display_name: Annotated[str | None, typer.Option("--display-name", help="Pipeline execution display name。")] = None,
    parameter: Annotated[list[str] | None, typer.Option("--parameter", help="Pipeline 参数,格式 NAME=VALUE,可重复传。")] = None,
    client_request_token: Annotated[str | None, typer.Option("--client-request-token", help="幂等 token。")] = None,
) -> None:
    """启动 SageMaker Pipeline execution。"""
    try:
        ctx = build_contexts((profile,) if profile else (), region)[0]
        response = start_pipeline_execution(
            ctx,
            pipeline_name=name,
            display_name=display_name,
            # Parsed inside the try so a parsing failure raised as AwsCliError
            # is reported like any other error.
            parameters=parse_parameters(parameter or ()),
            client_request_token=client_request_token,
        )
    except AwsCliError as exc:
        # Print the message as plain text. Interpolating it into Rich markup
        # would let any "[...]" fragment in the message be parsed as a style
        # tag (dropping text or raising MarkupError).
        console.print(str(exc), style="red", markup=False)
        raise typer.Exit(1) from exc
    console.print("[green]Pipeline execution 已启动[/green]")
    console.print_json(json.dumps(response, default=str))
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
# `pipeline list`: fetch one page of pipeline executions per selected context
# and render them as a table. --next-token is only accepted when exactly one
# context is selected and --name is not given; a continuation token is only
# printed for a single-context query. An AwsCliError is reported in red and
# turned into exit status 1.
#
# NOTE: Typer uses the docstring as this command's --help text, so it is kept
# verbatim; the English documentation lives in these comments.
@pipeline_app.command("list")
def pipeline_list(
    pipeline_name: Annotated[str | None, typer.Option("--name", "-n", help="只查看某个 Pipeline。")] = None,
    profile: Annotated[list[str] | None, typer.Option("--profile", "-p", help="AWS profile,可重复传。")] = None,
    region: Annotated[str | None, typer.Option("--region", "-r", help="AWS region。")] = None,
    all_profiles: Annotated[bool, typer.Option("--all-profiles", help="查看本机所有 AWS profiles。")] = False,
    per_pipeline: Annotated[int, typer.Option("--per-pipeline", min=1, max=100, help="每个 pipeline 最多读取多少个 execution。")] = 10,
    hours: Annotated[int, typer.Option("--hours", min=1, max=168, help="额外显示最近多少小时内结束的 executions。")] = 3,
    pipeline_page_size: Annotated[int, typer.Option("--pipeline-page-size", min=1, max=100, help="不传 --name 时每页扫描多少个 pipelines。")] = 10,
    next_token: Annotated[str | None, typer.Option("--next-token", help="上一页输出的 Next token。")] = None,
) -> None:
    """用表格列出正在运行和最近结束的 Pipeline executions。"""
    try:
        with console.status("正在查询 SageMaker Pipeline executions..."):
            contexts = build_contexts(tuple(profile or ()), region, all_profiles=all_profiles)
            if next_token and (len(contexts) != 1 or pipeline_name):
                raise AwsCliError("--next-token 只支持单个 AWS profile 且不指定 --name 的查询")
            pages = [
                list_pipeline_executions_page(
                    ctx,
                    pipeline_name=pipeline_name,
                    per_pipeline=per_pipeline,
                    recent_hours=hours,
                    pipeline_page_size=pipeline_page_size,
                    next_token=next_token,
                )
                for ctx in contexts
            ]
            executions = [item for page in pages for item in page.executions]
    except AwsCliError as exc:
        # Plain-text print: the message must not be parsed as Rich markup.
        console.print(str(exc), style="red", markup=False)
        raise typer.Exit(1) from exc

    # One context yields exactly one page, so its token (if any) is the one
    # the user can pass back through --next-token.
    continuation = pages[0].next_token if len(contexts) == 1 else None

    if executions:
        table = Table("Profile", "Region", "Pipeline", "Execution", "Status", "Runtime", "Started")
        for execution in executions:
            table.add_row(
                execution.profile,
                execution.region,
                execution.pipeline_name,
                # Fall back to the last path segment of the ARN when the
                # execution has no display name.
                execution.display_name or execution.execution_arn.rsplit("/", 1)[-1],
                execution.status,
                format_duration(execution.start_time),
                format_dt(execution.start_time),
            )
        console.print(table)
    else:
        target = f"Pipeline {pipeline_name}" if pipeline_name else "当前页"
        console.print(f"[yellow]{target} 没有正在运行或最近 {hours} 小时内结束的 pipeline executions。[/yellow]")

    if continuation:
        # soft_wrap keeps the token on a single line; Rich would otherwise
        # hard-wrap it at the console width and break copy/paste. Markup and
        # highlighting are disabled so the token is emitted untouched.
        console.print(f"Next token: {continuation}", markup=False, highlight=False, soft_wrap=True)
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
# `pipeline steps`: describe the execution given by --execution-arn, list its
# steps, and print a header line followed by a table with one row per step.
# An AwsCliError is reported in red and turned into exit status 1.
#
# NOTE: Typer uses the docstring as this command's --help text, so it is kept
# verbatim; the English documentation lives in these comments.
@pipeline_app.command("steps")
def pipeline_steps(
    execution_arn: Annotated[str, typer.Option("--execution-arn", "-e", help="Pipeline execution ARN。")],
    profile: Annotated[str | None, typer.Option("--profile", "-p", help="AWS profile。")] = None,
    region: Annotated[str | None, typer.Option("--region", "-r", help="AWS region。")] = None,
) -> None:
    """查看某个 Pipeline execution 的 steps。"""
    # Local import: only this command needs it, and rich is already a
    # dependency of this module.
    from rich.markup import escape

    try:
        ctx = build_contexts((profile,) if profile else (), region)[0]
        detail = describe_pipeline_execution(ctx, execution_arn)
        steps = list_pipeline_steps(ctx, execution_arn)
    except AwsCliError as exc:
        # Plain-text print: the message must not be parsed as Rich markup.
        console.print(str(exc), style="red", markup=False)
        raise typer.Exit(1) from exc

    # NOTE(review): the raw DescribePipelineExecution response appears to carry
    # PipelineArn rather than PipelineName - confirm the helper adds this key,
    # otherwise the bold part of the header is always empty.
    console.print(f"[bold]{detail.get('PipelineName', '')}[/bold] {detail.get('PipelineExecutionStatus', '')}")
    table = Table("Step", "Type", "Status", "Runtime", "Failure")
    for step in steps:
        table.add_row(
            step.get("StepName", ""),
            step.get("StepType", ""),
            step.get("StepStatus", ""),
            format_duration(step.get("StartTime"), step.get("EndTime")),
            # The failure reason is free text; escape it so bracketed fragments
            # are shown literally instead of being parsed as markup. `or ""`
            # also covers an explicit None value.
            escape(step.get("FailureReason") or ""),
        )
    console.print(table)
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
# `tui processing`: build a ProcessingJobsApp from the CLI options and run it.
#
# NOTE: Typer uses the docstring as this command's --help text, so it is kept
# verbatim; the English documentation lives in these comments.
@tui_app.command("processing")
def tui_processing(
    profile: Annotated[list[str] | None, typer.Option("--profile", "-p", help="AWS profile,可重复传。")] = None,
    region: Annotated[str | None, typer.Option("--region", "-r", help="AWS region。")] = None,
    all_profiles: Annotated[bool, typer.Option("--all-profiles", help="查看本机所有 AWS profiles。")] = False,
    refresh: Annotated[int, typer.Option("--refresh", min=5, max=300, help="刷新间隔秒数。")] = 15,
) -> None:
    """交互式查看正在运行的 Processing Jobs。"""
    # Typer passes None when --profile is not given; the app receives a tuple.
    selected_profiles = tuple(profile or ())
    dashboard = ProcessingJobsApp(selected_profiles, region, all_profiles, refresh)
    dashboard.run()
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
# `tui pipelines`: build a PipelineExecutionsApp from the CLI options and run
# it.
#
# NOTE: Typer uses the docstring as this command's --help text, so it is kept
# verbatim; the English documentation lives in these comments.
@tui_app.command("pipelines")
def tui_pipelines(
    pipeline_name: Annotated[str | None, typer.Option("--name", "-n", help="只查看某个 Pipeline。")] = None,
    profile: Annotated[list[str] | None, typer.Option("--profile", "-p", help="AWS profile,可重复传。")] = None,
    region: Annotated[str | None, typer.Option("--region", "-r", help="AWS region。")] = None,
    all_profiles: Annotated[bool, typer.Option("--all-profiles", help="查看本机所有 AWS profiles。")] = False,
    refresh: Annotated[int, typer.Option("--refresh", min=5, max=300, help="刷新间隔秒数。")] = 15,
    hours: Annotated[int, typer.Option("--hours", min=1, max=168, help="额外显示最近多少小时内结束的 executions。")] = 3,
) -> None:
    """交互式查看正在运行和最近结束的 Pipeline executions、steps 和失败日志。"""
    # Typer passes None when --profile is not given; the app receives a tuple.
    selected_profiles = tuple(profile or ())
    dashboard = PipelineExecutionsApp(
        selected_profiles,
        region,
        all_profiles,
        refresh,
        pipeline_name,
        hours,
    )
    dashboard.run()
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
# Run the Typer application when this module is executed as a script.
if __name__ == "__main__":
    app()
|
|
262
|
+
|