sagemaker-ops-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sagemaker_ops/tui.py ADDED
@@ -0,0 +1,458 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ from textual import on
6
+ from textual.binding import Binding
7
+ from textual.app import App, ComposeResult
8
+ from textual.containers import Horizontal, Vertical
9
+ from textual.widgets import DataTable, Footer, Header, RichLog, Static
10
+
11
+ from sagemaker_ops.aws import (
12
+ AwsCliError,
13
+ AwsContext,
14
+ ProcessingJobView,
15
+ PipelineExecutionView,
16
+ build_contexts,
17
+ format_dt,
18
+ format_duration,
19
+ infer_log_source,
20
+ list_active_pipeline_executions,
21
+ list_pipeline_steps,
22
+ list_processing_jobs,
23
+ tail_step_logs,
24
+ )
25
+
26
+
27
class BaseSageMakerApp(App[None]):
    """Shared scaffolding for the SageMaker monitoring TUIs.

    Stores the profile/region selection, lazily builds the AWS contexts and
    schedules a periodic refresh. Subclasses are expected to compose a
    ``Static`` widget with id ``status`` (used by :meth:`show_error`) and to
    override :meth:`action_refresh`.
    """

    CSS = """
    Screen {
        layout: vertical;
    }

    #content {
        height: 1fr;
    }

    DataTable {
        height: 1fr;
        border: solid $surface;
    }

    #detail, #logs {
        height: 1fr;
        border: solid $surface;
        padding: 1;
    }

    #detail {
        width: 40%;
    }

    #pipeline-content {
        height: 1fr;
    }

    #executions-pane {
        height: 42%;
    }

    #bottom-pane {
        height: 58%;
    }

    #steps-pane {
        width: 48%;
    }

    #logs-pane {
        width: 52%;
    }
    """

    BINDINGS = [
        ("r", "refresh", "Refresh"),
        ("q", "quit", "Quit"),
    ]

    def __init__(
        self,
        profiles: tuple[str, ...],
        region: str | None,
        all_profiles: bool,
        refresh_seconds: int,
    ) -> None:
        """Remember the selection options; AWS contexts are built on first use.

        Args:
            profiles: AWS profile names forwarded to ``build_contexts``.
            region: Region forwarded to ``build_contexts`` (may be ``None``).
            all_profiles: Flag forwarded to ``build_contexts``.
            refresh_seconds: Interval, in seconds, between automatic refreshes.
        """
        super().__init__()
        self.profiles = profiles
        self.region = region
        self.all_profiles = all_profiles
        self.refresh_seconds = refresh_seconds
        self.contexts: list[AwsContext] = []

    def load_contexts(self) -> list[AwsContext]:
        """Return the cached AWS contexts, building them while the cache is empty."""
        if not self.contexts:
            self.contexts = build_contexts(self.profiles, self.region, self.all_profiles)
        return self.contexts

    def on_mount(self) -> None:
        """Schedule the periodic refresh and run one refresh immediately."""
        self.set_interval(self.refresh_seconds, self.action_refresh)
        self.action_refresh()

    def action_refresh(self) -> None:
        """Reload data and redraw the widgets; must be overridden by subclasses.

        Declared here because ``on_mount`` and the ``r`` binding both reference
        it; without this hook a subclass that forgot to implement it would only
        fail with an ``AttributeError`` at mount time.

        Raises:
            NotImplementedError: Always, in the base class.
        """
        raise NotImplementedError(f"{type(self).__name__} must implement action_refresh()")

    def show_error(self, exc: Exception) -> None:
        """Display ``exc`` in red in the ``#status`` widget."""
        # NOTE(review): the message is interpolated into console markup; text
        # containing square brackets may be interpreted as markup - confirm and
        # escape if AWS error messages can contain them.
        self.query_one("#status", Static).update(f"[red]{exc}[/red]")
103
+
104
+
105
class ProcessingJobsApp(BaseSageMakerApp):
    """TUI that lists processing jobs in a table with a detail pane beside it."""

    TITLE = "SageMaker Processing Jobs"
    BINDINGS = BaseSageMakerApp.BINDINGS + [
        Binding("left", "previous_job", "Previous", priority=True),
        Binding("up", "previous_job", "Previous", priority=True),
        Binding("right", "next_job", "Next", priority=True),
        Binding("down", "next_job", "Next", priority=True),
    ]

    def __init__(
        self,
        profiles: tuple[str, ...],
        region: str | None,
        all_profiles: bool,
        refresh_seconds: int,
    ) -> None:
        """Forward the selection options to the base app and start with no jobs."""
        super().__init__(profiles, region, all_profiles, refresh_seconds)
        self.jobs: list[ProcessingJobView] = []

    def compose(self) -> ComposeResult:
        """Build the layout: header, status line, jobs table + detail pane, footer."""
        yield Header()
        yield Static("Loading...", id="status")
        with Horizontal(id="content"):
            table = DataTable(id="jobs")
            table.cursor_type = "row"
            yield table
            yield Static("", id="detail")
        yield Footer()

    def on_mount(self) -> None:
        """Create the table columns, then start the refresh cycle."""
        table = self.query_one("#jobs", DataTable)
        table.add_columns("Profile", "Region", "Job", "Status", "Runtime", "Instance", "Created")
        super().on_mount()

    def action_refresh(self) -> None:
        """Reload jobs from every context and redraw, keeping the current selection."""
        # Capture the selection before self.jobs is replaced, so the cursor can
        # be restored on the same job instead of jumping back to the first row.
        previous_arn = self.selected_job_arn()
        try:
            self.jobs = [job for ctx in self.load_contexts() for job in list_processing_jobs(ctx)]
            self.render_jobs(previous_arn)
            self.query_one("#status", Static).update(
                f"{len(self.jobs)} running processing job(s). Refresh every {self.refresh_seconds}s. "
                "Use arrows to move, r to refresh, q to quit."
            )
        except AwsCliError as exc:
            self.show_error(exc)

    def action_previous_job(self) -> None:
        """Move the table cursor one row up, stopping at the first row."""
        table = self.query_one("#jobs", DataTable)
        if table.row_count:
            table.move_cursor(row=max(0, table.cursor_row - 1))

    def action_next_job(self) -> None:
        """Move the table cursor one row down, stopping at the last row."""
        table = self.query_one("#jobs", DataTable)
        if table.row_count:
            table.move_cursor(row=min(table.row_count - 1, table.cursor_row + 1))

    def selected_job_arn(self) -> str | None:
        """Return the ARN of the job under the table cursor, or ``None`` if there is none."""
        if not self.jobs:
            return None
        row = self.query_one("#jobs", DataTable).cursor_row
        if not 0 <= row < len(self.jobs):
            return None
        return self.jobs[row].arn

    @staticmethod
    def _instance_label(job: ProcessingJobView) -> str:
        """Format the instance as ``"<count>x <type>"``, or just the type when no count is set."""
        instance = job.instance_type or ""
        if job.instance_count:
            return f"{job.instance_count}x {instance}".strip()
        return instance

    def render_jobs(self, preferred_job_arn: str | None = None) -> None:
        """Rebuild the jobs table from ``self.jobs`` and refresh the detail pane.

        Args:
            preferred_job_arn: ARN of the job that should stay selected. When it
                is ``None`` or no longer present, the first row is selected.
        """
        table = self.query_one("#jobs", DataTable)
        table.clear()
        selected_index = 0 if self.jobs else None
        for index, job in enumerate(self.jobs):
            if preferred_job_arn and job.arn == preferred_job_arn:
                selected_index = index
            table.add_row(
                job.profile,
                job.region,
                job.name,
                job.status,
                format_duration(job.started_time or job.creation_time),
                self._instance_label(job),
                format_dt(job.creation_time),
                # Rows are keyed by their index in self.jobs; the highlight
                # handler converts the key back to an index.
                key=str(index),
            )
        if selected_index is not None:
            table.move_cursor(row=selected_index)
        self.update_processing_detail(selected_index)

    @on(DataTable.RowHighlighted, "#jobs")
    def on_job_highlighted(self, event: DataTable.RowHighlighted) -> None:
        """Show the details of the row that just became highlighted."""
        try:
            self.update_processing_detail(int(str(event.row_key.value)))
        except (TypeError, ValueError):
            # Row keys that are not integer indexes are deliberately ignored.
            pass

    def update_processing_detail(self, index: int | None) -> None:
        """Render the detail pane for ``self.jobs[index]``.

        Args:
            index: Position in ``self.jobs``; ``None`` or an out-of-range value
                shows the "no jobs" placeholder instead.
        """
        detail = self.query_one("#detail", Static)
        if index is None or not 0 <= index < len(self.jobs):
            detail.update("No running processing jobs.")
            return
        job = self.jobs[index]
        lines = [
            f"[bold]{job.name}[/bold]",
            f"Profile: {job.profile}",
            f"Region: {job.region}",
            f"Status: {job.status}",
            f"Created: {format_dt(job.creation_time)}",
            f"Started: {format_dt(job.started_time)}",
            f"Runtime: {format_duration(job.started_time or job.creation_time)}",
            # Shares the label logic with the table, so a missing instance
            # count no longer renders as a stray "x <type>".
            f"Instance: {self._instance_label(job)}".strip(),
            f"Role: {job.role_arn}",
            f"Arn: {job.arn}",
        ]
        if job.failure_reason:
            lines.append(f"Failure: {job.failure_reason}")
        detail.update("\n".join(lines))
209
+
210
+
211
class PipelineExecutionsApp(BaseSageMakerApp):
    """TUI with three panes: pipeline executions, the selected execution's steps, and a log view."""

    TITLE = "SageMaker Pipeline Executions"
    BINDINGS = BaseSageMakerApp.BINDINGS + [
        ("left", "focus_executions", "Executions"),
        ("right", "focus_steps", "Steps"),
        ("l", "load_logs", "Logs"),
    ]

    def __init__(
        self,
        profiles: tuple[str, ...],
        region: str | None,
        all_profiles: bool,
        refresh_seconds: int,
        pipeline_name: str | None,
        recent_hours: int = 3,
    ) -> None:
        """Store the filter options and initialise the selection state.

        Args:
            profiles: AWS profile names forwarded to the base app.
            region: Region forwarded to the base app (may be ``None``).
            all_profiles: Flag forwarded to the base app.
            refresh_seconds: Interval, in seconds, between automatic refreshes.
            pipeline_name: Passed to ``list_active_pipeline_executions`` as
                ``pipeline_name``.
            recent_hours: Passed to ``list_active_pipeline_executions`` as
                ``recent_hours`` and shown in the status line.
        """
        super().__init__(profiles, region, all_profiles, refresh_seconds)
        self.pipeline_name = pipeline_name
        self.recent_hours = recent_hours
        self.executions: list[tuple[AwsContext, PipelineExecutionView]] = []
        self.steps: list[dict[str, Any]] = []
        self.selected_context: AwsContext | None = None
        self.selected_execution_arn: str | None = None
        # (execution ARN, step name) whose log tail is currently shown, if any.
        self.loaded_log_step_key: tuple[str, str] | None = None
        # Guards that let the highlight handlers ignore events caused by the
        # app's own table rebuilds and cursor moves.
        self._rendering_executions = False
        self._updating_steps = False
        self._suppress_next_execution_highlight = False
        self._suppress_next_step_highlight = False

    def compose(self) -> ComposeResult:
        """Build the layout: executions table on top, steps table and log view below."""
        yield Header()
        yield Static("Loading...", id="status")
        with Vertical(id="pipeline-content"):
            with Vertical(id="executions-pane"):
                executions = DataTable(id="executions")
                executions.cursor_type = "row"
                yield executions
            with Horizontal(id="bottom-pane"):
                with Vertical(id="steps-pane"):
                    steps = DataTable(id="steps")
                    steps.cursor_type = "row"
                    yield steps
                with Vertical(id="logs-pane"):
                    yield RichLog(id="logs", wrap=True, highlight=True, auto_scroll=False)
        yield Footer()

    def on_mount(self) -> None:
        """Create the columns of both tables, then start the refresh cycle."""
        executions = self.query_one("#executions", DataTable)
        executions.add_columns("Profile", "Region", "Pipeline", "Execution", "Status", "Runtime")
        steps = self.query_one("#steps", DataTable)
        steps.add_columns("Step", "Type", "Status", "Runtime", "Failure")
        super().on_mount()

    def action_refresh(self) -> None:
        """Reload executions from every context, keeping the selection and loaded logs."""
        # Capture the selection before it is reset by the re-render below.
        previous_execution_arn = self.selected_execution_arn
        previous_step_name = self.selected_step_name()
        try:
            pairs: list[tuple[AwsContext, PipelineExecutionView]] = []
            for ctx in self.load_contexts():
                for execution in list_active_pipeline_executions(
                    ctx, pipeline_name=self.pipeline_name, recent_hours=self.recent_hours
                ):
                    pairs.append((ctx, execution))
            self.executions = pairs
            self.render_executions(previous_execution_arn, previous_step_name, preserve_logs=True)
            self.query_one("#status", Static).update(
                f"{len(self.executions)} active/recent pipeline execution(s), window={self.recent_hours}h. "
                "Use left/right to switch panes, arrows to move, l to load failed-step logs."
            )
        except AwsCliError as exc:
            self.show_error(exc)

    def action_focus_executions(self) -> None:
        """Give keyboard focus to the executions table."""
        self.query_one("#executions", DataTable).focus()

    def action_focus_steps(self) -> None:
        """Give keyboard focus to the steps table."""
        self.query_one("#steps", DataTable).focus()

    def action_load_logs(self) -> None:
        """Load the log tail for the currently selected step."""
        self.load_selected_step_logs()

    def render_executions(
        self,
        preferred_execution_arn: str | None = None,
        preferred_step_name: str | None = None,
        preserve_logs: bool = False,
    ) -> None:
        """Rebuild the executions table and refresh the steps pane for the selected row.

        Args:
            preferred_execution_arn: Execution to keep selected; the first row
                is used when it is ``None`` or no longer listed.
            preferred_step_name: Forwarded to :meth:`update_steps`.
            preserve_logs: Forwarded to :meth:`update_steps`.
        """
        table = self.query_one("#executions", DataTable)
        selected_index = 0 if self.executions else None
        self._rendering_executions = True
        try:
            table.clear()
            for index, (_, execution) in enumerate(self.executions):
                if preferred_execution_arn and execution.execution_arn == preferred_execution_arn:
                    selected_index = index
                table.add_row(
                    execution.profile,
                    execution.region,
                    execution.pipeline_name,
                    # Fall back to the last ARN path segment when no display name is set.
                    execution.display_name or execution.execution_arn.rsplit("/", 1)[-1],
                    execution.status,
                    format_duration(execution.start_time),
                    key=str(index),
                )
            if selected_index is not None and table.row_count:
                # The programmatic cursor move below should not be handled as a
                # user selection by on_execution_highlighted.
                self._suppress_next_execution_highlight = True
                table.move_cursor(row=selected_index, scroll=False)
        finally:
            self._rendering_executions = False
        self.update_steps(selected_index, preferred_step_name=preferred_step_name, preserve_logs=preserve_logs)

    @on(DataTable.RowHighlighted, "#executions")
    def on_execution_highlighted(self, event: DataTable.RowHighlighted) -> None:
        """Load the steps of the newly highlighted execution, unless the event is suppressed."""
        if self._rendering_executions or self._suppress_next_execution_highlight:
            self._suppress_next_execution_highlight = False
            return
        try:
            self.update_steps(int(str(event.row_key.value)))
        except (TypeError, ValueError):
            # Row keys that are not integer indexes are deliberately ignored.
            pass

    @on(DataTable.RowHighlighted, "#steps")
    def on_step_highlighted(self, _: DataTable.RowHighlighted) -> None:
        """Replace the log pane with the step summary when a different step is highlighted."""
        if self._updating_steps or self._suppress_next_step_highlight:
            self._suppress_next_step_highlight = False
            return
        if self.loaded_log_step_key == self.selected_step_key():
            # The logs on screen already belong to this step; keep them.
            return
        self.loaded_log_step_key = None
        self.render_step_failure_or_hint()

    def update_steps(
        self,
        index: int | None,
        preferred_step_name: str | None = None,
        preserve_logs: bool = False,
    ) -> None:
        """Load and display the steps of ``self.executions[index]``.

        Args:
            index: Position in ``self.executions``; ``None`` or a value past the
                end clears the steps table and shows a placeholder message.
            preferred_step_name: Step to keep selected; the first step is used
                when it is ``None`` or not present.
            preserve_logs: When true, the log pane is left untouched if the
                loaded logs belong to the step that ends up selected.
        """
        self.steps = []
        self.selected_context = None
        self.selected_execution_arn = None
        logs = self.query_one("#logs", RichLog)
        table = self.query_one("#steps", DataTable)
        if index is None or index >= len(self.executions):
            table.clear()
            self.loaded_log_step_key = None
            logs.clear()
            logs.write(f"No active or recent pipeline executions in the last {self.recent_hours}h.")
            return

        ctx, execution = self.executions[index]
        self.selected_context = ctx
        self.selected_execution_arn = execution.execution_arn
        try:
            self.steps = list_pipeline_steps(ctx, execution.execution_arn)
        except AwsCliError as exc:
            # Drop the previous execution's rows as well; otherwise the table
            # would keep showing steps that no longer match self.steps (empty).
            table.clear()
            self.loaded_log_step_key = None
            logs.clear()
            logs.write(str(exc))
            return
        selected_step_index = 0 if self.steps else None
        self._updating_steps = True
        try:
            table.clear()
            for step_index, step in enumerate(self.steps):
                if preferred_step_name and step.get("StepName", "") == preferred_step_name:
                    selected_step_index = step_index
                table.add_row(
                    step.get("StepName", ""),
                    step.get("StepType", ""),
                    step.get("StepStatus", ""),
                    format_duration(step.get("StartTime"), step.get("EndTime")),
                    # "or" also covers a key that is present with a None value.
                    _shorten(step.get("FailureReason") or "", 80),
                    key=str(step_index),
                )
            if selected_step_index is not None and table.row_count:
                # The programmatic cursor move below should not be handled as a
                # user selection by on_step_highlighted.
                self._suppress_next_step_highlight = True
                table.move_cursor(row=selected_step_index, scroll=False)
        finally:
            self._updating_steps = False
        if preserve_logs and self.loaded_log_step_key == self.selected_step_key():
            return
        self.loaded_log_step_key = None
        self.render_step_failure_or_hint()

    def render_step_failure_or_hint(self) -> None:
        """Show the selected step's failure reason plus a log hint, or a one-line summary."""
        if self.loaded_log_step_key == self.selected_step_key():
            # Loaded logs for this step are on screen; do not overwrite them.
            return
        logs = self.query_one("#logs", RichLog)
        logs.clear()
        step = self.selected_step()
        if step is None:
            logs.write("Select a step to view details.")
            return
        failure = step.get("FailureReason")
        if failure:
            logs.write(f"[bold red]Failure[/bold red] {failure}")
            source = infer_log_source(step)
            if source:
                logs.write("Press l to load CloudWatch log tail.")
            else:
                logs.write("This step type has no supported CloudWatch log source yet.")
            return
        logs.write(
            f"{step.get('StepName', '')} {step.get('StepStatus', '')} "
            f"{format_duration(step.get('StartTime'), step.get('EndTime'))}"
        )

    def load_selected_step_logs(self) -> None:
        """Write the selected step's failure reason and log tail into the log pane."""
        ctx = self.selected_context
        step = self.selected_step()
        logs = self.query_one("#logs", RichLog)
        logs.clear()
        if ctx is None or step is None:
            self.loaded_log_step_key = None
            logs.write("Select a pipeline step first.")
            return
        self.loaded_log_step_key = self.selected_step_key()
        failure = step.get("FailureReason")
        if failure:
            logs.write(f"[bold red]Failure[/bold red] {failure}")
        try:
            for line in tail_step_logs(ctx, step):
                logs.write(line)
        except AwsCliError as exc:
            # Presumably tail_step_logs reports AWS failures with AwsCliError
            # like the other helpers used in this class (TODO confirm); show the
            # error in the pane instead of letting it escape the key handler.
            logs.write(str(exc))

    def selected_step(self) -> dict[str, Any] | None:
        """Return the step under the steps-table cursor, or ``None`` if there is none."""
        table = self.query_one("#steps", DataTable)
        if not self.steps or table.cursor_row >= len(self.steps):
            return None
        return self.steps[table.cursor_row]

    def selected_step_name(self) -> str | None:
        """Return the selected step's ``StepName`` ("" if missing), or ``None`` without a step."""
        step = self.selected_step()
        if step is None:
            return None
        return step.get("StepName", "")

    def selected_step_key(self) -> tuple[str, str] | None:
        """Return ``(execution ARN, step name)`` for the selection, or ``None`` if either is missing."""
        step_name = self.selected_step_name()
        if not self.selected_execution_arn or not step_name:
            return None
        return self.selected_execution_arn, step_name
452
+
453
+
454
+ def _shorten(value: str, max_len: int) -> str:
455
+ if len(value) <= max_len:
456
+ return value
457
+ return value[: max_len - 1] + "…"
458
+
@@ -0,0 +1,241 @@
1
+ Metadata-Version: 2.4
2
+ Name: sagemaker-ops-cli
3
+ Version: 0.1.0
4
+ Summary: CLI and TUI for submitting and monitoring Amazon SageMaker Processing Jobs and Pipelines.
5
+ Requires-Python: >=3.10
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: boto3>=1.34.0
8
+ Requires-Dist: botocore>=1.34.0
9
+ Requires-Dist: rich>=13.7.0
10
+ Requires-Dist: typer>=0.12.0
11
+ Requires-Dist: textual>=0.58.0
12
+ Provides-Extra: yaml
13
+ Requires-Dist: PyYAML>=6.0.0; extra == "yaml"
14
+ Provides-Extra: dev
15
+ Requires-Dist: build>=1.2.0; extra == "dev"
16
+ Requires-Dist: moto[logs,sagemaker]>=5.0.0; extra == "dev"
17
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
18
+ Requires-Dist: ruff>=0.5.0; extra == "dev"
19
+ Requires-Dist: twine>=5.0.0; extra == "dev"
20
+
21
+ # SageMaker Ops CLI
22
+
23
+ `smops` 是一个面向 SageMaker Processing Job 和 SageMaker Pipeline 的命令行工具:
24
+
25
+ - 提交 SageMaker Processing Job
26
+ - 启动 SageMaker Pipeline execution
27
+ - 用 TUI 查看正在运行的 Processing Jobs
28
+ - 用 TUI 查看正在运行和最近结束的 Pipeline executions、steps 状态和失败 step 的 CloudWatch 日志尾部
29
+ - 支持单个、多个或所有 AWS profiles
30
+
31
+ ## 安装
32
+
33
+ ```bash
34
+ python -m venv .venv
35
+ source .venv/bin/activate
36
+ pip install -e .
37
+ ```
38
+
39
+ 从 GitHub 直接安装:
40
+
41
+ ```bash
42
+ pip install git+https://github.com/southpolemonkey/smops.git
43
+ ```
44
+
45
+ 从本地 wheel 安装:
46
+
47
+ ```bash
48
+ pip install dist/sagemaker_ops_cli-0.1.0-py3-none-any.whl
49
+ ```
50
+
51
+ 如果要读取 YAML 配置:
52
+
53
+ ```bash
54
+ pip install -e '.[yaml]'
55
+ ```
56
+
57
+ ## 构建 Python 包
58
+
59
+ ```bash
60
+ pip install -e '.[dev]'
61
+ python -m build
62
+ ```
63
+
64
+ 构建产物会输出到 `dist/`:
65
+
66
+ - `sagemaker_ops_cli-0.1.0-py3-none-any.whl`
67
+ - `sagemaker_ops_cli-0.1.0.tar.gz`
68
+
69
+ ## 提交 Processing Job
70
+
71
+ 配置文件直接使用 boto3 `create_processing_job` 的参数结构。
72
+
73
+ ```bash
74
+ smops processing submit \
75
+ --profile dev \
76
+ --region us-east-1 \
77
+ --config examples/processing-job.json
78
+ ```
79
+
80
+ 只检查请求内容,不提交:
81
+
82
+ ```bash
83
+ smops processing submit --config examples/processing-job.json --dry-run
84
+ ```
85
+
86
+ ## 启动 Pipeline
87
+
88
+ ```bash
89
+ smops pipeline start \
90
+ --profile dev \
91
+ --region us-east-1 \
92
+ --name my-pipeline \
93
+ --display-name manual-run-001 \
94
+ --parameter InputDate=2026-06-30 \
95
+ --parameter Mode=prod
96
+ ```
97
+
98
+ ## TUI 查看 Processing Jobs
99
+
100
+ ```bash
101
+ smops tui processing --profile dev --region us-east-1
102
+ ```
103
+
104
+ 多个 profile:
105
+
106
+ ```bash
107
+ smops tui processing --profile dev --profile prod --region us-east-1
108
+ ```
109
+
110
+ 所有 profile:
111
+
112
+ ```bash
113
+ smops tui processing --all-profiles
114
+ ```
115
+
116
+ 快捷键:
117
+
118
+ - `↑/↓` 或 `←/→` 切换 job
119
+ - `r` 刷新
120
+ - `q` 退出
121
+
122
+ ## TUI 查看 Pipelines
123
+
124
+ ```bash
125
+ smops tui pipelines --profile dev --region us-east-1
126
+ ```
127
+
128
+ 只看某个 pipeline:
129
+
130
+ ```bash
131
+ smops tui pipelines --profile dev --region us-east-1 --name my-pipeline
132
+ ```
133
+
134
+ 默认会显示正在运行的 executions,以及最近 3 小时内结束的 executions,方便查看成功/失败结果。可以用 `--hours` 调整窗口:
135
+
136
+ ```bash
137
+ smops tui pipelines --profile dev --region us-east-1 --name my-pipeline --hours 6
138
+ ```
139
+
140
+ 快捷键:
141
+
142
+ - `←/→` 在 executions 和 steps 面板之间切换
143
+ - `↑/↓` 移动当前面板选中行
144
+ - `l` 加载选中失败 step 的 CloudWatch 日志尾部
145
+ - `r` 刷新
146
+ - `q` 退出
147
+
148
+ 目前自动支持这些 step 的日志定位:
149
+
150
+ - ProcessingJob: `/aws/sagemaker/ProcessingJobs`
151
+ - TrainingJob: `/aws/sagemaker/TrainingJobs`
152
+ - TransformJob: `/aws/sagemaker/TransformJobs`
153
+
154
+ ## 非交互式查看
155
+
156
+ ```bash
157
+ smops processing list --profile dev --region us-east-1
158
+ smops pipeline list --profile dev --region us-east-1
159
+ smops pipeline list --profile dev --region us-east-1 --name my-pipeline --hours 6
160
+ smops pipeline steps --profile dev --region us-east-1 --execution-arn arn:aws:sagemaker:...
161
+ ```
162
+
163
+ `processing list` 默认每页读取 20 个 running jobs。输出 `Next token` 时,用它继续翻页:
164
+
165
+ ```bash
166
+ smops processing list --profile dev --region us-east-1 --max-results 20
167
+ smops processing list --profile dev --region us-east-1 --max-results 20 --next-token '<token>'
168
+ ```
169
+
170
+ `pipeline list` 不传 `--name` 时默认每页只扫描 10 个 pipelines,避免真实账号里 pipelines 很多时卡住。输出 `Next token` 时,用它继续翻页:
171
+
172
+ ```bash
173
+ smops pipeline list --profile dev --region us-east-1 --pipeline-page-size 10
174
+ smops pipeline list --profile dev --region us-east-1 --pipeline-page-size 10 --next-token '<token>'
175
+ ```
176
+
177
+ ## AWS 权限
178
+
179
+ 运行账号至少需要具备以下权限:
180
+
181
+ - `sagemaker:CreateProcessingJob`
182
+ - `sagemaker:StartPipelineExecution`
183
+ - `sagemaker:ListProcessingJobs`
184
+ - `sagemaker:DescribeProcessingJob`
185
+ - `sagemaker:ListPipelines`
186
+ - `sagemaker:ListPipelineExecutions`
187
+ - `sagemaker:DescribePipelineExecution`
188
+ - `sagemaker:ListPipelineExecutionSteps`
189
+ - `logs:DescribeLogStreams`
190
+ - `logs:GetLogEvents`
191
+
192
+
193
+ ## Mock AWS Profile
194
+
195
+ 仓库里提供了一套 mock AWS 配置,方便本地演示 profile 切换和 CLI 参数解析,不会写入真实 `~/.aws`:
196
+
197
+ ```bash
198
+ export AWS_CONFIG_FILE=examples/aws/config
199
+ export AWS_SHARED_CREDENTIALS_FILE=examples/aws/credentials
200
+ export AWS_PROFILE=mock-dev
201
+ export AWS_DEFAULT_REGION=us-east-1
202
+ ```
203
+
204
+ 也可以直接加载样例环境变量:
205
+
206
+ ```bash
207
+ set -a
208
+ source examples/aws/mock.env
209
+ set +a
210
+ ```
211
+
212
+ 然后运行:
213
+
214
+ ```bash
215
+ smops processing submit --config examples/processing-job.json --dry-run
216
+ smops processing list --profile mock-dev
217
+ smops tui processing --profile mock-dev
218
+ ```
219
+
220
+ 注意:这套 credentials 是 dummy 值,只适合 dry-run、mock、本地端点或配合 botocore Stubber/moto 使用;直接访问真实 AWS 会认证失败。
221
+
222
+ ## E2E 测试
223
+
224
+ 测试使用 `moto` 模拟 AWS SageMaker 和 CloudWatch Logs,不会访问真实 AWS:
225
+
226
+ ```bash
227
+ pip install -e '.[dev]'
228
+ pytest
229
+ ```
230
+
231
+ 覆盖范围包括:
232
+
233
+ - Processing Job 提交和 running job 分页列表
234
+ - Pipeline execution 启动和 active/recent execution 列表
235
+ - Pipeline steps 状态展示
236
+ - 失败 step 的 CloudWatch Logs tail
237
+ - Processing Job TUI 的上下左右键导航
238
+ - Pipeline TUI 的 executions、steps 和失败日志加载
239
+ - 多 AWS profile 解析
240
+
241
+ Moto 目前还没有实现 `list_pipeline_execution_steps`,测试里对这一个 paginator 做了内存 fake,其余 SageMaker/Logs 调用都在 moto 环境中执行。
@@ -0,0 +1,9 @@
1
+ sagemaker_ops/__init__.py,sha256=cNBPNnVf26SYJQHBnRMAVEN8lsbBqzz-9mubqkdJcXQ,56
2
+ sagemaker_ops/aws.py,sha256=qTW0AVB3JoGqSWxjX0UK-zxyo3DUx-3v7chdVms7zSs,17718
3
+ sagemaker_ops/cli.py,sha256=kYnf7q_eD2qRPgUB2vc1CHpyUbpC-Kg4LECbV9mUfz4,12173
4
+ sagemaker_ops/tui.py,sha256=5aDBxQnF3U3-XmjDUnVhxbLlTy4yXlSdu1z6Rfvng2M,16452
5
+ sagemaker_ops_cli-0.1.0.dist-info/METADATA,sha256=LDQzO25kmo02iCXOOwZGBupfuTJzlpZIPvAw0d6A0iQ,6214
6
+ sagemaker_ops_cli-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
7
+ sagemaker_ops_cli-0.1.0.dist-info/entry_points.txt,sha256=GwMPaX4XAFwRWaAbot6pSvXtXYc9m5zDj1uWTRFszCA,48
8
+ sagemaker_ops_cli-0.1.0.dist-info/top_level.txt,sha256=2ajnfMn6IWi-PBRpeuatLRUhcLomdOd_s9cmVrX2V9o,14
9
+ sagemaker_ops_cli-0.1.0.dist-info/RECORD,,