alpha-engine-lib 0.35.0__tar.gz → 0.36.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/PKG-INFO +1 -1
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/pyproject.toml +1 -1
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/__init__.py +1 -1
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/pipeline_status/__init__.py +4 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/pipeline_status/read.py +390 -66
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/pipeline_status/registry.py +64 -36
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib.egg-info/PKG-INFO +1 -1
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_pipeline_status_read.py +321 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_pipeline_status_registry.py +15 -5
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/README.md +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/setup.cfg +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/agent_schemas.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/alerts.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/arcticdb.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/collector_results.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/cost.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/dates.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/decision_capture.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/ec2_spot.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/email_sender.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/eval_artifacts.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/logging.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/model_pricing.yaml +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/pillars.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/pipeline_status/templates.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/preflight.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/rag/__init__.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/rag/db.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/rag/embeddings.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/rag/migrations/0001_content_tsv.sql +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/rag/rerank.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/rag/retrieval.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/rag/schema.sql +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/reconcile.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/secrets.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/sources/__init__.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/sources/protocols.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/ssm_dispatcher.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/ssm_log_capture.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/telegram.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/trading_calendar.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/transparency.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/transparency_inventory.yaml +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/universe.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib.egg-info/SOURCES.txt +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib.egg-info/dependency_links.txt +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib.egg-info/requires.txt +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib.egg-info/top_level.txt +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_agent_schemas.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_alerts.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_arcticdb.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_collector_results.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_cost.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_dates.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_decision_capture.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_ec2_spot.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_email_sender.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_eval_artifacts.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_logging.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_pillars.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_pipeline_status_templates.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_preflight.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_rag.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_rag_rerank.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_rag_retrieval_hybrid.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_reconcile.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_secrets.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_sources_protocols.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_ssm_dispatcher.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_ssm_log_capture.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_telegram.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_trading_calendar.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_transparency.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_universe.py +0 -0
- {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_version_pin.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: alpha-engine-lib
|
|
3
|
-
Version: 0.35.0
|
|
3
|
+
Version: 0.36.0
|
|
4
4
|
Summary: Shared utilities for the Alpha Engine modules: preflight, structured logging with secret-redaction, ArcticDB universe access, NYSE-calendar dates + freshness predicates, decision capture, cost telemetry, RAG, agent output schemas, SSM-backed secrets, Telegram alerts + SNS fan-out, EC2 spot-launch resilience, SSM log-capture chokepoint, SSM send-command + poll chokepoint, and Step-Functions execution-state projection. Full surface documented in README.
|
|
5
5
|
Author: Brian McMahon
|
|
6
6
|
License: Proprietary
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "alpha-engine-lib"
|
|
7
|
-
version = "0.35.0"
|
|
7
|
+
version = "0.36.0"
|
|
8
8
|
description = "Shared utilities for the Alpha Engine modules: preflight, structured logging with secret-redaction, ArcticDB universe access, NYSE-calendar dates + freshness predicates, decision capture, cost telemetry, RAG, agent output schemas, SSM-backed secrets, Telegram alerts + SNS fan-out, EC2 spot-launch resilience, SSM log-capture chokepoint, SSM send-command + poll chokepoint, and Step-Functions execution-state projection. Full surface documented in README."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
# EC2 still runs Python 3.9 on the always-on micro instance (boto3 drops
|
{alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/pipeline_status/__init__.py
RENAMED
|
@@ -31,6 +31,7 @@ on first build, not after the second consumer arrives.
|
|
|
31
31
|
from __future__ import annotations
|
|
32
32
|
|
|
33
33
|
from .read import (
|
|
34
|
+
PipelineExecutionSummary,
|
|
34
35
|
PipelineRun,
|
|
35
36
|
RunStatus,
|
|
36
37
|
SFNAccessDenied,
|
|
@@ -38,6 +39,7 @@ from .read import (
|
|
|
38
39
|
SFNThrottled,
|
|
39
40
|
TaskRow,
|
|
40
41
|
TaskStatus,
|
|
42
|
+
list_recent_pipeline_runs,
|
|
41
43
|
read_pipeline_state,
|
|
42
44
|
)
|
|
43
45
|
from .registry import (
|
|
@@ -54,6 +56,7 @@ __all__ = [
|
|
|
54
56
|
"ArchivePageRef",
|
|
55
57
|
"ArtifactReason",
|
|
56
58
|
"PIPELINE_LABELS",
|
|
59
|
+
"PipelineExecutionSummary",
|
|
57
60
|
"PipelineRun",
|
|
58
61
|
"RunStatus",
|
|
59
62
|
"SFNAccessDenied",
|
|
@@ -66,5 +69,6 @@ __all__ = [
|
|
|
66
69
|
"WAIT_GROUPING",
|
|
67
70
|
"format_failure_message",
|
|
68
71
|
"format_success_message",
|
|
72
|
+
"list_recent_pipeline_runs",
|
|
69
73
|
"read_pipeline_state",
|
|
70
74
|
]
|
{alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/pipeline_status/read.py
RENAMED
|
@@ -31,11 +31,12 @@ red banner always names a specific cause.
|
|
|
31
31
|
|
|
32
32
|
from __future__ import annotations
|
|
33
33
|
|
|
34
|
+
import json
|
|
34
35
|
import logging
|
|
35
36
|
from dataclasses import dataclass
|
|
36
37
|
from datetime import datetime, timezone
|
|
37
38
|
from enum import Enum
|
|
38
|
-
from typing import TYPE_CHECKING, Any, Optional
|
|
39
|
+
from typing import TYPE_CHECKING, Annotated, Any, Optional, Union
|
|
39
40
|
|
|
40
41
|
from pydantic import BaseModel, ConfigDict, Field
|
|
41
42
|
|
|
@@ -160,7 +161,21 @@ class TaskRow(BaseModel):
|
|
|
160
161
|
# substrate-only reason). ``None`` here means "state name not in the
|
|
161
162
|
# registry" and is a CI-time bug — the consumer should treat it as a
|
|
162
163
|
# registry-drift signal, not a renderable placeholder.
|
|
163
|
-
|
|
164
|
+
#
|
|
165
|
+
# The Annotated[Union[...], Field(discriminator="kind")] form is the
|
|
166
|
+
# SOTA tagged-union pattern for Pydantic V2: ``model_dump(mode="json")``
|
|
167
|
+
# serializes the ``kind`` field on each variant, and ``model_validate``
|
|
168
|
+
# routes dict input to the right class via that tag. Prior to this
|
|
169
|
+
# tagging, ``archive`` was typed ``Optional[Any]``, so a JSON round-trip
|
|
170
|
+
# left it as a plain dict — page-25's ``isinstance`` checks then
|
|
171
|
+
# misfired and rendered "Registry drift" for every state, even those
|
|
172
|
+
# with valid registry entries.
|
|
173
|
+
archive: Optional[
|
|
174
|
+
Annotated[
|
|
175
|
+
Union[ArchivePageRef, ArtifactReason],
|
|
176
|
+
Field(discriminator="kind"),
|
|
177
|
+
]
|
|
178
|
+
] = None
|
|
164
179
|
failure_cause: Optional[str] = None # populated only when status == FAILED
|
|
165
180
|
|
|
166
181
|
|
|
@@ -180,6 +195,39 @@ class PipelineRun(BaseModel):
|
|
|
180
195
|
tasks: list[TaskRow] = Field(default_factory=list)
|
|
181
196
|
failing_state: Optional[str] = None # populated only when status == FAILED
|
|
182
197
|
failure_cause: Optional[str] = None # populated only when status == FAILED
|
|
198
|
+
# The ``pipeline_role`` carried on this execution's input JSON
|
|
199
|
+
# (e.g. "weekly" / "daily" / "eod" / "smoke" / "recovery" /
|
|
200
|
+
# "shell-run" / "backfill" / "operator-replay"). None when the input
|
|
201
|
+
# JSON doesn't carry the field — typical of pre-Option-D executions
|
|
202
|
+
# and ad-hoc operator launches that haven't adopted the convention.
|
|
203
|
+
# The dashboard exposes this in the section header so the operator
|
|
204
|
+
# always knows whether they're looking at the canonical cadence run
|
|
205
|
+
# or a smoke / recovery overlay.
|
|
206
|
+
pipeline_role: Optional[str] = None
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
class PipelineExecutionSummary(BaseModel):
    """One row of the operator's execution-picker dropdown.

    Produced by :func:`list_recent_pipeline_runs`. This is deliberately a
    thin record: it omits the per-state task table that
    :class:`PipelineRun` carries. Once the operator picks a row, the
    caller passes its ARN to :func:`read_pipeline_state` to obtain the
    full run.

    ``pipeline_role`` comes from the execution's input JSON as returned by
    DescribeExecution; it is None when the input does not carry the field.
    """

    model_config = _STRICT_CONFIG

    # Identity of the execution.
    execution_arn: str
    name: str

    # Top-level execution status and timing.
    status: RunStatus
    start_utc: datetime
    end_utc: Optional[datetime] = Field(default=None)
    duration_sec: Optional[float] = Field(default=None)

    # Role tag parsed from the execution input; None when absent.
    pipeline_role: Optional[str] = Field(default=None)
|
|
183
231
|
|
|
184
232
|
|
|
185
233
|
# ── Helpers ───────────────────────────────────────────────────────────────
|
|
@@ -404,80 +452,79 @@ def _failing_state_from_history(history_events: list[dict]) -> Optional[str]:
|
|
|
404
452
|
return None
|
|
405
453
|
|
|
406
454
|
|
|
407
|
-
# ──
|
|
455
|
+
# ── Role-filter helpers (Option-D execution-picker substrate) ─────────────
|
|
408
456
|
|
|
409
457
|
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
typed :class:`PipelineRun`.
|
|
458
|
+
# Bounds the ListExecutions walk when a role filter is set — we page
|
|
459
|
+
# backwards through history looking for the first execution whose
|
|
460
|
+
# input.pipeline_role matches the filter. 50 is enough to span ~6 months
|
|
461
|
+
# of weekly cadence even if every intervening execution is a smoke /
|
|
462
|
+
# recovery overlay; raise it only if smoke-density is genuinely that high.
|
|
463
|
+
_DEFAULT_ROLE_SEARCH_LIMIT = 50
|
|
417
464
|
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
:class:`SFNNoExecutions`.
|
|
423
|
-
2. ``states:DescribeExecution(executionArn=...)`` — top-level status +
|
|
424
|
-
start/stop + failure cause.
|
|
425
|
-
3. ``states:GetExecutionHistory(executionArn=..., maxResults=1000)`` —
|
|
426
|
-
per-state events for the Task row table.
|
|
427
|
-
|
|
428
|
-
Parameters
|
|
429
|
-
----------
|
|
430
|
-
state_machine_arn:
|
|
431
|
-
Full SF ARN, e.g. ``arn:aws:states:us-east-1:711398986525:stateMachine:alpha-engine-saturday-pipeline``.
|
|
432
|
-
client:
|
|
433
|
-
Optional boto3 ``stepfunctions`` client. Tests pass a mock here;
|
|
434
|
-
production passes None and gets a fresh client per call (cheap;
|
|
435
|
-
boto3 caches under the hood).
|
|
465
|
+
# ListExecutions page size — boto3 caps at 1000 but we keep pages small
|
|
466
|
+
# so a typical "find the most-recent weekly within the last 50" walk only
|
|
467
|
+
# hits the API once or twice.
|
|
468
|
+
_LIST_EXECUTIONS_PAGE_SIZE = 25
|
|
436
469
|
|
|
437
|
-
Returns
|
|
438
|
-
-------
|
|
439
|
-
PipelineRun
|
|
440
|
-
Fully populated except when ``status == NOT_RUN`` (only
|
|
441
|
-
``state_machine_arn`` + ``pretty_label`` + ``status`` set).
|
|
442
470
|
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
SFNAccessDenied
|
|
446
|
-
IAM denial on any of the three required actions.
|
|
447
|
-
SFNThrottled
|
|
448
|
-
Rate-limit on any of the three.
|
|
449
|
-
SFNNoExecutions
|
|
450
|
-
SF exists but has zero executions ever.
|
|
451
|
-
PipelineStatusError
|
|
452
|
-
Any other unexpected error path — the caller renders a red banner.
|
|
453
|
-
"""
|
|
454
|
-
if client is None: # pragma: no cover — production path
|
|
455
|
-
import boto3
|
|
471
|
+
def _extract_pipeline_role(describe_resp: dict) -> Optional[str]:
|
|
472
|
+
"""Parse ``input.pipeline_role`` from a DescribeExecution response.
|
|
456
473
|
|
|
457
|
-
|
|
474
|
+
DescribeExecution returns ``input`` as a JSON-encoded string. The
|
|
475
|
+
Option-D convention is that all cron-triggered executions carry a
|
|
476
|
+
``pipeline_role`` field at top level (``{"pipeline_role": "weekly",
|
|
477
|
+
...}``) and ad-hoc operator launches set it explicitly (smoke /
|
|
478
|
+
recovery / operator-replay / etc).
|
|
458
479
|
|
|
459
|
-
|
|
480
|
+
Returns None on:
|
|
481
|
+
- missing ``input`` field
|
|
482
|
+
- malformed JSON (logged at WARN; the page renders "role: unknown")
|
|
483
|
+
- JSON parses but ``pipeline_role`` is absent
|
|
460
484
|
|
|
461
|
-
|
|
485
|
+
Permissive on parse failures (warn + return None rather than raise)
|
|
486
|
+
because input-shape is operator-controlled and we'd rather show the
|
|
487
|
+
execution with role=None than blackhole the whole page on a malformed
|
|
488
|
+
input JSON. Per ``feedback_no_silent_fails`` the WARN log is the
|
|
489
|
+
recording surface.
|
|
490
|
+
"""
|
|
491
|
+
raw_input = describe_resp.get("input")
|
|
492
|
+
if not raw_input or not isinstance(raw_input, str):
|
|
493
|
+
return None
|
|
462
494
|
try:
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
495
|
+
parsed = json.loads(raw_input)
|
|
496
|
+
except (ValueError, TypeError) as exc:
|
|
497
|
+
logger.warning(
|
|
498
|
+
"Could not parse SF execution input JSON; pipeline_role=None: %s", exc
|
|
466
499
|
)
|
|
467
|
-
|
|
468
|
-
|
|
500
|
+
return None
|
|
501
|
+
if not isinstance(parsed, dict):
|
|
502
|
+
return None
|
|
503
|
+
role = parsed.get("pipeline_role")
|
|
504
|
+
return role if isinstance(role, str) and role else None
|
|
469
505
|
|
|
470
|
-
executions = list_resp.get("executions") or []
|
|
471
|
-
if not executions:
|
|
472
|
-
raise SFNNoExecutions(
|
|
473
|
-
f"State machine {state_machine_arn} has no executions yet."
|
|
474
|
-
)
|
|
475
506
|
|
|
476
|
-
|
|
477
|
-
execution_arn
|
|
478
|
-
|
|
507
|
+
def _build_pipeline_run_from_execution_arn(
|
|
508
|
+
execution_arn: str,
|
|
509
|
+
state_machine_arn: str,
|
|
510
|
+
*,
|
|
511
|
+
client: "SFNClient",
|
|
512
|
+
) -> PipelineRun:
|
|
513
|
+
"""Project a known execution ARN onto a typed :class:`PipelineRun`.
|
|
514
|
+
|
|
515
|
+
Helper that holds the DescribeExecution + GetExecutionHistory +
|
|
516
|
+
materialize-tasks pipeline. Callers responsible for the execution
|
|
517
|
+
name (passed in via the ARN — derived if not supplied separately).
|
|
518
|
+
|
|
519
|
+
Used by :func:`read_pipeline_state` after the role-filter walk picks
|
|
520
|
+
the target execution, AND directly when an operator clicks a specific
|
|
521
|
+
execution in the dropdown.
|
|
522
|
+
"""
|
|
523
|
+
label = _label_for_arn(state_machine_arn)
|
|
524
|
+
# Derive execution_name from ARN — the ARN tail is
|
|
525
|
+
# ``execution:<sm-name>:<execution-name>``.
|
|
526
|
+
execution_name = execution_arn.rsplit(":", 1)[-1] if execution_arn else None
|
|
479
527
|
|
|
480
|
-
# 2. DescribeExecution
|
|
481
528
|
try:
|
|
482
529
|
describe_resp = client.describe_execution(executionArn=execution_arn)
|
|
483
530
|
except Exception as exc: # noqa: BLE001 — narrow + re-raise
|
|
@@ -487,8 +534,6 @@ def read_pipeline_state(
|
|
|
487
534
|
try:
|
|
488
535
|
run_status = RunStatus(status_str)
|
|
489
536
|
except ValueError:
|
|
490
|
-
# Unknown status string from boto3 (forward-compatibility) — fail
|
|
491
|
-
# loud rather than silently mis-render.
|
|
492
537
|
raise PipelineStatusError(
|
|
493
538
|
f"Unknown SF execution status {status_str!r} from boto3 for {execution_arn}"
|
|
494
539
|
)
|
|
@@ -502,8 +547,8 @@ def read_pipeline_state(
|
|
|
502
547
|
failure_cause = (
|
|
503
548
|
_failure_cause_from(describe_resp) if run_status == RunStatus.FAILED else None
|
|
504
549
|
)
|
|
550
|
+
pipeline_role = _extract_pipeline_role(describe_resp)
|
|
505
551
|
|
|
506
|
-
# 3. GetExecutionHistory
|
|
507
552
|
try:
|
|
508
553
|
history_resp = client.get_execution_history(
|
|
509
554
|
executionArn=execution_arn,
|
|
@@ -531,9 +576,288 @@ def read_pipeline_state(
|
|
|
531
576
|
tasks=tasks,
|
|
532
577
|
failing_state=failing_state,
|
|
533
578
|
failure_cause=failure_cause,
|
|
579
|
+
pipeline_role=pipeline_role,
|
|
580
|
+
)
|
|
581
|
+
|
|
582
|
+
|
|
583
|
+
def _find_execution_matching_role(
|
|
584
|
+
state_machine_arn: str,
|
|
585
|
+
role_filter: set[str],
|
|
586
|
+
*,
|
|
587
|
+
client: "SFNClient",
|
|
588
|
+
search_limit: int,
|
|
589
|
+
) -> Optional[tuple[str, Optional[str]]]:
|
|
590
|
+
"""Walk ListExecutions pages until finding an execution whose
|
|
591
|
+
``input.pipeline_role`` ∈ ``role_filter``, or until ``search_limit``
|
|
592
|
+
executions have been inspected.
|
|
593
|
+
|
|
594
|
+
Returns ``(execution_arn, role)`` on hit, ``None`` on exhaustion.
|
|
595
|
+
The N+1 DescribeExecution calls are the cost of the role filter;
|
|
596
|
+
typical cron-cadence SFs find a match within the first 1-3 executions
|
|
597
|
+
so the cost is bounded in practice. Smoke-heavy windows pay more but
|
|
598
|
+
the ``search_limit`` cap bounds worst case.
|
|
599
|
+
|
|
600
|
+
Caller is responsible for translating None into the right outcome —
|
|
601
|
+
either SFNNoExecutions (when ListExecutions was empty in the first
|
|
602
|
+
page) or a "no execution matches filter" fallback signal.
|
|
603
|
+
"""
|
|
604
|
+
inspected = 0
|
|
605
|
+
next_token: Optional[str] = None
|
|
606
|
+
while inspected < search_limit:
|
|
607
|
+
kwargs: dict[str, Any] = {
|
|
608
|
+
"stateMachineArn": state_machine_arn,
|
|
609
|
+
"maxResults": min(_LIST_EXECUTIONS_PAGE_SIZE, search_limit - inspected),
|
|
610
|
+
}
|
|
611
|
+
if next_token:
|
|
612
|
+
kwargs["nextToken"] = next_token
|
|
613
|
+
try:
|
|
614
|
+
list_resp = client.list_executions(**kwargs)
|
|
615
|
+
except Exception as exc: # noqa: BLE001 — narrow + re-raise
|
|
616
|
+
_raise_for_boto_error(exc, "ListExecutions")
|
|
617
|
+
|
|
618
|
+
executions = list_resp.get("executions") or []
|
|
619
|
+
if not executions:
|
|
620
|
+
return None
|
|
621
|
+
for ex in executions:
|
|
622
|
+
inspected += 1
|
|
623
|
+
execution_arn = ex.get("executionArn")
|
|
624
|
+
if not execution_arn:
|
|
625
|
+
continue
|
|
626
|
+
try:
|
|
627
|
+
describe_resp = client.describe_execution(executionArn=execution_arn)
|
|
628
|
+
except Exception as exc: # noqa: BLE001 — narrow + re-raise
|
|
629
|
+
_raise_for_boto_error(exc, "DescribeExecution")
|
|
630
|
+
role = _extract_pipeline_role(describe_resp)
|
|
631
|
+
if role is not None and role in role_filter:
|
|
632
|
+
return execution_arn, role
|
|
633
|
+
|
|
634
|
+
next_token = list_resp.get("nextToken")
|
|
635
|
+
if not next_token:
|
|
636
|
+
return None
|
|
637
|
+
|
|
638
|
+
return None
|
|
639
|
+
|
|
640
|
+
|
|
641
|
+
# ── Public entry point ────────────────────────────────────────────────────
|
|
642
|
+
|
|
643
|
+
|
|
644
|
+
def read_pipeline_state(
    state_machine_arn: str,
    *,
    role_filter: Optional[set[str]] = None,
    search_limit: int = _DEFAULT_ROLE_SEARCH_LIMIT,
    execution_arn: Optional[str] = None,
    client: Optional["SFNClient"] = None,
) -> PipelineRun:
    """Return one execution of ``state_machine_arn`` as a :class:`PipelineRun`.

    Which execution is chosen depends on the arguments, checked in this
    order:

    1. ``execution_arn`` given — that execution is fetched directly and
       ListExecutions is not called. ``role_filter`` and ``search_limit``
       are ignored.
    2. ``role_filter`` non-empty — ListExecutions is walked until an
       execution whose ``input.pipeline_role`` is in the filter is found,
       inspecting at most ``search_limit`` executions. No match raises
       :class:`SFNNoExecutions` with a message naming the filter.
    3. Otherwise — the single execution returned by
       ``ListExecutions(maxResults=1)`` is used, which is the
       backwards-compatible default for callers that pass neither option.

    Parameters
    ----------
    state_machine_arn:
        Full SF ARN.
    role_filter:
        Optional set of ``pipeline_role`` values to select by (e.g.
        ``{"weekly"}``). ``None`` or an empty set means no filtering.
    search_limit:
        Upper bound on executions inspected during the role-filter walk.
        Defaults to :data:`_DEFAULT_ROLE_SEARCH_LIMIT`. Unused unless
        ``role_filter`` is in effect.
    execution_arn:
        Optional specific execution ARN to fetch.
    client:
        Optional boto3 ``stepfunctions`` client. Tests pass a mock here;
        production passes None and a client is created for the region
        taken from ``state_machine_arn``.

    Raises
    ------
    SFNNoExecutions
        The state machine has no executions, or ``role_filter`` is set
        and nothing within the search window matches.

    Other errors from the underlying SF calls are translated by
    ``_raise_for_boto_error``.
    """
    if client is None:  # pragma: no cover — production path
        import boto3

        client = boto3.client("stepfunctions", region_name=_region_from_arn(state_machine_arn))

    if execution_arn is not None:
        # Explicit pick from the caller — nothing to search for.
        target_arn = execution_arn
    elif role_filter:
        found = _find_execution_matching_role(
            state_machine_arn, role_filter, client=client, search_limit=search_limit
        )
        if found is None:
            raise SFNNoExecutions(
                f"No execution with pipeline_role in {sorted(role_filter)!r} "
                f"found within last {search_limit} executions of {state_machine_arn}."
            )
        target_arn = found[0]
    else:
        # Default: whatever ListExecutions reports first, regardless of role.
        try:
            newest_page = client.list_executions(
                stateMachineArn=state_machine_arn,
                maxResults=1,
            )
        except Exception as exc:  # noqa: BLE001 — narrow + re-raise
            _raise_for_boto_error(exc, "ListExecutions")

        newest = newest_page.get("executions") or []
        if not newest:
            raise SFNNoExecutions(
                f"State machine {state_machine_arn} has no executions yet."
            )
        target_arn = newest[0].get("executionArn")

    return _build_pipeline_run_from_execution_arn(
        target_arn, state_machine_arn, client=client
    )
|
|
535
750
|
|
|
536
751
|
|
|
752
|
+
def list_recent_pipeline_runs(
    state_machine_arn: str,
    *,
    limit: int = 10,
    role_filter: Optional[set[str]] = None,
    client: Optional["SFNClient"] = None,
) -> list[PipelineExecutionSummary]:
    """Return lightweight summaries of up to ``limit`` listed executions.

    Backs the "View other recent executions" disclosure: each summary
    carries the execution's ``pipeline_role`` so smoke vs. weekly vs.
    recovery runs can be told apart at a glance. Summaries are returned
    in the order ListExecutions yields them.

    Each inspected execution costs one ``DescribeExecution`` call (needed
    to read ``pipeline_role`` from the input JSON) on top of the
    ``ListExecutions`` page calls, so the call count is O(limit) without
    a filter and O(walk cap) with one.

    Parameters
    ----------
    state_machine_arn:
        Full SF ARN.
    limit:
        Max number of summaries to return. Default 10. A value <= 0
        returns an empty list without calling the API.
    role_filter:
        Optional pre-filter: only executions whose ``pipeline_role`` is
        in the set are returned. ``None`` or an empty set means no
        filtering (the same convention ``read_pipeline_state`` uses).
        With a filter, up to ``min(limit * 5, _DEFAULT_ROLE_SEARCH_LIMIT)``
        executions are inspected, and never fewer than ``limit``.
    client:
        Optional boto3 ``stepfunctions`` client; created from the ARN's
        region when None.

    Raises
    ------
    PipelineStatusError
        DescribeExecution reported a status string that is not a
        :class:`RunStatus` member. Errors from the SF calls themselves
        are translated by ``_raise_for_boto_error``.

    Executions with no ARN or no parseable start time are skipped rather
    than failing the whole list; they still count toward the walk cap.
    """
    if client is None:  # pragma: no cover — production path
        import boto3

        client = boto3.client("stepfunctions", region_name=_region_from_arn(state_machine_arn))

    # Treat an empty filter as "no filter". Otherwise an empty set would
    # match nothing and burn a full walk of DescribeExecution calls just
    # to return [], while read_pipeline_state treats it as unfiltered.
    if not role_filter:
        role_filter = None

    if role_filter is None:
        walk_cap = limit
    else:
        # Look past role-mismatched executions, but keep the walk bounded.
        # max(limit, ...) ensures a large ``limit`` is not silently
        # truncated below itself by the global search ceiling.
        walk_cap = max(limit, min(limit * 5, _DEFAULT_ROLE_SEARCH_LIMIT))

    summaries: list[PipelineExecutionSummary] = []
    inspected = 0
    next_token: Optional[str] = None

    while len(summaries) < limit and inspected < walk_cap:
        kwargs: dict[str, Any] = {
            "stateMachineArn": state_machine_arn,
            "maxResults": min(_LIST_EXECUTIONS_PAGE_SIZE, walk_cap - inspected),
        }
        if next_token:
            kwargs["nextToken"] = next_token
        try:
            list_resp = client.list_executions(**kwargs)
        except Exception as exc:  # noqa: BLE001 — narrow + re-raise
            _raise_for_boto_error(exc, "ListExecutions")

        executions = list_resp.get("executions") or []
        if not executions:
            break

        for ex in executions:
            inspected += 1
            execution_arn = ex.get("executionArn")
            if not execution_arn:
                continue
            try:
                describe_resp = client.describe_execution(executionArn=execution_arn)
            except Exception as exc:  # noqa: BLE001 — narrow + re-raise
                _raise_for_boto_error(exc, "DescribeExecution")

            role = _extract_pipeline_role(describe_resp)
            if role_filter is not None and role not in role_filter:
                continue

            status_str = describe_resp.get("status", "RUNNING")
            try:
                status = RunStatus(status_str)
            except ValueError:
                raise PipelineStatusError(
                    f"Unknown SF execution status {status_str!r} from boto3 for {execution_arn}"
                )

            start_utc = _parse_ts(describe_resp.get("startDate"))
            if start_utc is None:
                # An execution without a start time is degenerate; skip
                # rather than fail the whole list.
                continue
            end_utc = _parse_ts(describe_resp.get("stopDate"))
            duration: Optional[float] = None
            if end_utc is not None:
                duration = (end_utc - start_utc).total_seconds()

            summaries.append(
                PipelineExecutionSummary(
                    execution_arn=execution_arn,
                    # Fall back to the ARN tail when the list entry has no name.
                    name=ex.get("name") or execution_arn.rsplit(":", 1)[-1],
                    status=status,
                    start_utc=start_utc,
                    end_utc=end_utc,
                    duration_sec=duration,
                    pipeline_role=role,
                )
            )
            if len(summaries) >= limit:
                break

        next_token = list_resp.get("nextToken")
        if not next_token:
            break

    return summaries
|
|
859
|
+
|
|
860
|
+
|
|
537
861
|
def _raise_for_boto_error(exc: Exception, action: str) -> None:
|
|
538
862
|
"""Translate a boto3 exception into a typed PipelineStatusError.
|
|
539
863
|
|