alpha-engine-lib 0.35.0__tar.gz → 0.36.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/PKG-INFO +1 -1
  2. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/pyproject.toml +1 -1
  3. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/__init__.py +1 -1
  4. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/pipeline_status/__init__.py +4 -0
  5. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/pipeline_status/read.py +390 -66
  6. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/pipeline_status/registry.py +64 -36
  7. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib.egg-info/PKG-INFO +1 -1
  8. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_pipeline_status_read.py +321 -0
  9. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_pipeline_status_registry.py +15 -5
  10. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/README.md +0 -0
  11. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/setup.cfg +0 -0
  12. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/agent_schemas.py +0 -0
  13. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/alerts.py +0 -0
  14. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/arcticdb.py +0 -0
  15. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/collector_results.py +0 -0
  16. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/cost.py +0 -0
  17. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/dates.py +0 -0
  18. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/decision_capture.py +0 -0
  19. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/ec2_spot.py +0 -0
  20. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/email_sender.py +0 -0
  21. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/eval_artifacts.py +0 -0
  22. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/logging.py +0 -0
  23. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/model_pricing.yaml +0 -0
  24. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/pillars.py +0 -0
  25. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/pipeline_status/templates.py +0 -0
  26. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/preflight.py +0 -0
  27. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/rag/__init__.py +0 -0
  28. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/rag/db.py +0 -0
  29. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/rag/embeddings.py +0 -0
  30. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/rag/migrations/0001_content_tsv.sql +0 -0
  31. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/rag/rerank.py +0 -0
  32. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/rag/retrieval.py +0 -0
  33. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/rag/schema.sql +0 -0
  34. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/reconcile.py +0 -0
  35. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/secrets.py +0 -0
  36. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/sources/__init__.py +0 -0
  37. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/sources/protocols.py +0 -0
  38. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/ssm_dispatcher.py +0 -0
  39. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/ssm_log_capture.py +0 -0
  40. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/telegram.py +0 -0
  41. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/trading_calendar.py +0 -0
  42. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/transparency.py +0 -0
  43. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/transparency_inventory.yaml +0 -0
  44. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/universe.py +0 -0
  45. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib.egg-info/SOURCES.txt +0 -0
  46. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib.egg-info/dependency_links.txt +0 -0
  47. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib.egg-info/requires.txt +0 -0
  48. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib.egg-info/top_level.txt +0 -0
  49. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_agent_schemas.py +0 -0
  50. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_alerts.py +0 -0
  51. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_arcticdb.py +0 -0
  52. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_collector_results.py +0 -0
  53. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_cost.py +0 -0
  54. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_dates.py +0 -0
  55. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_decision_capture.py +0 -0
  56. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_ec2_spot.py +0 -0
  57. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_email_sender.py +0 -0
  58. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_eval_artifacts.py +0 -0
  59. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_logging.py +0 -0
  60. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_pillars.py +0 -0
  61. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_pipeline_status_templates.py +0 -0
  62. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_preflight.py +0 -0
  63. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_rag.py +0 -0
  64. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_rag_rerank.py +0 -0
  65. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_rag_retrieval_hybrid.py +0 -0
  66. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_reconcile.py +0 -0
  67. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_secrets.py +0 -0
  68. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_sources_protocols.py +0 -0
  69. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_ssm_dispatcher.py +0 -0
  70. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_ssm_log_capture.py +0 -0
  71. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_telegram.py +0 -0
  72. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_trading_calendar.py +0 -0
  73. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_transparency.py +0 -0
  74. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_universe.py +0 -0
  75. {alpha_engine_lib-0.35.0 → alpha_engine_lib-0.36.0}/tests/test_version_pin.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: alpha-engine-lib
3
- Version: 0.35.0
3
+ Version: 0.36.0
4
4
  Summary: Shared utilities for the Alpha Engine modules: preflight, structured logging with secret-redaction, ArcticDB universe access, NYSE-calendar dates + freshness predicates, decision capture, cost telemetry, RAG, agent output schemas, SSM-backed secrets, Telegram alerts + SNS fan-out, EC2 spot-launch resilience, SSM log-capture chokepoint, SSM send-command + poll chokepoint, and Step-Functions execution-state projection. Full surface documented in README.
5
5
  Author: Brian McMahon
6
6
  License: Proprietary
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "alpha-engine-lib"
7
- version = "0.35.0"
7
+ version = "0.36.0"
8
8
  description = "Shared utilities for the Alpha Engine modules: preflight, structured logging with secret-redaction, ArcticDB universe access, NYSE-calendar dates + freshness predicates, decision capture, cost telemetry, RAG, agent output schemas, SSM-backed secrets, Telegram alerts + SNS fan-out, EC2 spot-launch resilience, SSM log-capture chokepoint, SSM send-command + poll chokepoint, and Step-Functions execution-state projection. Full surface documented in README."
9
9
  readme = "README.md"
10
10
  # EC2 still runs Python 3.9 on the always-on micro instance (boto3 drops
@@ -1,3 +1,3 @@
1
1
  """alpha-engine-lib — shared utilities for Alpha Engine modules."""
2
2
 
3
- __version__ = "0.35.0"
3
+ __version__ = "0.36.0"
@@ -31,6 +31,7 @@ on first build, not after the second consumer arrives.
31
31
  from __future__ import annotations
32
32
 
33
33
  from .read import (
34
+ PipelineExecutionSummary,
34
35
  PipelineRun,
35
36
  RunStatus,
36
37
  SFNAccessDenied,
@@ -38,6 +39,7 @@ from .read import (
38
39
  SFNThrottled,
39
40
  TaskRow,
40
41
  TaskStatus,
42
+ list_recent_pipeline_runs,
41
43
  read_pipeline_state,
42
44
  )
43
45
  from .registry import (
@@ -54,6 +56,7 @@ __all__ = [
54
56
  "ArchivePageRef",
55
57
  "ArtifactReason",
56
58
  "PIPELINE_LABELS",
59
+ "PipelineExecutionSummary",
57
60
  "PipelineRun",
58
61
  "RunStatus",
59
62
  "SFNAccessDenied",
@@ -66,5 +69,6 @@ __all__ = [
66
69
  "WAIT_GROUPING",
67
70
  "format_failure_message",
68
71
  "format_success_message",
72
+ "list_recent_pipeline_runs",
69
73
  "read_pipeline_state",
70
74
  ]
@@ -31,11 +31,12 @@ red banner always names a specific cause.
31
31
 
32
32
  from __future__ import annotations
33
33
 
34
+ import json
34
35
  import logging
35
36
  from dataclasses import dataclass
36
37
  from datetime import datetime, timezone
37
38
  from enum import Enum
38
- from typing import TYPE_CHECKING, Any, Optional
39
+ from typing import TYPE_CHECKING, Annotated, Any, Optional, Union
39
40
 
40
41
  from pydantic import BaseModel, ConfigDict, Field
41
42
 
@@ -160,7 +161,21 @@ class TaskRow(BaseModel):
160
161
  # substrate-only reason). ``None`` here means "state name not in the
161
162
  # registry" and is a CI-time bug — the consumer should treat it as a
162
163
  # registry-drift signal, not a renderable placeholder.
163
- archive: Optional[Any] = None # ArchivePageRef | ArtifactReason | None
164
+ #
165
+ # The Annotated[Union[...], Field(discriminator="kind")] form is the
166
+ # SOTA tagged-union pattern for Pydantic V2: ``model_dump(mode="json")``
167
+ # serializes the ``kind`` field on each variant, and ``model_validate``
168
+ # routes dict input to the right class via that tag. Prior to this
169
+ # tagging, ``archive`` was typed ``Optional[Any]``, so a JSON round-trip
170
+ # left it as a plain dict — page-25's ``isinstance`` checks then
171
+ # misfired and rendered "Registry drift" for every state, even those
172
+ # with valid registry entries.
173
+ archive: Optional[
174
+ Annotated[
175
+ Union[ArchivePageRef, ArtifactReason],
176
+ Field(discriminator="kind"),
177
+ ]
178
+ ] = None
164
179
  failure_cause: Optional[str] = None # populated only when status == FAILED
165
180
 
166
181
 
@@ -180,6 +195,39 @@ class PipelineRun(BaseModel):
180
195
  tasks: list[TaskRow] = Field(default_factory=list)
181
196
  failing_state: Optional[str] = None # populated only when status == FAILED
182
197
  failure_cause: Optional[str] = None # populated only when status == FAILED
198
+ # The ``pipeline_role`` carried on this execution's input JSON
199
+ # (e.g. "weekly" / "daily" / "eod" / "smoke" / "recovery" /
200
+ # "shell-run" / "backfill" / "operator-replay"). None when the input
201
+ # JSON doesn't carry the field — typical of pre-Option-D executions
202
+ # and ad-hoc operator launches that haven't adopted the convention.
203
+ # The dashboard exposes this in the section header so the operator
204
+ # always knows whether they're looking at the canonical cadence run
205
+ # or a smoke / recovery overlay.
206
+ pipeline_role: Optional[str] = None
207
+
208
+
209
+ class PipelineExecutionSummary(BaseModel):
210
+ """Lightweight per-execution summary for the operator dropdown.
211
+
212
+ Returned by :func:`list_recent_pipeline_runs`. Does NOT carry the
213
+ full per-state task table (that lives on :class:`PipelineRun`) — the
214
+ dropdown's job is to let the operator pick one execution to inspect
215
+ in detail, at which point :func:`read_pipeline_state` returns the
216
+ full run for the chosen ARN.
217
+
218
+ ``pipeline_role`` is parsed from the execution's input JSON via the
219
+ DescribeExecution call; None when the input lacks the field.
220
+ """
221
+
222
+ model_config = _STRICT_CONFIG
223
+
224
+ execution_arn: str
225
+ name: str
226
+ status: RunStatus
227
+ start_utc: datetime
228
+ end_utc: Optional[datetime] = None
229
+ duration_sec: Optional[float] = None
230
+ pipeline_role: Optional[str] = None
183
231
 
184
232
 
185
233
  # ── Helpers ───────────────────────────────────────────────────────────────
@@ -404,80 +452,79 @@ def _failing_state_from_history(history_events: list[dict]) -> Optional[str]:
404
452
  return None
405
453
 
406
454
 
407
- # ── Public entry point ────────────────────────────────────────────────────
455
+ # ── Role-filter helpers (Option-D execution-picker substrate) ─────────────
408
456
 
409
457
 
410
- def read_pipeline_state(
411
- state_machine_arn: str,
412
- *,
413
- client: Optional["SFNClient"] = None,
414
- ) -> PipelineRun:
415
- """Project the most-recent execution of ``state_machine_arn`` onto a
416
- typed :class:`PipelineRun`.
458
+ # Bounds the ListExecutions walk when a role filter is set — we page
459
+ # backwards through history looking for the first execution whose
460
+ # input.pipeline_role matches the filter. 50 is enough to span ~6 months
461
+ # of weekly cadence even if every intervening execution is a smoke /
462
+ # recovery overlay; raise it only if smoke-density is genuinely that high.
463
+ _DEFAULT_ROLE_SEARCH_LIMIT = 50
417
464
 
418
- Calls (in order):
419
-
420
- 1. ``states:ListExecutions(stateMachineArn=..., maxResults=1)`` finds
421
- the latest execution arn. If the SF has zero executions, raises
422
- :class:`SFNNoExecutions`.
423
- 2. ``states:DescribeExecution(executionArn=...)`` — top-level status +
424
- start/stop + failure cause.
425
- 3. ``states:GetExecutionHistory(executionArn=..., maxResults=1000)`` —
426
- per-state events for the Task row table.
427
-
428
- Parameters
429
- ----------
430
- state_machine_arn:
431
- Full SF ARN, e.g. ``arn:aws:states:us-east-1:711398986525:stateMachine:alpha-engine-saturday-pipeline``.
432
- client:
433
- Optional boto3 ``stepfunctions`` client. Tests pass a mock here;
434
- production passes None and gets a fresh client per call (cheap;
435
- boto3 caches under the hood).
465
+ # ListExecutions page size — boto3 caps at 1000 but we keep pages small
466
+ # so a typical "find the most-recent weekly within the last 50" walk only
467
+ # hits the API once or twice.
468
+ _LIST_EXECUTIONS_PAGE_SIZE = 25
436
469
 
437
- Returns
438
- -------
439
- PipelineRun
440
- Fully populated except when ``status == NOT_RUN`` (only
441
- ``state_machine_arn`` + ``pretty_label`` + ``status`` set).
442
470
 
443
- Raises
444
- ------
445
- SFNAccessDenied
446
- IAM denial on any of the three required actions.
447
- SFNThrottled
448
- Rate-limit on any of the three.
449
- SFNNoExecutions
450
- SF exists but has zero executions ever.
451
- PipelineStatusError
452
- Any other unexpected error path — the caller renders a red banner.
453
- """
454
- if client is None: # pragma: no cover — production path
455
- import boto3
471
+ def _extract_pipeline_role(describe_resp: dict) -> Optional[str]:
472
+ """Parse ``input.pipeline_role`` from a DescribeExecution response.
456
473
 
457
- client = boto3.client("stepfunctions", region_name=_region_from_arn(state_machine_arn))
474
+ DescribeExecution returns ``input`` as a JSON-encoded string. The
475
+ Option-D convention is that all cron-triggered executions carry a
476
+ ``pipeline_role`` field at top level (``{"pipeline_role": "weekly",
477
+ ...}``) and ad-hoc operator launches set it explicitly (smoke /
478
+ recovery / operator-replay / etc).
458
479
 
459
- label = _label_for_arn(state_machine_arn)
480
+ Returns None on:
481
+ - missing ``input`` field
482
+ - malformed JSON (logged at WARN; the page renders "role: unknown")
483
+ - JSON parses but ``pipeline_role`` is absent
460
484
 
461
- # 1. ListExecutions
485
+ Permissive on parse failures (warn + return None rather than raise)
486
+ because input-shape is operator-controlled and we'd rather show the
487
+ execution with role=None than blackhole the whole page on a malformed
488
+ input JSON. Per ``feedback_no_silent_fails`` the WARN log is the
489
+ recording surface.
490
+ """
491
+ raw_input = describe_resp.get("input")
492
+ if not raw_input or not isinstance(raw_input, str):
493
+ return None
462
494
  try:
463
- list_resp = client.list_executions(
464
- stateMachineArn=state_machine_arn,
465
- maxResults=1,
495
+ parsed = json.loads(raw_input)
496
+ except (ValueError, TypeError) as exc:
497
+ logger.warning(
498
+ "Could not parse SF execution input JSON; pipeline_role=None: %s", exc
466
499
  )
467
- except Exception as exc: # noqa: BLE001 — narrow + re-raise
468
- _raise_for_boto_error(exc, "ListExecutions")
500
+ return None
501
+ if not isinstance(parsed, dict):
502
+ return None
503
+ role = parsed.get("pipeline_role")
504
+ return role if isinstance(role, str) and role else None
469
505
 
470
- executions = list_resp.get("executions") or []
471
- if not executions:
472
- raise SFNNoExecutions(
473
- f"State machine {state_machine_arn} has no executions yet."
474
- )
475
506
 
476
- latest = executions[0]
477
- execution_arn = latest.get("executionArn")
478
- execution_name = latest.get("name")
507
+ def _build_pipeline_run_from_execution_arn(
508
+ execution_arn: str,
509
+ state_machine_arn: str,
510
+ *,
511
+ client: "SFNClient",
512
+ ) -> PipelineRun:
513
+ """Project a known execution ARN onto a typed :class:`PipelineRun`.
514
+
515
+ Helper that holds the DescribeExecution + GetExecutionHistory +
516
+ materialize-tasks pipeline. The execution name is not supplied
517
+ separately; it is derived from the tail of ``execution_arn``.
518
+
519
+ Used by :func:`read_pipeline_state` after the role-filter walk picks
520
+ the target execution, AND directly when an operator clicks a specific
521
+ execution in the dropdown.
522
+ """
523
+ label = _label_for_arn(state_machine_arn)
524
+ # Derive execution_name from ARN — the ARN tail is
525
+ # ``execution:<sm-name>:<execution-name>``.
526
+ execution_name = execution_arn.rsplit(":", 1)[-1] if execution_arn else None
479
527
 
480
- # 2. DescribeExecution
481
528
  try:
482
529
  describe_resp = client.describe_execution(executionArn=execution_arn)
483
530
  except Exception as exc: # noqa: BLE001 — narrow + re-raise
@@ -487,8 +534,6 @@ def read_pipeline_state(
487
534
  try:
488
535
  run_status = RunStatus(status_str)
489
536
  except ValueError:
490
- # Unknown status string from boto3 (forward-compatibility) — fail
491
- # loud rather than silently mis-render.
492
537
  raise PipelineStatusError(
493
538
  f"Unknown SF execution status {status_str!r} from boto3 for {execution_arn}"
494
539
  )
@@ -502,8 +547,8 @@ def read_pipeline_state(
502
547
  failure_cause = (
503
548
  _failure_cause_from(describe_resp) if run_status == RunStatus.FAILED else None
504
549
  )
550
+ pipeline_role = _extract_pipeline_role(describe_resp)
505
551
 
506
- # 3. GetExecutionHistory
507
552
  try:
508
553
  history_resp = client.get_execution_history(
509
554
  executionArn=execution_arn,
@@ -531,9 +576,288 @@ def read_pipeline_state(
531
576
  tasks=tasks,
532
577
  failing_state=failing_state,
533
578
  failure_cause=failure_cause,
579
+ pipeline_role=pipeline_role,
580
+ )
581
+
582
+
583
+ def _find_execution_matching_role(
584
+ state_machine_arn: str,
585
+ role_filter: set[str],
586
+ *,
587
+ client: "SFNClient",
588
+ search_limit: int,
589
+ ) -> Optional[tuple[str, Optional[str]]]:
590
+ """Walk ListExecutions pages until finding an execution whose
591
+ ``input.pipeline_role`` ∈ ``role_filter``, or until ``search_limit``
592
+ executions have been inspected.
593
+
594
+ Returns ``(execution_arn, role)`` on hit, ``None`` on exhaustion.
595
+ The N+1 DescribeExecution calls are the cost of the role filter;
596
+ typical cron-cadence SFs find a match within the first 1-3 executions
597
+ so the cost is bounded in practice. Smoke-heavy windows pay more but
598
+ the ``search_limit`` cap bounds worst case.
599
+
600
+ Caller is responsible for translating None into the right outcome —
601
+ either SFNNoExecutions (when ListExecutions was empty in the first
602
+ page) or a "no execution matches filter" fallback signal.
603
+ """
604
+ inspected = 0
605
+ next_token: Optional[str] = None
606
+ while inspected < search_limit:
607
+ kwargs: dict[str, Any] = {
608
+ "stateMachineArn": state_machine_arn,
609
+ "maxResults": min(_LIST_EXECUTIONS_PAGE_SIZE, search_limit - inspected),
610
+ }
611
+ if next_token:
612
+ kwargs["nextToken"] = next_token
613
+ try:
614
+ list_resp = client.list_executions(**kwargs)
615
+ except Exception as exc: # noqa: BLE001 — narrow + re-raise
616
+ _raise_for_boto_error(exc, "ListExecutions")
617
+
618
+ executions = list_resp.get("executions") or []
619
+ if not executions:
620
+ return None
621
+ for ex in executions:
622
+ inspected += 1
623
+ execution_arn = ex.get("executionArn")
624
+ if not execution_arn:
625
+ continue
626
+ try:
627
+ describe_resp = client.describe_execution(executionArn=execution_arn)
628
+ except Exception as exc: # noqa: BLE001 — narrow + re-raise
629
+ _raise_for_boto_error(exc, "DescribeExecution")
630
+ role = _extract_pipeline_role(describe_resp)
631
+ if role is not None and role in role_filter:
632
+ return execution_arn, role
633
+
634
+ next_token = list_resp.get("nextToken")
635
+ if not next_token:
636
+ return None
637
+
638
+ return None
639
+
640
+
641
+ # ── Public entry point ────────────────────────────────────────────────────
642
+
643
+
644
+ def read_pipeline_state(
645
+ state_machine_arn: str,
646
+ *,
647
+ role_filter: Optional[set[str]] = None,
648
+ search_limit: int = _DEFAULT_ROLE_SEARCH_LIMIT,
649
+ execution_arn: Optional[str] = None,
650
+ client: Optional["SFNClient"] = None,
651
+ ) -> PipelineRun:
652
+ """Project the chosen execution of ``state_machine_arn`` onto a typed
653
+ :class:`PipelineRun`.
654
+
655
+ Default behavior (no ``role_filter``, no ``execution_arn``) is
656
+ backwards-compatible: returns the most-recent execution per
657
+ ``ListExecutions maxResults=1``, same as pre-Option-D.
658
+
659
+ Option-D execution-picker semantics:
660
+
661
+ - When ``execution_arn`` is set, fetches that specific execution
662
+ directly (bypasses ListExecutions). Used by the dashboard's
663
+ dropdown "click a row to inspect this execution" path.
664
+ - When ``role_filter`` is set, walks ListExecutions pages until
665
+ finding the most-recent execution whose ``input.pipeline_role``
666
+ is in the filter set. If none match within ``search_limit``
667
+ executions, raises :class:`SFNNoExecutions` with a message naming
668
+ the filter — the caller (page 25) renders a banner like "No
669
+ 'weekly' execution in the last 50 runs; click 'View other recent
670
+ executions' to inspect what's actually been running."
671
+
672
+ Parameters
673
+ ----------
674
+ state_machine_arn:
675
+ Full SF ARN.
676
+ role_filter:
677
+ Optional set of ``pipeline_role`` values to filter executions by
678
+ (e.g. ``{"weekly"}`` for the Saturday-SF cadence run, ``{"daily"}``
679
+ for the Weekday-SF cadence run). ``None`` = no filter (most-recent
680
+ regardless of role — current behavior).
681
+ search_limit:
682
+ Bounds the role-filter walk. Default 50 — see
683
+ :data:`_DEFAULT_ROLE_SEARCH_LIMIT`. Ignored when ``role_filter``
684
+ is None or empty (an empty set is treated as "no filter").
685
+ execution_arn:
686
+ Optional specific execution ARN to fetch. When set, both
687
+ ``role_filter`` and ``search_limit`` are ignored.
688
+ client:
689
+ Optional boto3 ``stepfunctions`` client. Tests pass a mock here;
690
+ production passes None.
691
+
692
+ Raises
693
+ ------
694
+ SFNAccessDenied
695
+ IAM denial on any of the three required actions.
696
+ SFNThrottled
697
+ Rate-limit on any of the three.
698
+ SFNNoExecutions
699
+ SF has zero executions, OR ``role_filter`` is set and no
700
+ execution within the search window matches.
701
+ PipelineStatusError
702
+ Any other unexpected error path.
703
+ """
704
+ if client is None: # pragma: no cover — production path
705
+ import boto3
706
+
707
+ client = boto3.client("stepfunctions", region_name=_region_from_arn(state_machine_arn))
708
+
709
+ # Path 1: explicit execution_arn — fetch directly.
710
+ if execution_arn is not None:
711
+ return _build_pipeline_run_from_execution_arn(
712
+ execution_arn, state_machine_arn, client=client
713
+ )
714
+
715
+ # Path 2: role_filter — walk ListExecutions until match.
716
+ if role_filter:
717
+ match = _find_execution_matching_role(
718
+ state_machine_arn, role_filter, client=client, search_limit=search_limit
719
+ )
720
+ if match is None:
721
+ raise SFNNoExecutions(
722
+ f"No execution with pipeline_role in {sorted(role_filter)!r} "
723
+ f"found within last {search_limit} executions of {state_machine_arn}."
724
+ )
725
+ matched_arn, _matched_role = match
726
+ return _build_pipeline_run_from_execution_arn(
727
+ matched_arn, state_machine_arn, client=client
728
+ )
729
+
730
+ # Path 3 (default): most-recent execution regardless of role —
731
+ # backwards-compatible with pre-Option-D callers.
732
+ try:
733
+ list_resp = client.list_executions(
734
+ stateMachineArn=state_machine_arn,
735
+ maxResults=1,
736
+ )
737
+ except Exception as exc: # noqa: BLE001 — narrow + re-raise
738
+ _raise_for_boto_error(exc, "ListExecutions")
739
+
740
+ executions = list_resp.get("executions") or []
741
+ if not executions:
742
+ raise SFNNoExecutions(
743
+ f"State machine {state_machine_arn} has no executions yet."
744
+ )
745
+
746
+ latest = executions[0]
747
+ return _build_pipeline_run_from_execution_arn(
748
+ latest.get("executionArn"), state_machine_arn, client=client
534
749
  )
535
750
 
536
751
 
752
+ def list_recent_pipeline_runs(
753
+ state_machine_arn: str,
754
+ *,
755
+ limit: int = 10,
756
+ role_filter: Optional[set[str]] = None,
757
+ client: Optional["SFNClient"] = None,
758
+ ) -> list[PipelineExecutionSummary]:
759
+ """Return lightweight summaries of the most-recent N executions.
760
+
761
+ Backs the page-25 "View other recent executions" disclosure: shows
762
+ the operator what's been running on this SF, ranked most-recent
763
+ first, with the ``pipeline_role`` of each so smoke vs. weekly vs.
764
+ recovery is visible at a glance.
765
+
766
+ Each summary requires one ``DescribeExecution`` call (to extract
767
+ ``pipeline_role`` from the input JSON) on top of one
768
+ ``ListExecutions`` call, so this is O(limit) API calls. Default
769
+ ``limit=10`` puts the dashboard's "show me last N" view at ~11
770
+ SF API calls per page render — well within the 25-TPS soft limit
771
+ that applies to ``states:DescribeExecution``.
772
+
773
+ Parameters
774
+ ----------
775
+ state_machine_arn:
776
+ Full SF ARN.
777
+ limit:
778
+ Max number of executions to return. Default 10.
779
+ role_filter:
780
+ Optional pre-filter (returns only executions whose
781
+ ``pipeline_role`` ∈ ``role_filter``). When set, the API call
782
+ budget grows because we may have to walk past role-mismatched
783
+ executions; bounded by an internal walk cap of ``min(limit * 5, _DEFAULT_ROLE_SEARCH_LIMIT)``.
784
+ client:
785
+ Optional boto3 ``stepfunctions`` client.
786
+ """
787
+ if client is None: # pragma: no cover — production path
788
+ import boto3
789
+
790
+ client = boto3.client("stepfunctions", region_name=_region_from_arn(state_machine_arn))
791
+
792
+ walk_cap = limit if role_filter is None else min(limit * 5, _DEFAULT_ROLE_SEARCH_LIMIT)
793
+ summaries: list[PipelineExecutionSummary] = []
794
+ inspected = 0
795
+ next_token: Optional[str] = None
796
+
797
+ while len(summaries) < limit and inspected < walk_cap:
798
+ kwargs: dict[str, Any] = {
799
+ "stateMachineArn": state_machine_arn,
800
+ "maxResults": min(_LIST_EXECUTIONS_PAGE_SIZE, walk_cap - inspected),
801
+ }
802
+ if next_token:
803
+ kwargs["nextToken"] = next_token
804
+ try:
805
+ list_resp = client.list_executions(**kwargs)
806
+ except Exception as exc: # noqa: BLE001 — narrow + re-raise
807
+ _raise_for_boto_error(exc, "ListExecutions")
808
+
809
+ executions = list_resp.get("executions") or []
810
+ if not executions:
811
+ break
812
+ for ex in executions:
813
+ inspected += 1
814
+ execution_arn = ex.get("executionArn")
815
+ if not execution_arn:
816
+ continue
817
+ try:
818
+ describe_resp = client.describe_execution(executionArn=execution_arn)
819
+ except Exception as exc: # noqa: BLE001 — narrow + re-raise
820
+ _raise_for_boto_error(exc, "DescribeExecution")
821
+ role = _extract_pipeline_role(describe_resp)
822
+ if role_filter is not None and role not in role_filter:
823
+ continue
824
+ status_str = describe_resp.get("status", "RUNNING")
825
+ try:
826
+ status = RunStatus(status_str)
827
+ except ValueError:
828
+ raise PipelineStatusError(
829
+ f"Unknown SF execution status {status_str!r} from boto3 for {execution_arn}"
830
+ )
831
+ start_utc = _parse_ts(describe_resp.get("startDate"))
832
+ end_utc = _parse_ts(describe_resp.get("stopDate"))
833
+ duration: Optional[float] = None
834
+ if start_utc is not None and end_utc is not None:
835
+ duration = (end_utc - start_utc).total_seconds()
836
+ if start_utc is None:
837
+ # An execution without a start time is degenerate; skip
838
+ # rather than fail the whole list.
839
+ continue
840
+ summaries.append(
841
+ PipelineExecutionSummary(
842
+ execution_arn=execution_arn,
843
+ name=ex.get("name") or execution_arn.rsplit(":", 1)[-1],
844
+ status=status,
845
+ start_utc=start_utc,
846
+ end_utc=end_utc,
847
+ duration_sec=duration,
848
+ pipeline_role=role,
849
+ )
850
+ )
851
+ if len(summaries) >= limit:
852
+ break
853
+
854
+ next_token = list_resp.get("nextToken")
855
+ if not next_token:
856
+ break
857
+
858
+ return summaries
859
+
860
+
537
861
  def _raise_for_boto_error(exc: Exception, action: str) -> None:
538
862
  """Translate a boto3 exception into a typed PipelineStatusError.
539
863