alpha-engine-lib 0.35.1__tar.gz → 0.36.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/PKG-INFO +1 -1
  2. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/pyproject.toml +1 -1
  3. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/__init__.py +1 -1
  4. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/pipeline_status/__init__.py +4 -0
  5. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/pipeline_status/read.py +374 -64
  6. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib.egg-info/PKG-INFO +1 -1
  7. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/tests/test_pipeline_status_read.py +248 -0
  8. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/README.md +0 -0
  9. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/setup.cfg +0 -0
  10. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/agent_schemas.py +0 -0
  11. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/alerts.py +0 -0
  12. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/arcticdb.py +0 -0
  13. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/collector_results.py +0 -0
  14. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/cost.py +0 -0
  15. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/dates.py +0 -0
  16. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/decision_capture.py +0 -0
  17. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/ec2_spot.py +0 -0
  18. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/email_sender.py +0 -0
  19. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/eval_artifacts.py +0 -0
  20. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/logging.py +0 -0
  21. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/model_pricing.yaml +0 -0
  22. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/pillars.py +0 -0
  23. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/pipeline_status/registry.py +0 -0
  24. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/pipeline_status/templates.py +0 -0
  25. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/preflight.py +0 -0
  26. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/rag/__init__.py +0 -0
  27. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/rag/db.py +0 -0
  28. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/rag/embeddings.py +0 -0
  29. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/rag/migrations/0001_content_tsv.sql +0 -0
  30. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/rag/rerank.py +0 -0
  31. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/rag/retrieval.py +0 -0
  32. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/rag/schema.sql +0 -0
  33. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/reconcile.py +0 -0
  34. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/secrets.py +0 -0
  35. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/sources/__init__.py +0 -0
  36. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/sources/protocols.py +0 -0
  37. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/ssm_dispatcher.py +0 -0
  38. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/ssm_log_capture.py +0 -0
  39. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/telegram.py +0 -0
  40. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/trading_calendar.py +0 -0
  41. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/transparency.py +0 -0
  42. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/transparency_inventory.yaml +0 -0
  43. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib/universe.py +0 -0
  44. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib.egg-info/SOURCES.txt +0 -0
  45. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib.egg-info/dependency_links.txt +0 -0
  46. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib.egg-info/requires.txt +0 -0
  47. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/src/alpha_engine_lib.egg-info/top_level.txt +0 -0
  48. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/tests/test_agent_schemas.py +0 -0
  49. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/tests/test_alerts.py +0 -0
  50. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/tests/test_arcticdb.py +0 -0
  51. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/tests/test_collector_results.py +0 -0
  52. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/tests/test_cost.py +0 -0
  53. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/tests/test_dates.py +0 -0
  54. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/tests/test_decision_capture.py +0 -0
  55. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/tests/test_ec2_spot.py +0 -0
  56. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/tests/test_email_sender.py +0 -0
  57. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/tests/test_eval_artifacts.py +0 -0
  58. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/tests/test_logging.py +0 -0
  59. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/tests/test_pillars.py +0 -0
  60. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/tests/test_pipeline_status_registry.py +0 -0
  61. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/tests/test_pipeline_status_templates.py +0 -0
  62. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/tests/test_preflight.py +0 -0
  63. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/tests/test_rag.py +0 -0
  64. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/tests/test_rag_rerank.py +0 -0
  65. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/tests/test_rag_retrieval_hybrid.py +0 -0
  66. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/tests/test_reconcile.py +0 -0
  67. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/tests/test_secrets.py +0 -0
  68. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/tests/test_sources_protocols.py +0 -0
  69. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/tests/test_ssm_dispatcher.py +0 -0
  70. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/tests/test_ssm_log_capture.py +0 -0
  71. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/tests/test_telegram.py +0 -0
  72. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/tests/test_trading_calendar.py +0 -0
  73. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/tests/test_transparency.py +0 -0
  74. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/tests/test_universe.py +0 -0
  75. {alpha_engine_lib-0.35.1 → alpha_engine_lib-0.36.0}/tests/test_version_pin.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: alpha-engine-lib
3
- Version: 0.35.1
3
+ Version: 0.36.0
4
4
  Summary: Shared utilities for the Alpha Engine modules: preflight, structured logging with secret-redaction, ArcticDB universe access, NYSE-calendar dates + freshness predicates, decision capture, cost telemetry, RAG, agent output schemas, SSM-backed secrets, Telegram alerts + SNS fan-out, EC2 spot-launch resilience, SSM log-capture chokepoint, SSM send-command + poll chokepoint, and Step-Functions execution-state projection. Full surface documented in README.
5
5
  Author: Brian McMahon
6
6
  License: Proprietary
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "alpha-engine-lib"
7
- version = "0.35.1"
7
+ version = "0.36.0"
8
8
  description = "Shared utilities for the Alpha Engine modules: preflight, structured logging with secret-redaction, ArcticDB universe access, NYSE-calendar dates + freshness predicates, decision capture, cost telemetry, RAG, agent output schemas, SSM-backed secrets, Telegram alerts + SNS fan-out, EC2 spot-launch resilience, SSM log-capture chokepoint, SSM send-command + poll chokepoint, and Step-Functions execution-state projection. Full surface documented in README."
9
9
  readme = "README.md"
10
10
  # EC2 still runs Python 3.9 on the always-on micro instance (boto3 drops
@@ -1,3 +1,3 @@
1
1
  """alpha-engine-lib — shared utilities for Alpha Engine modules."""
2
2
 
3
- __version__ = "0.35.1"
3
+ __version__ = "0.36.0"
@@ -31,6 +31,7 @@ on first build, not after the second consumer arrives.
31
31
  from __future__ import annotations
32
32
 
33
33
  from .read import (
34
+ PipelineExecutionSummary,
34
35
  PipelineRun,
35
36
  RunStatus,
36
37
  SFNAccessDenied,
@@ -38,6 +39,7 @@ from .read import (
38
39
  SFNThrottled,
39
40
  TaskRow,
40
41
  TaskStatus,
42
+ list_recent_pipeline_runs,
41
43
  read_pipeline_state,
42
44
  )
43
45
  from .registry import (
@@ -54,6 +56,7 @@ __all__ = [
54
56
  "ArchivePageRef",
55
57
  "ArtifactReason",
56
58
  "PIPELINE_LABELS",
59
+ "PipelineExecutionSummary",
57
60
  "PipelineRun",
58
61
  "RunStatus",
59
62
  "SFNAccessDenied",
@@ -66,5 +69,6 @@ __all__ = [
66
69
  "WAIT_GROUPING",
67
70
  "format_failure_message",
68
71
  "format_success_message",
72
+ "list_recent_pipeline_runs",
69
73
  "read_pipeline_state",
70
74
  ]
@@ -31,6 +31,7 @@ red banner always names a specific cause.
31
31
 
32
32
  from __future__ import annotations
33
33
 
34
+ import json
34
35
  import logging
35
36
  from dataclasses import dataclass
36
37
  from datetime import datetime, timezone
@@ -194,6 +195,39 @@ class PipelineRun(BaseModel):
194
195
  tasks: list[TaskRow] = Field(default_factory=list)
195
196
  failing_state: Optional[str] = None # populated only when status == FAILED
196
197
  failure_cause: Optional[str] = None # populated only when status == FAILED
198
+ # The ``pipeline_role`` carried on this execution's input JSON
199
+ # (e.g. "weekly" / "daily" / "eod" / "smoke" / "recovery" /
200
+ # "shell-run" / "backfill" / "operator-replay"). None when the input
201
+ # JSON doesn't carry the field — typical of pre-Option-D executions
202
+ # and ad-hoc operator launches that haven't adopted the convention.
203
+ # The dashboard exposes this in the section header so the operator
204
+ # always knows whether they're looking at the canonical cadence run
205
+ # or a smoke / recovery overlay.
206
+ pipeline_role: Optional[str] = None
207
+
208
+
209
+ class PipelineExecutionSummary(BaseModel):
210
+ """Lightweight per-execution summary for the operator dropdown.
211
+
212
+ Returned by :func:`list_recent_pipeline_runs`. Does NOT carry the
213
+ full per-state task table (that lives on :class:`PipelineRun`) — the
214
+ dropdown's job is to let the operator pick one execution to inspect
215
+ in detail, at which point :func:`read_pipeline_state` returns the
216
+ full run for the chosen ARN.
217
+
218
+ ``pipeline_role`` is parsed from the execution's input JSON via the
219
+ DescribeExecution call; None when the input lacks the field.
220
+ """
221
+
222
+ model_config = _STRICT_CONFIG
223
+
224
+ execution_arn: str
225
+ name: str
226
+ status: RunStatus
227
+ start_utc: datetime
228
+ end_utc: Optional[datetime] = None
229
+ duration_sec: Optional[float] = None
230
+ pipeline_role: Optional[str] = None
197
231
 
198
232
 
199
233
  # ── Helpers ───────────────────────────────────────────────────────────────
@@ -418,80 +452,79 @@ def _failing_state_from_history(history_events: list[dict]) -> Optional[str]:
418
452
  return None
419
453
 
420
454
 
421
- # ── Public entry point ────────────────────────────────────────────────────
455
+ # ── Role-filter helpers (Option-D execution-picker substrate) ─────────────
422
456
 
423
457
 
424
- def read_pipeline_state(
425
- state_machine_arn: str,
426
- *,
427
- client: Optional["SFNClient"] = None,
428
- ) -> PipelineRun:
429
- """Project the most-recent execution of ``state_machine_arn`` onto a
430
- typed :class:`PipelineRun`.
458
+ # Bounds the ListExecutions walk when a role filter is set — we page
459
+ # backwards through history looking for the first execution whose
460
+ # input.pipeline_role matches the filter. 50 is enough to span ~6 months
461
+ # of weekly cadence even if every intervening execution is a smoke /
462
+ # recovery overlay; raise it only if smoke-density is genuinely that high.
463
+ _DEFAULT_ROLE_SEARCH_LIMIT = 50
431
464
 
432
- Calls (in order):
433
-
434
- 1. ``states:ListExecutions(stateMachineArn=..., maxResults=1)`` finds
435
- the latest execution arn. If the SF has zero executions, raises
436
- :class:`SFNNoExecutions`.
437
- 2. ``states:DescribeExecution(executionArn=...)`` — top-level status +
438
- start/stop + failure cause.
439
- 3. ``states:GetExecutionHistory(executionArn=..., maxResults=1000)`` —
440
- per-state events for the Task row table.
441
-
442
- Parameters
443
- ----------
444
- state_machine_arn:
445
- Full SF ARN, e.g. ``arn:aws:states:us-east-1:711398986525:stateMachine:alpha-engine-saturday-pipeline``.
446
- client:
447
- Optional boto3 ``stepfunctions`` client. Tests pass a mock here;
448
- production passes None and gets a fresh client per call (cheap;
449
- boto3 caches under the hood).
465
+ # ListExecutions page size — boto3 caps at 1000 but we keep pages small
466
+ # so a typical "find the most-recent weekly within the last 50" walk only
467
+ # hits the API once or twice.
468
+ _LIST_EXECUTIONS_PAGE_SIZE = 25
450
469
 
451
- Returns
452
- -------
453
- PipelineRun
454
- Fully populated except when ``status == NOT_RUN`` (only
455
- ``state_machine_arn`` + ``pretty_label`` + ``status`` set).
456
470
 
457
- Raises
458
- ------
459
- SFNAccessDenied
460
- IAM denial on any of the three required actions.
461
- SFNThrottled
462
- Rate-limit on any of the three.
463
- SFNNoExecutions
464
- SF exists but has zero executions ever.
465
- PipelineStatusError
466
- Any other unexpected error path — the caller renders a red banner.
467
- """
468
- if client is None: # pragma: no cover — production path
469
- import boto3
471
+ def _extract_pipeline_role(describe_resp: dict) -> Optional[str]:
472
+ """Parse ``input.pipeline_role`` from a DescribeExecution response.
470
473
 
471
- client = boto3.client("stepfunctions", region_name=_region_from_arn(state_machine_arn))
474
+ DescribeExecution returns ``input`` as a JSON-encoded string. The
475
+ Option-D convention is that all cron-triggered executions carry a
476
+ ``pipeline_role`` field at top level (``{"pipeline_role": "weekly",
477
+ ...}``) and ad-hoc operator launches set it explicitly (smoke /
478
+ recovery / operator-replay / etc).
472
479
 
473
- label = _label_for_arn(state_machine_arn)
480
+ Returns None on:
481
+ - missing ``input`` field
482
+ - malformed JSON (logged at WARN; the page renders "role: unknown")
483
+ - JSON parses but ``pipeline_role`` is absent
474
484
 
475
- # 1. ListExecutions
485
+ Permissive on parse failures (warn + return None rather than raise)
486
+ because input-shape is operator-controlled and we'd rather show the
487
+ execution with role=None than blackhole the whole page on a malformed
488
+ input JSON. Per ``feedback_no_silent_fails`` the WARN log is the
489
+ recording surface.
490
+ """
491
+ raw_input = describe_resp.get("input")
492
+ if not raw_input or not isinstance(raw_input, str):
493
+ return None
476
494
  try:
477
- list_resp = client.list_executions(
478
- stateMachineArn=state_machine_arn,
479
- maxResults=1,
495
+ parsed = json.loads(raw_input)
496
+ except (ValueError, TypeError) as exc:
497
+ logger.warning(
498
+ "Could not parse SF execution input JSON; pipeline_role=None: %s", exc
480
499
  )
481
- except Exception as exc: # noqa: BLE001 — narrow + re-raise
482
- _raise_for_boto_error(exc, "ListExecutions")
500
+ return None
501
+ if not isinstance(parsed, dict):
502
+ return None
503
+ role = parsed.get("pipeline_role")
504
+ return role if isinstance(role, str) and role else None
483
505
 
484
- executions = list_resp.get("executions") or []
485
- if not executions:
486
- raise SFNNoExecutions(
487
- f"State machine {state_machine_arn} has no executions yet."
488
- )
489
506
 
490
- latest = executions[0]
491
- execution_arn = latest.get("executionArn")
492
- execution_name = latest.get("name")
507
+ def _build_pipeline_run_from_execution_arn(
508
+ execution_arn: str,
509
+ state_machine_arn: str,
510
+ *,
511
+ client: "SFNClient",
512
+ ) -> PipelineRun:
513
+ """Project a known execution ARN onto a typed :class:`PipelineRun`.
514
+
515
+ Helper that holds the DescribeExecution + GetExecutionHistory +
516
+ materialize-tasks pipeline. Callers responsible for the execution
517
+ name (passed in via the ARN — derived if not supplied separately).
518
+
519
+ Used by :func:`read_pipeline_state` after the role-filter walk picks
520
+ the target execution, AND directly when an operator clicks a specific
521
+ execution in the dropdown.
522
+ """
523
+ label = _label_for_arn(state_machine_arn)
524
+ # Derive execution_name from ARN — the ARN tail is
525
+ # ``execution:<sm-name>:<execution-name>``.
526
+ execution_name = execution_arn.rsplit(":", 1)[-1] if execution_arn else None
493
527
 
494
- # 2. DescribeExecution
495
528
  try:
496
529
  describe_resp = client.describe_execution(executionArn=execution_arn)
497
530
  except Exception as exc: # noqa: BLE001 — narrow + re-raise
@@ -501,8 +534,6 @@ def read_pipeline_state(
501
534
  try:
502
535
  run_status = RunStatus(status_str)
503
536
  except ValueError:
504
- # Unknown status string from boto3 (forward-compatibility) — fail
505
- # loud rather than silently mis-render.
506
537
  raise PipelineStatusError(
507
538
  f"Unknown SF execution status {status_str!r} from boto3 for {execution_arn}"
508
539
  )
@@ -516,8 +547,8 @@ def read_pipeline_state(
516
547
  failure_cause = (
517
548
  _failure_cause_from(describe_resp) if run_status == RunStatus.FAILED else None
518
549
  )
550
+ pipeline_role = _extract_pipeline_role(describe_resp)
519
551
 
520
- # 3. GetExecutionHistory
521
552
  try:
522
553
  history_resp = client.get_execution_history(
523
554
  executionArn=execution_arn,
@@ -545,9 +576,288 @@ def read_pipeline_state(
545
576
  tasks=tasks,
546
577
  failing_state=failing_state,
547
578
  failure_cause=failure_cause,
579
+ pipeline_role=pipeline_role,
580
+ )
581
+
582
+
583
+ def _find_execution_matching_role(
584
+ state_machine_arn: str,
585
+ role_filter: set[str],
586
+ *,
587
+ client: "SFNClient",
588
+ search_limit: int,
589
+ ) -> Optional[tuple[str, Optional[str]]]:
590
+ """Walk ListExecutions pages until finding an execution whose
591
+ ``input.pipeline_role`` ∈ ``role_filter``, or until ``search_limit``
592
+ executions have been inspected.
593
+
594
+ Returns ``(execution_arn, role)`` on hit, ``None`` on exhaustion.
595
+ The N+1 DescribeExecution calls are the cost of the role filter;
596
+ typical cron-cadence SFs find a match within the first 1-3 executions
597
+ so the cost is bounded in practice. Smoke-heavy windows pay more but
598
+ the ``search_limit`` cap bounds worst case.
599
+
600
+ Caller is responsible for translating None into the right outcome —
601
+ either SFNNoExecutions (when ListExecutions was empty in the first
602
+ page) or a "no execution matches filter" fallback signal.
603
+ """
604
+ inspected = 0
605
+ next_token: Optional[str] = None
606
+ while inspected < search_limit:
607
+ kwargs: dict[str, Any] = {
608
+ "stateMachineArn": state_machine_arn,
609
+ "maxResults": min(_LIST_EXECUTIONS_PAGE_SIZE, search_limit - inspected),
610
+ }
611
+ if next_token:
612
+ kwargs["nextToken"] = next_token
613
+ try:
614
+ list_resp = client.list_executions(**kwargs)
615
+ except Exception as exc: # noqa: BLE001 — narrow + re-raise
616
+ _raise_for_boto_error(exc, "ListExecutions")
617
+
618
+ executions = list_resp.get("executions") or []
619
+ if not executions:
620
+ return None
621
+ for ex in executions:
622
+ inspected += 1
623
+ execution_arn = ex.get("executionArn")
624
+ if not execution_arn:
625
+ continue
626
+ try:
627
+ describe_resp = client.describe_execution(executionArn=execution_arn)
628
+ except Exception as exc: # noqa: BLE001 — narrow + re-raise
629
+ _raise_for_boto_error(exc, "DescribeExecution")
630
+ role = _extract_pipeline_role(describe_resp)
631
+ if role is not None and role in role_filter:
632
+ return execution_arn, role
633
+
634
+ next_token = list_resp.get("nextToken")
635
+ if not next_token:
636
+ return None
637
+
638
+ return None
639
+
640
+
641
+ # ── Public entry point ────────────────────────────────────────────────────
642
+
643
+
644
+ def read_pipeline_state(
645
+ state_machine_arn: str,
646
+ *,
647
+ role_filter: Optional[set[str]] = None,
648
+ search_limit: int = _DEFAULT_ROLE_SEARCH_LIMIT,
649
+ execution_arn: Optional[str] = None,
650
+ client: Optional["SFNClient"] = None,
651
+ ) -> PipelineRun:
652
+ """Project the chosen execution of ``state_machine_arn`` onto a typed
653
+ :class:`PipelineRun`.
654
+
655
+ Default behavior (no ``role_filter``, no ``execution_arn``) is
656
+ backwards-compatible: returns the most-recent execution per
657
+ ``ListExecutions maxResults=1``, same as pre-Option-D.
658
+
659
+ Option-D execution-picker semantics:
660
+
661
+ - When ``execution_arn`` is set, fetches that specific execution
662
+ directly (bypasses ListExecutions). Used by the dashboard's
663
+ dropdown "click a row to inspect this execution" path.
664
+ - When ``role_filter`` is set, walks ListExecutions pages until
665
+ finding the most-recent execution whose ``input.pipeline_role``
666
+ is in the filter set. If none match within ``search_limit``
667
+ executions, raises :class:`SFNNoExecutions` with a message naming
668
+ the filter — the caller (page 25) renders a banner like "No
669
+ 'weekly' execution in the last 50 runs; click 'View other recent
670
+ executions' to inspect what's actually been running."
671
+
672
+ Parameters
673
+ ----------
674
+ state_machine_arn:
675
+ Full SF ARN.
676
+ role_filter:
677
+ Optional set of ``pipeline_role`` values to filter executions by
678
+ (e.g. ``{"weekly"}`` for the Saturday-SF cadence run, ``{"daily"}``
679
+ for the Weekday-SF cadence run). ``None`` = no filter (most-recent
680
+ regardless of role — current behavior).
681
+ search_limit:
682
+ Bounds the role-filter walk. Default 50 — see
683
+ :data:`_DEFAULT_ROLE_SEARCH_LIMIT`. Ignored when ``role_filter``
684
+ is None.
685
+ execution_arn:
686
+ Optional specific execution ARN to fetch. When set, both
687
+ ``role_filter`` and ``search_limit`` are ignored.
688
+ client:
689
+ Optional boto3 ``stepfunctions`` client. Tests pass a mock here;
690
+ production passes None.
691
+
692
+ Raises
693
+ ------
694
+ SFNAccessDenied
695
+ IAM denial on any of the three required actions.
696
+ SFNThrottled
697
+ Rate-limit on any of the three.
698
+ SFNNoExecutions
699
+ SF has zero executions, OR ``role_filter`` is set and no
700
+ execution within the search window matches.
701
+ PipelineStatusError
702
+ Any other unexpected error path.
703
+ """
704
+ if client is None: # pragma: no cover — production path
705
+ import boto3
706
+
707
+ client = boto3.client("stepfunctions", region_name=_region_from_arn(state_machine_arn))
708
+
709
+ # Path 1: explicit execution_arn — fetch directly.
710
+ if execution_arn is not None:
711
+ return _build_pipeline_run_from_execution_arn(
712
+ execution_arn, state_machine_arn, client=client
713
+ )
714
+
715
+ # Path 2: role_filter — walk ListExecutions until match.
716
+ if role_filter:
717
+ match = _find_execution_matching_role(
718
+ state_machine_arn, role_filter, client=client, search_limit=search_limit
719
+ )
720
+ if match is None:
721
+ raise SFNNoExecutions(
722
+ f"No execution with pipeline_role in {sorted(role_filter)!r} "
723
+ f"found within last {search_limit} executions of {state_machine_arn}."
724
+ )
725
+ matched_arn, _matched_role = match
726
+ return _build_pipeline_run_from_execution_arn(
727
+ matched_arn, state_machine_arn, client=client
728
+ )
729
+
730
+ # Path 3 (default): most-recent execution regardless of role —
731
+ # backwards-compatible with pre-Option-D callers.
732
+ try:
733
+ list_resp = client.list_executions(
734
+ stateMachineArn=state_machine_arn,
735
+ maxResults=1,
736
+ )
737
+ except Exception as exc: # noqa: BLE001 — narrow + re-raise
738
+ _raise_for_boto_error(exc, "ListExecutions")
739
+
740
+ executions = list_resp.get("executions") or []
741
+ if not executions:
742
+ raise SFNNoExecutions(
743
+ f"State machine {state_machine_arn} has no executions yet."
744
+ )
745
+
746
+ latest = executions[0]
747
+ return _build_pipeline_run_from_execution_arn(
748
+ latest.get("executionArn"), state_machine_arn, client=client
548
749
  )
549
750
 
550
751
 
752
+ def list_recent_pipeline_runs(
753
+ state_machine_arn: str,
754
+ *,
755
+ limit: int = 10,
756
+ role_filter: Optional[set[str]] = None,
757
+ client: Optional["SFNClient"] = None,
758
+ ) -> list[PipelineExecutionSummary]:
759
+ """Return lightweight summaries of the most-recent N executions.
760
+
761
+ Backs the page-25 "View other recent executions" disclosure: shows
762
+ the operator what's been running on this SF, ranked most-recent
763
+ first, with the ``pipeline_role`` of each so smoke vs. weekly vs.
764
+ recovery is visible at a glance.
765
+
766
+ Each summary requires one ``DescribeExecution`` call (to extract
767
+ ``pipeline_role`` from the input JSON) on top of one
768
+ ``ListExecutions`` call, so this is O(limit) API calls. Default
769
+ ``limit=10`` puts the dashboard's "show me last N" view at ~11
770
+ SF API calls per page render — well within the 25-TPS soft limit
771
+ states:DescribeExecution applies.
772
+
773
+ Parameters
774
+ ----------
775
+ state_machine_arn:
776
+ Full SF ARN.
777
+ limit:
778
+ Max number of executions to return. Default 10.
779
+ role_filter:
780
+ Optional pre-filter (returns only executions whose
781
+ ``pipeline_role`` ∈ ``role_filter``). When set, the API call
782
+ budget grows because we may have to walk past role-mismatched
783
+ executions; bounded by an internal walk cap of ``limit * 5``.
784
+ client:
785
+ Optional boto3 ``stepfunctions`` client.
786
+ """
787
+ if client is None: # pragma: no cover — production path
788
+ import boto3
789
+
790
+ client = boto3.client("stepfunctions", region_name=_region_from_arn(state_machine_arn))
791
+
792
+ walk_cap = limit if role_filter is None else min(limit * 5, _DEFAULT_ROLE_SEARCH_LIMIT)
793
+ summaries: list[PipelineExecutionSummary] = []
794
+ inspected = 0
795
+ next_token: Optional[str] = None
796
+
797
+ while len(summaries) < limit and inspected < walk_cap:
798
+ kwargs: dict[str, Any] = {
799
+ "stateMachineArn": state_machine_arn,
800
+ "maxResults": min(_LIST_EXECUTIONS_PAGE_SIZE, walk_cap - inspected),
801
+ }
802
+ if next_token:
803
+ kwargs["nextToken"] = next_token
804
+ try:
805
+ list_resp = client.list_executions(**kwargs)
806
+ except Exception as exc: # noqa: BLE001 — narrow + re-raise
807
+ _raise_for_boto_error(exc, "ListExecutions")
808
+
809
+ executions = list_resp.get("executions") or []
810
+ if not executions:
811
+ break
812
+ for ex in executions:
813
+ inspected += 1
814
+ execution_arn = ex.get("executionArn")
815
+ if not execution_arn:
816
+ continue
817
+ try:
818
+ describe_resp = client.describe_execution(executionArn=execution_arn)
819
+ except Exception as exc: # noqa: BLE001 — narrow + re-raise
820
+ _raise_for_boto_error(exc, "DescribeExecution")
821
+ role = _extract_pipeline_role(describe_resp)
822
+ if role_filter is not None and role not in role_filter:
823
+ continue
824
+ status_str = describe_resp.get("status", "RUNNING")
825
+ try:
826
+ status = RunStatus(status_str)
827
+ except ValueError:
828
+ raise PipelineStatusError(
829
+ f"Unknown SF execution status {status_str!r} from boto3 for {execution_arn}"
830
+ )
831
+ start_utc = _parse_ts(describe_resp.get("startDate"))
832
+ end_utc = _parse_ts(describe_resp.get("stopDate"))
833
+ duration: Optional[float] = None
834
+ if start_utc is not None and end_utc is not None:
835
+ duration = (end_utc - start_utc).total_seconds()
836
+ if start_utc is None:
837
+ # An execution without a start time is degenerate; skip
838
+ # rather than fail the whole list.
839
+ continue
840
+ summaries.append(
841
+ PipelineExecutionSummary(
842
+ execution_arn=execution_arn,
843
+ name=ex.get("name") or execution_arn.rsplit(":", 1)[-1],
844
+ status=status,
845
+ start_utc=start_utc,
846
+ end_utc=end_utc,
847
+ duration_sec=duration,
848
+ pipeline_role=role,
849
+ )
850
+ )
851
+ if len(summaries) >= limit:
852
+ break
853
+
854
+ next_token = list_resp.get("nextToken")
855
+ if not next_token:
856
+ break
857
+
858
+ return summaries
859
+
860
+
551
861
  def _raise_for_boto_error(exc: Exception, action: str) -> None:
552
862
  """Translate a boto3 exception into a typed PipelineStatusError.
553
863
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: alpha-engine-lib
3
- Version: 0.35.1
3
+ Version: 0.36.0
4
4
  Summary: Shared utilities for the Alpha Engine modules: preflight, structured logging with secret-redaction, ArcticDB universe access, NYSE-calendar dates + freshness predicates, decision capture, cost telemetry, RAG, agent output schemas, SSM-backed secrets, Telegram alerts + SNS fan-out, EC2 spot-launch resilience, SSM log-capture chokepoint, SSM send-command + poll chokepoint, and Step-Functions execution-state projection. Full surface documented in README.
5
5
  Author: Brian McMahon
6
6
  License: Proprietary
@@ -23,6 +23,7 @@ from unittest.mock import MagicMock
23
23
  import pytest
24
24
 
25
25
  from alpha_engine_lib.pipeline_status import (
26
+ PipelineExecutionSummary,
26
27
  PipelineRun,
27
28
  RunStatus,
28
29
  SFNAccessDenied,
@@ -30,10 +31,12 @@ from alpha_engine_lib.pipeline_status import (
30
31
  SFNThrottled,
31
32
  TaskRow,
32
33
  TaskStatus,
34
+ list_recent_pipeline_runs,
33
35
  read_pipeline_state,
34
36
  )
35
37
  from alpha_engine_lib.pipeline_status.read import (
36
38
  PipelineStatusError,
39
+ _extract_pipeline_role,
37
40
  _failure_cause_from,
38
41
  _materialize_tasks,
39
42
  _parse_ts,
@@ -535,3 +538,248 @@ def test_task_row_archive_round_trips_through_json_for_artifact_reason():
535
538
  "round-trip — same regression class as the ArchivePageRef test."
536
539
  )
537
540
  assert "Terminal success" in round_tripped_task.archive.reason
541
+
542
+
543
+ # ── pipeline_role extraction (Option-D substrate) ─────────────────────────
544
+
545
+
546
+ def test_extract_pipeline_role_happy_path():
547
+ """Standard EventBridge cron payload with pipeline_role set."""
548
+ describe = {
549
+ "input": '{"pipeline_role": "weekly", "run_date": "2026-05-30"}',
550
+ }
551
+ assert _extract_pipeline_role(describe) == "weekly"
552
+
553
+
554
+ def test_extract_pipeline_role_missing_field():
555
+ """Pre-Option-D execution input (no pipeline_role key) returns None."""
556
+ describe = {"input": '{"run_date": "2026-05-30"}'}
557
+ assert _extract_pipeline_role(describe) is None
558
+
559
+
560
+ def test_extract_pipeline_role_missing_input_field():
561
+ """DescribeExecution may omit the input field entirely on terminal
562
+ states (rare but possible) — degrade to None, not crash."""
563
+ assert _extract_pipeline_role({}) is None
564
+ assert _extract_pipeline_role({"input": None}) is None
565
+ assert _extract_pipeline_role({"input": ""}) is None
566
+
567
+
568
+ def test_extract_pipeline_role_malformed_json():
569
+ """Malformed input JSON — WARN-and-return-None per the lib's
570
+ permissive parse policy. Recording surface is the WARN log."""
571
+ describe = {"input": "{not valid json"}
572
+ assert _extract_pipeline_role(describe) is None
573
+
574
+
575
+ def test_extract_pipeline_role_input_is_array_not_object():
576
+ """SF allows array-shaped input; defensively handle it (return None
577
+ rather than raise) — pipeline_role is a top-level field on object
578
+ inputs only."""
579
+ describe = {"input": '["weekly"]'}
580
+ assert _extract_pipeline_role(describe) is None
581
+
582
+
583
+ def test_extract_pipeline_role_empty_string_returns_none():
584
+ """An explicit empty string in pipeline_role is treated as 'not set'
585
+ so the dashboard renders 'role: unknown' instead of '': empty cells
586
+ are operator-noise."""
587
+ describe = {"input": '{"pipeline_role": ""}'}
588
+ assert _extract_pipeline_role(describe) is None
589
+
590
+
591
+ # ── Role filter + execution_arn paths in read_pipeline_state ──────────────
592
+
593
+
594
+ def _make_describe_response(*, status="SUCCEEDED", role: Optional[str] = None) -> dict:
595
+ """Build a DescribeExecution response carrying an optional
596
+ pipeline_role on the input JSON. Default times preserved."""
597
+ body: dict = {
598
+ "status": status,
599
+ "startDate": datetime(2026, 5, 24, 9, 0, tzinfo=timezone.utc),
600
+ "stopDate": datetime(2026, 5, 24, 11, 30, tzinfo=timezone.utc),
601
+ }
602
+ if role is not None:
603
+ body["input"] = f'{{"pipeline_role": "{role}", "run_date": "2026-05-24"}}'
604
+ else:
605
+ body["input"] = '{"run_date": "2026-05-24"}'
606
+ return body
607
+
608
+
609
+ def _make_multi_execution_mock(
610
+ *,
611
+ executions: list[dict],
612
+ describe_by_arn: dict[str, dict],
613
+ ) -> MagicMock:
614
+ """Build an SFN mock where ListExecutions returns a list and
615
+ DescribeExecution dispatches by executionArn to the right response."""
616
+ client = MagicMock()
617
+ client.list_executions.return_value = {"executions": executions}
618
+
619
+ def _dispatch(executionArn: str, **_kwargs):
620
+ return describe_by_arn[executionArn]
621
+
622
+ client.describe_execution.side_effect = _dispatch
623
+ client.get_execution_history.return_value = {"events": []}
624
+ return client
625
+
626
+
627
+ def test_read_pipeline_state_default_returns_most_recent_unchanged():
628
+ """No role_filter, no execution_arn — same as pre-Option-D: most-recent
629
+ execution per ListExecutions maxResults=1."""
630
+ client = _make_sfn_mock()
631
+ run = read_pipeline_state(SATURDAY_ARN, client=client)
632
+ assert run.status == RunStatus.SUCCEEDED
633
+ # ListExecutions was called with maxResults=1 (default path).
634
+ client.list_executions.assert_called_once()
635
+ call_kwargs = client.list_executions.call_args.kwargs
636
+ assert call_kwargs.get("maxResults") == 1
637
+
638
+
639
+ def test_read_pipeline_state_with_role_filter_finds_first_match():
640
+ """Three executions in history: smoke / weekly / smoke. Filter to
641
+ 'weekly' — picks the middle one."""
642
+ smoke1_arn = EXECUTION_ARN + "-smoke1"
643
+ weekly_arn = EXECUTION_ARN + "-weekly"
644
+ smoke2_arn = EXECUTION_ARN + "-smoke2"
645
+ client = _make_multi_execution_mock(
646
+ executions=[
647
+ {"executionArn": smoke1_arn, "name": "smoke-l1995"},
648
+ {"executionArn": weekly_arn, "name": "weekly-20260524T090000"},
649
+ {"executionArn": smoke2_arn, "name": "smoke-debug"},
650
+ ],
651
+ describe_by_arn={
652
+ smoke1_arn: _make_describe_response(role="smoke"),
653
+ weekly_arn: _make_describe_response(role="weekly"),
654
+ smoke2_arn: _make_describe_response(role="smoke"),
655
+ },
656
+ )
657
+ run = read_pipeline_state(SATURDAY_ARN, role_filter={"weekly"}, client=client)
658
+ assert run.execution_arn == weekly_arn
659
+ assert run.pipeline_role == "weekly"
660
+
661
+
662
+ def test_read_pipeline_state_with_role_filter_no_match_raises():
663
+ """Three smoke executions, filter to 'weekly' — raises
664
+ SFNNoExecutions naming the filter so the caller can render an
665
+ operator-actionable banner."""
666
+ client = _make_multi_execution_mock(
667
+ executions=[
668
+ {"executionArn": EXECUTION_ARN + f"-{i}", "name": f"smoke-{i}"}
669
+ for i in range(3)
670
+ ],
671
+ describe_by_arn={
672
+ EXECUTION_ARN + f"-{i}": _make_describe_response(role="smoke")
673
+ for i in range(3)
674
+ },
675
+ )
676
+ with pytest.raises(SFNNoExecutions) as exc_info:
677
+ read_pipeline_state(
678
+ SATURDAY_ARN, role_filter={"weekly"}, search_limit=10, client=client
679
+ )
680
+ assert "weekly" in str(exc_info.value)
681
+
682
+
683
+ def test_read_pipeline_state_with_role_filter_treats_missing_role_as_no_match():
684
+ """Pre-Option-D executions lack pipeline_role; role_filter must NOT
685
+ match those (otherwise the filter is no filter at all). The walk
686
+ keeps going until an explicitly-tagged execution turns up."""
687
+ untagged_arn = EXECUTION_ARN + "-untagged"
688
+ weekly_arn = EXECUTION_ARN + "-weekly"
689
+ client = _make_multi_execution_mock(
690
+ executions=[
691
+ {"executionArn": untagged_arn, "name": "old-pre-option-d"},
692
+ {"executionArn": weekly_arn, "name": "weekly-20260524T090000"},
693
+ ],
694
+ describe_by_arn={
695
+ untagged_arn: _make_describe_response(role=None),
696
+ weekly_arn: _make_describe_response(role="weekly"),
697
+ },
698
+ )
699
+ run = read_pipeline_state(SATURDAY_ARN, role_filter={"weekly"}, client=client)
700
+ assert run.execution_arn == weekly_arn
701
+
702
+
703
+ def test_read_pipeline_state_with_execution_arn_fetches_specific_execution():
704
+ """Dropdown-click path: when execution_arn is set, the function fetches
705
+ that specific execution directly (bypasses ListExecutions). role_filter
706
+ and search_limit are ignored on this path."""
707
+ target_arn = EXECUTION_ARN + "-specific"
708
+ client = _make_multi_execution_mock(
709
+ executions=[], # ListExecutions intentionally empty — proves it's not called
710
+ describe_by_arn={target_arn: _make_describe_response(role="smoke")},
711
+ )
712
+ run = read_pipeline_state(SATURDAY_ARN, execution_arn=target_arn, client=client)
713
+ assert run.execution_arn == target_arn
714
+ assert run.pipeline_role == "smoke"
715
+ # ListExecutions must NOT have been called on the execution_arn path.
716
+ client.list_executions.assert_not_called()
717
+
718
+
719
+ def test_read_pipeline_state_carries_pipeline_role_to_returned_run():
720
+ """The pipeline_role field on PipelineRun is populated from input JSON
721
+ even when no role_filter is applied (default path) — the dashboard's
722
+ section header shows it regardless of how the execution was picked."""
723
+ client = _make_sfn_mock(
724
+ describe_response=_make_describe_response(role="weekly"),
725
+ )
726
+ run = read_pipeline_state(SATURDAY_ARN, client=client)
727
+ assert run.pipeline_role == "weekly"
728
+
729
+
730
+ def test_read_pipeline_state_pipeline_role_none_when_input_lacks_role():
731
+ """No pipeline_role in input → PipelineRun.pipeline_role is None
732
+ (rendered as 'role: unknown' on the dashboard)."""
733
+ client = _make_sfn_mock(
734
+ describe_response=_make_describe_response(role=None),
735
+ )
736
+ run = read_pipeline_state(SATURDAY_ARN, client=client)
737
+ assert run.pipeline_role is None
738
+
739
+
740
+ # ── list_recent_pipeline_runs ─────────────────────────────────────────────
741
+
742
+
743
+ def test_list_recent_pipeline_runs_returns_summaries_with_roles():
744
+ """Returns last N executions, each carrying its pipeline_role for the
745
+ operator dropdown's at-a-glance smoke-vs-weekly distinction."""
746
+ arns = [EXECUTION_ARN + f"-{i}" for i in range(5)]
747
+ roles = ["smoke", "weekly", "smoke", "weekly", "recovery"]
748
+ client = _make_multi_execution_mock(
749
+ executions=[
750
+ {"executionArn": a, "name": f"exec-{i}"} for i, a in enumerate(arns)
751
+ ],
752
+ describe_by_arn={a: _make_describe_response(role=r) for a, r in zip(arns, roles)},
753
+ )
754
+ summaries = list_recent_pipeline_runs(SATURDAY_ARN, limit=5, client=client)
755
+ assert len(summaries) == 5
756
+ assert all(isinstance(s, PipelineExecutionSummary) for s in summaries)
757
+ assert [s.pipeline_role for s in summaries] == roles
758
+
759
+
760
+ def test_list_recent_pipeline_runs_role_filter_pre_filters():
761
+ """When role_filter is set, only matching executions are returned —
762
+ the operator's "show me weekly runs only" view."""
763
+ arns = [EXECUTION_ARN + f"-{i}" for i in range(6)]
764
+ roles = ["smoke", "weekly", "smoke", "weekly", "recovery", "weekly"]
765
+ client = _make_multi_execution_mock(
766
+ executions=[
767
+ {"executionArn": a, "name": f"exec-{i}"} for i, a in enumerate(arns)
768
+ ],
769
+ describe_by_arn={a: _make_describe_response(role=r) for a, r in zip(arns, roles)},
770
+ )
771
+ summaries = list_recent_pipeline_runs(
772
+ SATURDAY_ARN, limit=10, role_filter={"weekly"}, client=client
773
+ )
774
+ assert len(summaries) == 3
775
+ assert all(s.pipeline_role == "weekly" for s in summaries)
776
+
777
+
778
+ def test_list_recent_pipeline_runs_empty_returns_empty_list():
779
+ """Zero executions → empty list (NOT SFNNoExecutions). The dropdown
780
+ just renders 'no executions yet' inline; the page-25 section banner
781
+ is the load-bearing error surface, not this lighter-weight API."""
782
+ client = MagicMock()
783
+ client.list_executions.return_value = {"executions": []}
784
+ summaries = list_recent_pipeline_runs(SATURDAY_ARN, limit=5, client=client)
785
+ assert summaries == []