alpha-engine-lib 0.32.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. alpha_engine_lib/__init__.py +3 -0
  2. alpha_engine_lib/agent_schemas.py +663 -0
  3. alpha_engine_lib/alerts.py +576 -0
  4. alpha_engine_lib/arcticdb.py +340 -0
  5. alpha_engine_lib/collector_results.py +69 -0
  6. alpha_engine_lib/cost.py +665 -0
  7. alpha_engine_lib/dates.py +273 -0
  8. alpha_engine_lib/decision_capture.py +462 -0
  9. alpha_engine_lib/ec2_spot.py +363 -0
  10. alpha_engine_lib/email_sender.py +206 -0
  11. alpha_engine_lib/eval_artifacts.py +361 -0
  12. alpha_engine_lib/logging.py +303 -0
  13. alpha_engine_lib/model_pricing.yaml +73 -0
  14. alpha_engine_lib/pillars.py +756 -0
  15. alpha_engine_lib/pipeline_status/__init__.py +70 -0
  16. alpha_engine_lib/pipeline_status/read.py +541 -0
  17. alpha_engine_lib/pipeline_status/registry.py +368 -0
  18. alpha_engine_lib/pipeline_status/templates.py +120 -0
  19. alpha_engine_lib/preflight.py +444 -0
  20. alpha_engine_lib/rag/__init__.py +39 -0
  21. alpha_engine_lib/rag/db.py +96 -0
  22. alpha_engine_lib/rag/embeddings.py +63 -0
  23. alpha_engine_lib/rag/migrations/0001_content_tsv.sql +39 -0
  24. alpha_engine_lib/rag/rerank.py +377 -0
  25. alpha_engine_lib/rag/retrieval.py +465 -0
  26. alpha_engine_lib/rag/schema.sql +65 -0
  27. alpha_engine_lib/reconcile.py +203 -0
  28. alpha_engine_lib/secrets.py +186 -0
  29. alpha_engine_lib/sources/__init__.py +35 -0
  30. alpha_engine_lib/sources/protocols.py +227 -0
  31. alpha_engine_lib/ssm_log_capture.py +274 -0
  32. alpha_engine_lib/telegram.py +165 -0
  33. alpha_engine_lib/trading_calendar.py +236 -0
  34. alpha_engine_lib/transparency.py +746 -0
  35. alpha_engine_lib/transparency_inventory.yaml +260 -0
  36. alpha_engine_lib/universe.py +83 -0
  37. alpha_engine_lib-0.32.0.dist-info/METADATA +217 -0
  38. alpha_engine_lib-0.32.0.dist-info/RECORD +40 -0
  39. alpha_engine_lib-0.32.0.dist-info/WHEEL +5 -0
  40. alpha_engine_lib-0.32.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,340 @@
1
+ """
2
+ ArcticDB helpers: uniform library-open path + common read patterns.
3
+
4
+ Centralizes the ``adb.Arctic(uri).get_library("...")`` boilerplate that was
5
+ duplicated across predictor, research, backtester, data, and executor. Every
6
+ site was constructing the same S3 URI string by hand — one escape bug in
7
+ that string (path_prefix= query param collapsing under shell double-quote
8
+ interpolation) surfaced 2026-04-21 during the SNDK incident.
9
+
10
+ Using this module guarantees that:
11
+
12
+ - The S3 URI format stays consistent everywhere (single source of truth).
13
+ - Library-open failures raise a uniform ``RuntimeError`` with bucket
14
+ context, so downstream errors have a consistent shape.
15
+ - ``arcticdb`` is imported lazily inside each function, so this module
16
+ stays importable on consumers that don't install the ``[arcticdb]``
17
+ optional extra (e.g. lightweight CLI tools that only use the logging
18
+ submodule).
19
+
20
+ Requires the ``arcticdb`` optional extra
21
+ (``alpha-engine-lib[arcticdb]``) to actually call any function here.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import logging
27
+ import os
28
+ from typing import TYPE_CHECKING
29
+
30
+ if TYPE_CHECKING: # pragma: no cover
31
+ from arcticdb.version_store.library import Library
32
+
33
+ log = logging.getLogger(__name__)
34
+
35
+ # Library name constants — these match what every alpha-engine module uses.
36
+ # Centralized so a rename propagates from one place.
37
+ UNIVERSE_LIB = "universe"
38
+ MACRO_LIB = "macro"
39
+
40
+
41
+ def arctic_uri(bucket: str, *, region: str | None = None) -> str:
42
+ """Return the canonical ArcticDB S3 URI for ``bucket``.
43
+
44
+ Format: ``s3s://s3.{region}.amazonaws.com:{bucket}?path_prefix=arcticdb&aws_auth=true``
45
+
46
+ ``region`` defaults to ``AWS_REGION`` env var, then ``us-east-1``.
47
+ """
48
+ region = region or os.environ.get("AWS_REGION", "us-east-1")
49
+ return (
50
+ f"s3s://s3.{region}.amazonaws.com:{bucket}"
51
+ "?path_prefix=arcticdb&aws_auth=true"
52
+ )
53
+
54
+
55
def _import_arcticdb():
    """Import ``arcticdb`` on demand and return the module object.

    Keeping the import inside this helper lets the enclosing module be
    imported without the optional ``[arcticdb]`` extra installed.

    Raises:
        RuntimeError: if ``arcticdb`` cannot be imported. The original
            ``ImportError`` is chained as the cause and its text is
            appended to the message.
    """
    try:
        import arcticdb as arctic_module
    except ImportError as import_error:
        hint = (
            "arcticdb is not importable — install "
            "alpha-engine-lib[arcticdb] or add arcticdb to the deploy "
            f"image: {import_error}"
        )
        raise RuntimeError(hint) from import_error
    else:
        return arctic_module
66
+
67
+
68
def open_arctic(bucket: str, *, region: str | None = None):
    """Build an ``arcticdb.Arctic`` handle for ``bucket``.

    The connection string comes from ``arctic_uri`` so every caller shares
    one URI format.

    Args:
        bucket: S3 bucket backing ArcticDB.
        region: AWS region override (defaults via ``arctic_uri``).

    Raises:
        RuntimeError: if ``arcticdb`` is not installed.
    """
    # Resolve the optional dependency first so a missing install surfaces
    # before anything else is attempted.
    arcticdb_module = _import_arcticdb()
    connection_uri = arctic_uri(bucket, region=region)
    return arcticdb_module.Arctic(connection_uri)
75
+
76
+
77
def _open_named_lib(
    bucket: str, lib_name: str, *, region: str | None = None
) -> "Library":
    """Open the ArcticDB library ``lib_name`` on ``bucket``.

    Shared implementation behind ``open_universe_lib`` and
    ``open_macro_lib`` so the error shape lives in exactly one place.

    Args:
        bucket: S3 bucket backing ArcticDB.
        lib_name: name of the library to open.
        region: AWS region override (defaults via ``arctic_uri``).

    Raises:
        RuntimeError: if ``get_library`` fails; the message carries the
            library name, bucket and URI, and the original exception is
            chained as the cause. A missing ``arcticdb`` install also
            surfaces as the ``RuntimeError`` raised by ``open_arctic``.
    """
    arctic = open_arctic(bucket, region=region)
    try:
        return arctic.get_library(lib_name)
    except Exception as exc:
        raise RuntimeError(
            f"ArcticDB {lib_name!r} library open failed on bucket "
            f"{bucket!r} (uri={arctic_uri(bucket, region=region)}): {exc}"
        ) from exc


def open_universe_lib(bucket: str, *, region: str | None = None) -> "Library":
    """Open the ``universe`` library on ``bucket``.

    Raises ``RuntimeError`` on any library-open failure, with bucket and
    URI in the message so the operator can see which endpoint is failing.
    """
    return _open_named_lib(bucket, UNIVERSE_LIB, region=region)


def open_macro_lib(bucket: str, *, region: str | None = None) -> "Library":
    """Open the ``macro`` library on ``bucket``.

    Raises ``RuntimeError`` on any library-open failure, with bucket and
    URI in the message (same shape as :func:`open_universe_lib`).
    """
    return _open_named_lib(bucket, MACRO_LIB, region=region)
106
+
107
+
108
def get_universe_symbols(bucket: str, *, region: str | None = None) -> set[str]:
    """Return every symbol currently stored in the universe library.

    Typical use: filter a ticker list down to "what is actually in
    ArcticDB right now" before handing it to downstream code that
    hard-fails on missing symbols (e.g. the executor's load_daily_vwap /
    load_atr_14_pct guards, or the backtester's simulate replay of
    historical signals).

    Args:
        bucket: S3 bucket backing ArcticDB.
        region: AWS region override (defaults via ``arctic_uri``).

    Raises:
        RuntimeError: on library-open or listing failure. An ArcticDB
            health problem is a pipeline-level precondition, so it is
            surfaced rather than papered over with an empty set.
    """
    universe = open_universe_lib(bucket, region=region)
    try:
        listed = universe.list_symbols()
        available = set(listed)
    except Exception as exc:
        raise RuntimeError(
            f"ArcticDB {UNIVERSE_LIB}.list_symbols() failed on bucket "
            f"{bucket!r}: {exc}"
        ) from exc
    else:
        log.info(
            "ArcticDB %s symbols available: %d", UNIVERSE_LIB, len(available)
        )
        return available
130
+
131
+
132
+ # Canonical OHLCV column subset (opt-in, not the default). ``columns=None``
133
+ # (the load_universe_ohlcv default) reads the full stored frame so the result
134
+ # is a faithful slim-cache equivalent; pass this list explicitly to narrow the read for perf when only prices are needed.
135
+ OHLCV_COLUMNS = ["Open", "High", "Low", "Close", "Volume"]
136
+
137
+ # The slim cache (alpha-engine-data collectors/slim_cache.py) writes a ~2-year
138
+ # tail slice of the full price_cache parquets. 730d is the parity-equivalent
139
+ # default so a load_universe_ohlcv() result lines up with load_slim_cache()
140
+ # over the same window; widen via lookback_days for backtester-style reads.
141
+ _SLIM_EQUIVALENT_LOOKBACK_DAYS = 730
142
+
143
+
144
def _load_arctic_frames(
    lib,
    symbols,
    *,
    lookback_days: int,
    end,
    columns,
    max_workers: int,
    label: str,
) -> "dict[str, 'pd.DataFrame']":
    """Shared read core for the universe + macro ArcticDB readers.

    Reads a date-windowed slice of each ``symbols`` entry out of an
    already-opened ArcticDB ``lib``, normalizing exactly like
    ``load_slim_cache`` (tz-naive monotonic ``DatetimeIndex``, dup dates
    collapsed keep=last) so a ``reconcile`` against slim compares
    like-for-like. ``label`` only tunes log messages.

    Contract (mirrors ``load_slim_cache``): individual symbol read failures
    / empty frames are logged at WARNING and dropped — the caller decides
    how to handle a partial load.

    Args:
        lib: opened library object exposing ``read(symbol, **kwargs)``
            whose result has a ``.data`` DataFrame.
        symbols: iterable of symbol names; duplicates are collapsed. A bare
            ``str`` is treated as one symbol, not as an iterable of
            characters.
        lookback_days: window length in days, counted back from ``end``.
        end: window end (anything ``pd.Timestamp`` accepts); ``None`` means
            the current UTC date. Normalized to midnight, tz dropped.
        columns: iterable of column names to read, or ``None`` for the
            full stored frame. Materialized once, so a one-shot iterable
            is applied to every symbol.
        max_workers: thread-pool width for the per-symbol reads.
        label: prefix used in log messages.

    Returns:
        ``{symbol: DataFrame}`` for every symbol that read successfully and
        was non-empty. Keys are inserted in sorted symbol order regardless
        of which read finishes first. ``{}`` when ``symbols`` is empty.
    """
    import pandas as pd  # lazy: only needed with the [arcticdb] extra
    from concurrent.futures import ThreadPoolExecutor

    # ``set("SPY")`` would explode into {"S", "P", "Y"}; a lone string is
    # one symbol.
    if isinstance(symbols, str):
        symbols = [symbols]
    symbols = sorted(set(symbols))
    if not symbols:
        return {}

    end_ts = (
        pd.Timestamp(end) if end is not None else pd.Timestamp.now(tz="UTC")
    ).normalize()
    if end_ts.tz is not None:
        end_ts = end_ts.tz_localize(None)
    start_ts = end_ts - pd.Timedelta(days=lookback_days)

    # Materialize once: calling ``list(columns)`` inside every read would
    # drain a generator on the first symbol and pass [] to all the others.
    column_list = None if columns is None else list(columns)

    def _read(sym: str):
        """Read and normalize one symbol; return ``None`` for no data."""
        read_kwargs = {"date_range": (start_ts, end_ts)}
        if column_list is not None:
            # Fresh copy per call so no list object is shared across threads.
            read_kwargs["columns"] = list(column_list)
        df = lib.read(sym, **read_kwargs).data
        if df is None or df.empty:
            return None
        if not isinstance(df.index, pd.DatetimeIndex):
            df.index = pd.to_datetime(df.index)
        if df.index.tz is not None:
            df.index = df.index.tz_convert("UTC").tz_localize(None)
        # Collapse duplicate dates (last stored row wins), then order by date.
        return df[~df.index.duplicated(keep="last")].sort_index()

    out: "dict[str, pd.DataFrame]" = {}
    errors = 0
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # Everything is submitted up front, so the reads still overlap;
        # consuming results in submission (sorted) order only makes the
        # output dict and the warning sequence deterministic.
        pending = [(sym, pool.submit(_read, sym)) for sym in symbols]
        for sym, fut in pending:
            try:
                df = fut.result()
            except Exception as exc:  # noqa: BLE001 - partial-load contract
                log.warning("ArcticDB %s read failed for %s: %s", label, sym, exc)
                errors += 1
                continue
            if df is None:
                log.warning(
                    "ArcticDB %s returned empty frame for %s", label, sym
                )
                errors += 1
                continue
            out[sym] = df

    log.info(
        "%s: %d symbols OK, %d errors (window %s..%s)",
        label,
        len(out),
        errors,
        start_ts.date(),
        end_ts.date(),
    )
    return out
223
+
224
+
225
def load_universe_ohlcv(
    bucket: str,
    *,
    symbols=None,
    lookback_days: int = _SLIM_EQUIVALENT_LOOKBACK_DAYS,
    end=None,
    columns=None,
    max_workers: int = 20,
    region: str | None = None,
) -> "dict[str, 'pd.DataFrame']":
    """Load a ticker -> OHLCV DataFrame dict from the ArcticDB **universe** lib.

    This is the **single source of truth** for "read a 2y-ish OHLCV slice per
    ticker out of ArcticDB" — the read+dedup+normalize idiom that was
    copy-pasted into predictor ``inference/stages/load_prices.py`` and is
    needed again by the data macro-breadth / feature-compute and backtester
    exit-timing consumers as they migrate off the
    ``predictor/price_cache_slim/`` parquet tier. Returns the **same shape**
    as ``alpha-engine-data store.parquet_loader.load_slim_cache`` (ticker ->
    DataFrame with a tz-naive monotonic ``DatetimeIndex``) so it is a drop-in
    substitute for slim-cache reads.

    Contract (mirrors ``load_slim_cache``): individual ticker read failures /
    empty frames are logged at WARNING and dropped from the result — the
    caller decides how to handle a partial load. Returns ``{}`` if the
    universe library has no symbols, or if an explicitly passed ``symbols``
    is empty (the library is not opened in that case).

    Note: the universe lib holds equities + SPY only. Macro/index series
    (VIX, TNX, IRX, GLD, USO, VIX3M) and sector ETFs live in the **macro**
    lib — use :func:`load_macro_series` for those.

    Args:
        bucket: S3 bucket backing ArcticDB.
        symbols: iterable of tickers to read; ``None`` reads every symbol
            currently in the universe library (``get_universe_symbols``).
            A bare ``str`` is treated as a single ticker.
        lookback_days: window size; default matches the slim cache's ~2y tail.
        end: window end (``pd.Timestamp``/str); ``None`` -> today (normalized).
        columns: columns to read; ``None`` reads the full stored frame (true
            slim-cache equivalent). Pass ``OHLCV_COLUMNS`` to narrow for perf.
        max_workers: ThreadPool width for the per-ticker reads.
        region: AWS region override (defaults via ``arctic_uri``).

    Raises:
        RuntimeError: if the universe library cannot be opened or listed.
    """
    read_all = symbols is None
    if read_all:
        symbols = get_universe_symbols(bucket, region=region)
    elif isinstance(symbols, str):
        # ``set("SPY")`` would explode into single characters; a lone string
        # is one ticker.
        symbols = [symbols]
    symbols = sorted(set(symbols))
    if not symbols:
        if read_all:
            log.warning(
                "load_universe_ohlcv: universe library %r is empty on %r",
                UNIVERSE_LIB,
                bucket,
            )
        else:
            # The caller passed an empty selection; the library itself was
            # never inspected, so do not claim it is empty.
            log.warning(
                "load_universe_ohlcv: no symbols requested on %r", bucket
            )
        return {}
    lib = open_universe_lib(bucket, region=region)
    return _load_arctic_frames(
        lib,
        symbols,
        lookback_days=lookback_days,
        end=end,
        columns=columns,
        max_workers=max_workers,
        label="load_universe_ohlcv",
    )
287
+
288
+
289
def load_macro_series(
    bucket: str,
    symbols,
    *,
    lookback_days: int = _SLIM_EQUIVALENT_LOOKBACK_DAYS,
    end=None,
    columns=None,
    max_workers: int = 20,
    region: str | None = None,
) -> "dict[str, 'pd.DataFrame']":
    """Load a symbol -> OHLCV DataFrame dict from the ArcticDB **macro** lib.

    The macro-lib analog of :func:`load_universe_ohlcv`, sharing the exact
    same read+dedup+tz-normalize core so a ``reconcile`` against the slim
    cache compares like-for-like. The macro lib holds the index/macro series
    (SPY, VIX, VIX3M, TNX, IRX, GLD, USO) and the sector ETFs (XL*) that the
    universe lib does **not** carry — i.e. exactly what
    ``alpha-engine-data features/compute.py::_extract_macro`` needs as it
    migrates off ``predictor/price_cache_slim/``.

    ``symbols`` is **required** (no read-all default): the macro lib is
    heterogeneous and contains non-price composite keys (e.g. a ``features``
    symbol written by the data backfill) that must not be read as OHLCV.
    Pass the explicit set the caller needs.

    Same partial-load contract as :func:`load_universe_ohlcv`: per-symbol
    read failures / empty frames are logged at WARNING and dropped. Returns
    ``{}`` if ``symbols`` is empty (the library is not opened in that case).

    Args:
        bucket: S3 bucket backing ArcticDB.
        symbols: explicit iterable of macro/ETF symbols to read (required).
            A bare ``str`` is treated as a single symbol.
        lookback_days: window size; default matches the slim cache's ~2y tail.
        end: window end (``pd.Timestamp``/str); ``None`` -> today (normalized).
        columns: columns to read; ``None`` reads the full stored frame.
        max_workers: ThreadPool width for the per-symbol reads.
        region: AWS region override (defaults via ``arctic_uri``).

    Raises:
        RuntimeError: if the macro library cannot be opened.
    """
    # ``set("VIX")`` would explode into {"V", "I", "X"}; a lone string is
    # one symbol.
    if isinstance(symbols, str):
        symbols = [symbols]
    symbols = sorted(set(symbols))
    if not symbols:
        log.warning("load_macro_series: no symbols requested on %r", bucket)
        return {}
    lib = open_macro_lib(bucket, region=region)
    return _load_arctic_frames(
        lib,
        symbols,
        lookback_days=lookback_days,
        end=end,
        columns=columns,
        max_workers=max_workers,
        label="load_macro_series",
    )
@@ -0,0 +1,69 @@
1
+ """Surface collector-style error dicts to Flow Doctor.
2
+
3
+ Many Alpha Engine orchestrators (alpha-engine-data weekly_collector, the
4
+ predictor training pipeline stages, research per-team collectors) catch
5
+ exceptions and convert them into a return-dict of the form::
6
+
7
+ {"status": "error", "error": "<message>"}
8
+
9
+ The orchestrator aggregates per-collector dicts into a final results
10
+ structure and continues running the remaining collectors. Without an
11
+ explicit ``logger.error()`` call, the underlying error never reaches
12
+ Flow Doctor's logging-handler-based capture path — the alert pipeline
13
+ only sees the orchestrator's generic "non-ok status" summary line, which
14
+ dedups all partial runs together and contains none of the real error
15
+ text needed for LLM diagnosis or actionable GitHub issues.
16
+
17
+ :func:`report_collector_errors` walks the collectors dict and emits one
18
+ ``logger.error()`` per error-status entry. Each emitted record carries
19
+ the collector name + original error message, producing distinct dedup
20
+ signatures and rich diagnose context.
21
+
22
+ Typical usage in an orchestrator's finalize step::
23
+
24
+ from alpha_engine_lib.collector_results import report_collector_errors
25
+
26
+ # ... run collectors, populate results["collectors"] ...
27
+ report_collector_errors(results["collectors"])
28
+ # write manifest, return results, etc.
29
+
30
+ Idempotent — safe to call multiple times in the same process.
31
+ Flow Doctor's per-yaml dedup window suppresses repeat alerts.
32
+ """
33
+
34
+ from __future__ import annotations
35
+
36
+ import logging
37
+ from typing import Any, Mapping
38
+
39
+
40
+ def report_collector_errors(
41
+ collectors: Mapping[str, Mapping[str, Any]],
42
+ logger: logging.Logger | None = None,
43
+ ) -> int:
44
+ """Log one ERROR per collector with ``status == "error"``.
45
+
46
+ :param collectors: Mapping of collector name → result dict. Each
47
+ result dict is expected to have a ``"status"`` key; entries
48
+ with status ``"error"`` also typically carry an ``"error"``
49
+ key with the message string.
50
+ :param logger: Logger to emit through. Defaults to
51
+ ``logging.getLogger(__name__)`` (which routes to the root
52
+ logger's handlers — including FlowDoctorHandler when
53
+ ``setup_logging(flow_doctor_yaml=...)`` has been called).
54
+ :return: Number of error entries logged.
55
+
56
+ Non-mapping values, missing ``status`` keys, and any non-error
57
+ status are ignored silently. The function never raises.
58
+ """
59
+ log = logger or logging.getLogger(__name__)
60
+ count = 0
61
+ for name, info in collectors.items():
62
+ if not isinstance(info, Mapping):
63
+ continue
64
+ if info.get("status") != "error":
65
+ continue
66
+ err = info.get("error", "<no error message>")
67
+ log.error("collector %s failed: %s", name, err)
68
+ count += 1
69
+ return count