alpha-engine-lib 0.46.0__tar.gz → 0.48.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. {alpha_engine_lib-0.46.0/src/alpha_engine_lib.egg-info → alpha_engine_lib-0.48.0}/PKG-INFO +12 -3
  2. alpha_engine_lib-0.46.0/PKG-INFO → alpha_engine_lib-0.48.0/README.md +6 -32
  3. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/pyproject.toml +7 -2
  4. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/__init__.py +1 -1
  5. alpha_engine_lib-0.48.0/src/alpha_engine_lib/http_retry.py +199 -0
  6. alpha_engine_lib-0.48.0/src/alpha_engine_lib/quant/factor_risk_xs.py +332 -0
  7. alpha_engine_lib-0.46.0/README.md → alpha_engine_lib-0.48.0/src/alpha_engine_lib.egg-info/PKG-INFO +41 -1
  8. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib.egg-info/SOURCES.txt +4 -0
  9. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib.egg-info/requires.txt +5 -0
  10. alpha_engine_lib-0.48.0/tests/test_http_retry.py +199 -0
  11. alpha_engine_lib-0.48.0/tests/test_quant_factor_risk_xs.py +413 -0
  12. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/setup.cfg +0 -0
  13. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/agent_schemas.py +0 -0
  14. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/alerts.py +0 -0
  15. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/anthropic_payload.py +0 -0
  16. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/arcticdb.py +0 -0
  17. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/artifact_freshness.py +0 -0
  18. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/collector_results.py +0 -0
  19. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/cost.py +0 -0
  20. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/dates.py +0 -0
  21. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/decision_capture.py +0 -0
  22. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/ec2_spot.py +0 -0
  23. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/email_sender.py +0 -0
  24. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/eval_artifacts.py +0 -0
  25. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/locks.py +0 -0
  26. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/logging.py +0 -0
  27. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/model_pricing.yaml +0 -0
  28. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/pillars.py +0 -0
  29. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/pipeline_status/__init__.py +0 -0
  30. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/pipeline_status/read.py +0 -0
  31. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/pipeline_status/registry.py +0 -0
  32. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/pipeline_status/templates.py +0 -0
  33. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/preflight.py +0 -0
  34. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/quant/__init__.py +0 -0
  35. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/quant/attribution.py +0 -0
  36. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/quant/factor_risk.py +0 -0
  37. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/quant/returns.py +0 -0
  38. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/quant/risk_measures.py +0 -0
  39. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/quant/riskstats.py +0 -0
  40. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/rag/__init__.py +0 -0
  41. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/rag/db.py +0 -0
  42. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/rag/embeddings.py +0 -0
  43. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/rag/migrations/0001_content_tsv.sql +0 -0
  44. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/rag/rerank.py +0 -0
  45. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/rag/retrieval.py +0 -0
  46. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/rag/schema.sql +0 -0
  47. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/reconcile.py +0 -0
  48. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/secrets.py +0 -0
  49. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/sources/__init__.py +0 -0
  50. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/sources/protocols.py +0 -0
  51. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/ssm_dispatcher.py +0 -0
  52. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/ssm_log_capture.py +0 -0
  53. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/telegram.py +0 -0
  54. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/trading_calendar.py +0 -0
  55. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/transparency.py +0 -0
  56. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/transparency_inventory.yaml +0 -0
  57. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib/universe.py +0 -0
  58. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib.egg-info/dependency_links.txt +0 -0
  59. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/src/alpha_engine_lib.egg-info/top_level.txt +0 -0
  60. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_agent_schemas.py +0 -0
  61. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_alerts.py +0 -0
  62. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_anthropic_payload.py +0 -0
  63. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_arcticdb.py +0 -0
  64. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_artifact_freshness.py +0 -0
  65. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_collector_results.py +0 -0
  66. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_cost.py +0 -0
  67. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_dates.py +0 -0
  68. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_decision_capture.py +0 -0
  69. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_ec2_spot.py +0 -0
  70. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_email_sender.py +0 -0
  71. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_eval_artifacts.py +0 -0
  72. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_locks.py +0 -0
  73. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_logging.py +0 -0
  74. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_pillars.py +0 -0
  75. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_pipeline_status_read.py +0 -0
  76. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_pipeline_status_registry.py +0 -0
  77. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_pipeline_status_templates.py +0 -0
  78. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_preflight.py +0 -0
  79. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_quant_attribution.py +0 -0
  80. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_quant_factor_risk.py +0 -0
  81. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_quant_returns.py +0 -0
  82. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_quant_risk_measures.py +0 -0
  83. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_quant_riskstats.py +0 -0
  84. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_rag.py +0 -0
  85. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_rag_rerank.py +0 -0
  86. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_rag_retrieval_hybrid.py +0 -0
  87. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_reconcile.py +0 -0
  88. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_secrets.py +0 -0
  89. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_sources_protocols.py +0 -0
  90. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_ssm_dispatcher.py +0 -0
  91. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_ssm_log_capture.py +0 -0
  92. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_telegram.py +0 -0
  93. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_trading_calendar.py +0 -0
  94. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_transparency.py +0 -0
  95. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_universe.py +0 -0
  96. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_version_bump_workflow.py +0 -0
  97. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.48.0}/tests/test_version_pin.py +0 -0
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: alpha-engine-lib
3
- Version: 0.46.0
4
- Summary: Shared utilities for the Alpha Engine modules: preflight, logging, ArcticDB, dates, decision capture, cost telemetry, Anthropic payload chokepoint, artifact freshness, RAG, agent schemas, SSM secrets, Telegram + SNS alerts, EC2 spot resilience, SSM log-capture, SSM dispatcher, Step-Functions execution-state projection, and S3-conditional-PUT writer locks. Full surface documented in README.
3
+ Version: 0.48.0
4
+ Summary: Shared utilities for the Alpha Engine modules: preflight, logging, ArcticDB, dates, decision capture, cost telemetry, Anthropic payload chokepoint, artifact freshness, RAG, agent schemas, SSM secrets, Telegram + SNS alerts, EC2 spot resilience, SSM log-capture, SSM dispatcher, Step-Functions execution-state projection, S3-conditional-PUT writer locks, and bounded-backoff HTTP retry. Full surface documented in README.
5
5
  Author: Brian McMahon
6
6
  License: Proprietary
7
7
  Requires-Python: >=3.9
@@ -16,6 +16,10 @@ Requires-Dist: arcticdb>=6.11; extra == "arcticdb"
16
16
  Requires-Dist: pandas>=2.0; extra == "arcticdb"
17
17
  Provides-Extra: quant
18
18
  Requires-Dist: numpy>=1.24; extra == "quant"
19
+ Provides-Extra: quant-xs
20
+ Requires-Dist: numpy>=1.24; extra == "quant-xs"
21
+ Requires-Dist: pandas>=2.0; extra == "quant-xs"
22
+ Requires-Dist: scikit-learn>=1.0; extra == "quant-xs"
19
23
  Provides-Extra: flow-doctor
20
24
  Requires-Dist: flow-doctor[diagnosis,s3]<0.5.0,>=0.4.0; extra == "flow-doctor"
21
25
  Provides-Extra: rag
@@ -254,12 +258,17 @@ Rotates across `(instance_type × subnet)` combinations on `InsufficientInstance
254
258
 
255
259
  The shared institutional-analytics engine: pure, front-end- and data-source-agnostic functions that *describe and measure* a portfolio (performance, risk, attribution) with **no advisory logic** — it sits on the "analytics, not advice" side of the line. Lifted from robodashboard's `analytics/` after the 2026-06-03 cross-repo leverage audit, so both the alpha-engine fleet and robodashboard consume one engine instead of parallel reimplementations. Import the submodule you need (the package keeps no eager imports, so the stdlib-only modules import without numpy):
256
260
 
257
- - **`quant.factor_risk`** — statistical factor risk model `Σ = B·F·Bᵀ + D`: `estimate_factor_model` (time-series factor-ETF / Fama-MacBeth loadings), `portfolio_risk` (ex-ante vol + factor/idio split + per-factor variance contribution), `tracking_error`, `benchmark_exposure`, and a numpy-only `ledoit_wolf_cov` (no sklearn). The estimator-agnostic consumption core (`portfolio_risk`/`tracking_error`) consumes any `FactorRiskModel` (B, F, D). **Needs numpy** — `pip install "alpha-engine-lib[quant]"`.
261
+ - **`quant.factor_risk`** — statistical factor risk model `Σ = B·F·Bᵀ + D`, **Option B** (time-series factor-ETF estimator): `estimate_factor_model` (regress holdings on given factor return series), `portfolio_risk` (ex-ante vol + factor/idio split + per-factor variance contribution), `tracking_error`, `benchmark_exposure`, and a numpy-only `ledoit_wolf_cov` (no sklearn). The estimator-agnostic consumption core (`portfolio_risk`/`tracking_error`) consumes any `FactorRiskModel` (B, F, D). **Needs numpy** — `pip install "alpha-engine-lib[quant]"`.
262
+ - **`quant.factor_risk_xs`** — same `Σ = B·F·Bᵀ + D` model, **Option A** (universe-wide cross-sectional Fama-MacBeth estimator): take *exogenous* per-ticker loadings `B` and infer factor returns `f_t` via a cross-sectional OLS at each date → `F`/`D` (`build_factor_risk_model`, `cross_sectional_factor_returns`, `estimate_factor_covariance`, `estimate_idiosyncratic_variance`). **Needs pandas + scikit-learn** — `pip install "alpha-engine-lib[quant-xs]"` (kept separate so numpy-only consumers stay light).
258
263
  - **`quant.risk_measures`** — parametric (Gaussian, Acklam inverse-normal, no scipy) + historical VaR & CVaR, as positive loss fractions at a horizon (stdlib).
259
264
  - **`quant.riskstats`** — `volatility`, `sharpe_ratio`, `sortino_ratio`, `max_drawdown` (stdlib).
260
265
  - **`quant.returns`** — `xirr` (money-weighted, Newton + bisection), `time_weighted_return` (GIPS), `cumulative_return`, `annualize` (stdlib).
261
266
  - **`quant.attribution`** — single-period Brinson-Fachler decomposition (`brinson_fachler`) + multi-period Cariño linking (`link_periods`) (stdlib).
262
267
 
268
+ ### `http_retry` — bounded-backoff transient-API retry chokepoint
269
+
270
+ `request_with_retry(url, *, params, session, transient_status, ...)` returns the final `requests.Response` after retrying the transient class — 429 + 5xx responses (honoring `Retry-After`) and `Timeout`/`ConnectionError` network errors — with exponential backoff + full jitter; an exhausted network error raises `HttpRetryError` (api-key-scrubbed), while a persistent transient-status response is returned for the caller to interpret (so a 403, not in the transient set, is handed back for e.g. polygon's `PolygonForbiddenError` conversion). Also exposes the low-level `backoff_delay(attempt, *, base, cap, retry_after)` and `scrub_api_keys(msg)` (masks `api_key=`/`apiKey=` querystring values) for consumers with bespoke loops (the rate-limited `polygon_client` keeps its own loop + 403 + JSON parse and reuses just the delay math + scrubber). Consolidates the four mirrored alpha-engine-data retry sites (FRED fetch, polygon client, preflight reachability, FRED repair) into one policy so they stop drifting (L4499). Stdlib + `requests` only.
271
+
263
272
  ```python
264
273
  from alpha_engine_lib.quant.risk_measures import historical_cvar
265
274
  from alpha_engine_lib.quant.factor_risk import estimate_factor_model, portfolio_risk
@@ -1,34 +1,3 @@
1
- Metadata-Version: 2.4
2
- Name: alpha-engine-lib
3
- Version: 0.46.0
4
- Summary: Shared utilities for the Alpha Engine modules: preflight, logging, ArcticDB, dates, decision capture, cost telemetry, Anthropic payload chokepoint, artifact freshness, RAG, agent schemas, SSM secrets, Telegram + SNS alerts, EC2 spot resilience, SSM log-capture, SSM dispatcher, Step-Functions execution-state projection, and S3-conditional-PUT writer locks. Full surface documented in README.
5
- Author: Brian McMahon
6
- License: Proprietary
7
- Requires-Python: >=3.9
8
- Description-Content-Type: text/markdown
9
- Requires-Dist: boto3>=1.34
10
- Requires-Dist: pydantic>=2.0
11
- Requires-Dist: pyyaml>=6.0
12
- Requires-Dist: requests>=2.31
13
- Requires-Dist: eval_type_backport>=0.2.0; python_version < "3.10"
14
- Provides-Extra: arcticdb
15
- Requires-Dist: arcticdb>=6.11; extra == "arcticdb"
16
- Requires-Dist: pandas>=2.0; extra == "arcticdb"
17
- Provides-Extra: quant
18
- Requires-Dist: numpy>=1.24; extra == "quant"
19
- Provides-Extra: flow-doctor
20
- Requires-Dist: flow-doctor[diagnosis,s3]<0.5.0,>=0.4.0; extra == "flow-doctor"
21
- Provides-Extra: rag
22
- Requires-Dist: psycopg2-binary>=2.9; extra == "rag"
23
- Requires-Dist: pgvector>=0.2; extra == "rag"
24
- Requires-Dist: numpy>=1.24; extra == "rag"
25
- Provides-Extra: rerank
26
- Requires-Dist: sentence-transformers>=3.0; extra == "rerank"
27
- Provides-Extra: dev
28
- Requires-Dist: pytest>=7.0; extra == "dev"
29
- Requires-Dist: pytest-cov>=4.0; extra == "dev"
30
- Requires-Dist: moto>=5.0; extra == "dev"
31
-
32
1
  # alpha-engine-lib
33
2
 
34
3
  > Part of [**Nous Ergon**](https://nousergon.ai) — Autonomous Multi-Agent Trading System. Repo and S3 names use the underlying project name `alpha-engine`.
@@ -254,12 +223,17 @@ Rotates across `(instance_type × subnet)` combinations on `InsufficientInstance
254
223
 
255
224
  The shared institutional-analytics engine: pure, front-end- and data-source-agnostic functions that *describe and measure* a portfolio (performance, risk, attribution) with **no advisory logic** — it sits on the "analytics, not advice" side of the line. Lifted from robodashboard's `analytics/` after the 2026-06-03 cross-repo leverage audit, so both the alpha-engine fleet and robodashboard consume one engine instead of parallel reimplementations. Import the submodule you need (the package keeps no eager imports, so the stdlib-only modules import without numpy):
256
225
 
257
- - **`quant.factor_risk`** — statistical factor risk model `Σ = B·F·Bᵀ + D`: `estimate_factor_model` (time-series factor-ETF / Fama-MacBeth loadings), `portfolio_risk` (ex-ante vol + factor/idio split + per-factor variance contribution), `tracking_error`, `benchmark_exposure`, and a numpy-only `ledoit_wolf_cov` (no sklearn). The estimator-agnostic consumption core (`portfolio_risk`/`tracking_error`) consumes any `FactorRiskModel` (B, F, D). **Needs numpy** — `pip install "alpha-engine-lib[quant]"`.
226
+ - **`quant.factor_risk`** — statistical factor risk model `Σ = B·F·Bᵀ + D`, **Option B** (time-series factor-ETF estimator): `estimate_factor_model` (regress holdings on given factor return series), `portfolio_risk` (ex-ante vol + factor/idio split + per-factor variance contribution), `tracking_error`, `benchmark_exposure`, and a numpy-only `ledoit_wolf_cov` (no sklearn). The estimator-agnostic consumption core (`portfolio_risk`/`tracking_error`) consumes any `FactorRiskModel` (B, F, D). **Needs numpy** — `pip install "alpha-engine-lib[quant]"`.
227
+ - **`quant.factor_risk_xs`** — same `Σ = B·F·Bᵀ + D` model, **Option A** (universe-wide cross-sectional Fama-MacBeth estimator): take *exogenous* per-ticker loadings `B` and infer factor returns `f_t` via a cross-sectional OLS at each date → `F`/`D` (`build_factor_risk_model`, `cross_sectional_factor_returns`, `estimate_factor_covariance`, `estimate_idiosyncratic_variance`). **Needs pandas + scikit-learn** — `pip install "alpha-engine-lib[quant-xs]"` (kept separate so numpy-only consumers stay light).
258
228
  - **`quant.risk_measures`** — parametric (Gaussian, Acklam inverse-normal, no scipy) + historical VaR & CVaR, as positive loss fractions at a horizon (stdlib).
259
229
  - **`quant.riskstats`** — `volatility`, `sharpe_ratio`, `sortino_ratio`, `max_drawdown` (stdlib).
260
230
  - **`quant.returns`** — `xirr` (money-weighted, Newton + bisection), `time_weighted_return` (GIPS), `cumulative_return`, `annualize` (stdlib).
261
231
  - **`quant.attribution`** — single-period Brinson-Fachler decomposition (`brinson_fachler`) + multi-period Cariño linking (`link_periods`) (stdlib).
262
232
 
233
+ ### `http_retry` — bounded-backoff transient-API retry chokepoint
234
+
235
+ `request_with_retry(url, *, params, session, transient_status, ...)` returns the final `requests.Response` after retrying the transient class — 429 + 5xx responses (honoring `Retry-After`) and `Timeout`/`ConnectionError` network errors — with exponential backoff + full jitter; an exhausted network error raises `HttpRetryError` (api-key-scrubbed), while a persistent transient-status response is returned for the caller to interpret (so a 403, not in the transient set, is handed back for e.g. polygon's `PolygonForbiddenError` conversion). Also exposes the low-level `backoff_delay(attempt, *, base, cap, retry_after)` and `scrub_api_keys(msg)` (masks `api_key=`/`apiKey=` querystring values) for consumers with bespoke loops (the rate-limited `polygon_client` keeps its own loop + 403 + JSON parse and reuses just the delay math + scrubber). Consolidates the four mirrored alpha-engine-data retry sites (FRED fetch, polygon client, preflight reachability, FRED repair) into one policy so they stop drifting (L4499). Stdlib + `requests` only.
236
+
263
237
  ```python
264
238
  from alpha_engine_lib.quant.risk_measures import historical_cvar
265
239
  from alpha_engine_lib.quant.factor_risk import estimate_factor_model, portfolio_risk
@@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "alpha-engine-lib"
7
- version = "0.46.0"
8
- description = "Shared utilities for the Alpha Engine modules: preflight, logging, ArcticDB, dates, decision capture, cost telemetry, Anthropic payload chokepoint, artifact freshness, RAG, agent schemas, SSM secrets, Telegram + SNS alerts, EC2 spot resilience, SSM log-capture, SSM dispatcher, Step-Functions execution-state projection, and S3-conditional-PUT writer locks. Full surface documented in README."
7
+ version = "0.48.0"
8
+ description = "Shared utilities for the Alpha Engine modules: preflight, logging, ArcticDB, dates, decision capture, cost telemetry, Anthropic payload chokepoint, artifact freshness, RAG, agent schemas, SSM secrets, Telegram + SNS alerts, EC2 spot resilience, SSM log-capture, SSM dispatcher, Step-Functions execution-state projection, S3-conditional-PUT writer locks, and bounded-backoff HTTP retry. Full surface documented in README."
9
9
  readme = "README.md"
10
10
  # EC2 still runs Python 3.9 on the always-on micro instance (boto3 drops
11
11
  # 3.9 support 2026-04-29, so upgrade is on the near-term roadmap). All
@@ -34,6 +34,11 @@ arcticdb = ["arcticdb>=6.11", "pandas>=2.0"]
34
34
  # factor-risk module needs numpy; the VaR/CVaR, riskstats, returns, and
35
35
  # attribution modules are pure stdlib and import without this extra.
36
36
  quant = ["numpy>=1.24"]
37
+ # Cross-sectional (Fama-MacBeth) factor risk model — quant.factor_risk_xs.
38
+ # Needs pandas (always) + scikit-learn (LedoitWolf/OAS shrinkage). Kept
39
+ # separate from [quant] so the numpy-only consumers (e.g. robodashboard)
40
+ # don't pull pandas+sklearn.
41
+ quant-xs = ["numpy>=1.24", "pandas>=2.0", "scikit-learn>=1.0"]
37
42
  flow_doctor = ["flow-doctor[diagnosis,s3]>=0.4.0,<0.5.0"]
38
43
  rag = [
39
44
  "psycopg2-binary>=2.9",
@@ -1,3 +1,3 @@
1
1
  """alpha-engine-lib — shared utilities for Alpha Engine modules."""
2
2
 
3
- __version__ = "0.46.0"
3
+ __version__ = "0.48.0"
@@ -0,0 +1,199 @@
1
+ """Bounded-backoff HTTP retry primitive — the transient external-API
2
+ resilience chokepoint (L4499).
3
+
4
+ Consolidates the backoff + full-jitter + ``Retry-After`` + api-key-scrub
5
+ retry idiom that was mirrored across four alpha-engine-data sites:
6
+
7
+ * ``collectors/daily_closes.py::_fred_get_with_retry`` (L4480)
8
+ * ``polygon_client.py::_get`` / ``_backoff`` (L4496)
9
+ * ``preflight.py::_reachability_get`` (L4494)
10
+ * ``collectors/daily_closes_fred_repair.py::_fetch_fred_range``
11
+
12
+ Each had its own copy of "exponential backoff + full jitter, honor
13
+ ``Retry-After``, retry the transient class, scrub the api-key from the
14
+ error before logging/raising, then fail loud." This module is the single
15
+ source of truth for that policy so the four callsites stop drifting.
16
+
17
+ Two layers are exported:
18
+
19
+ * :func:`request_with_retry` — the full GET-with-retry for the plain
20
+ callsites (FRED fetch, preflight probe, FRED repair). Returns the final
21
+ ``requests.Response``; the caller still owns status interpretation
22
+ (``raise_for_status`` / special-casing a 403), so genuinely different
23
+ consumers compose it without a leaky mega-config.
24
+ * :func:`backoff_delay` + :func:`scrub_api_keys` — the low-level pieces for
25
+ a consumer with bespoke control flow (the rate-limited ``polygon_client``
26
+ keeps its own loop + 403 handling + JSON parse + rate limiter, but shares
27
+ the delay math and the scrubber).
28
+
29
+ Design note (anti-over-engineering): this is deliberately NOT a
30
+ pluggable-everything HTTP framework. It captures the one invariant the four
31
+ sites share; consumers whose semantics diverge (polygon's 403 + rate limiter)
32
+ reuse the primitives rather than being forced through a generic loop.
33
+ """
34
+
35
+ from __future__ import annotations
36
+
37
+ import logging as _logging
38
+ import random as _random
39
+ import re
40
+ import time as _time
41
+ from typing import Callable, Iterable
42
+
43
+ import requests
44
+
45
# Module-level logger; request_with_retry falls back to it when the caller
# does not pass its own ``logger``.
_DEFAULT_LOGGER = _logging.getLogger(__name__)

# Transient HTTP status class: 429 (rate limit) + the retryable 5xx. A 4xx
# other than 429 is a deterministic client error — retrying it is pointless,
# so it is NOT in the default set and is returned to the caller as-is.
DEFAULT_TRANSIENT_STATUS: "frozenset[int]" = frozenset({429, 500, 502, 503, 504})
51
+
52
# FRED passes its key as ``api_key=`` (snake case) and polygon as ``apiKey=``
# (camel case). Either VALUE can leak through the effective URL embedded in a
# ``requests`` exception ``str()`` or through a hand-built error string. The
# pattern mirrors the per-repo scrubbers this module replaces and complements
# ``alpha_engine_lib.logging.SecretsRedactingFilter`` (which catches
# token-shaped secrets, not query-param api keys).
_API_KEY_RE = re.compile(r"(?:api_key|apiKey)=[^&\s]+")


def scrub_api_keys(msg: object) -> str:
    """Return ``str(msg)`` with api-key querystring values masked.

    Each ``api_key=<value>`` / ``apiKey=<value>`` occurrence keeps its
    parameter name and the ``=`` delimiter (so logs still show *which* param
    was present); only the secret value is replaced with ``***``. Scrubbing an
    already-scrubbed string returns it unchanged.
    """

    def _mask(match: "re.Match[str]") -> str:
        # Everything before the first "=" is the parameter name.
        param_name, _, _ = match.group(0).partition("=")
        return param_name + "=***"

    text = str(msg)
    return _API_KEY_RE.sub(_mask, text)
67
+
68
+
69
class HttpRetryError(RuntimeError):
    """Terminal transport-level failure of a retried request.

    Raised for a transient NETWORK error (``requests.Timeout`` /
    ``requests.ConnectionError``) that outlived the attempt budget, or for a
    non-transient ``RequestException``.

    The exception message is passed through :func:`scrub_api_keys`, so
    ``api_key=`` / ``apiKey=`` values never appear in it. The triggering
    exception is kept on ``.last_exc`` (raisers are expected to also chain it
    with ``raise ... from exc`` so it shows up as ``__cause__``). ``.label``
    and ``.attempts`` give callers enough context to re-wrap the failure
    (e.g. preflight's ``RuntimeError(... unreachable ...)``).
    """

    def __init__(self, label: str, attempts: int, last_exc: BaseException) -> None:
        # An empty label falls back to the generic word "request".
        subject = label or "request"
        message = f"{subject} failed after {attempts} attempt(s): {last_exc}"
        super().__init__(scrub_api_keys(message))
        self.label = label
        self.attempts = attempts
        self.last_exc = last_exc
89
+
90
+
91
def backoff_delay(
    attempt: int,
    *,
    base: float = 1.0,
    cap: float = 30.0,
    retry_after: "str | float | None" = None,
    rng: "_random.Random | None" = None,
) -> float:
    """Return the capped, jittered wait (seconds) before the next attempt.

    The result is ``min(wait + U(0, base), cap)`` where ``wait`` is:

    * the server's ``Retry-After`` when it is a usable number of seconds
      (``str`` or ``float``, zero or positive); otherwise
    * the exponential term ``base * 2**attempt`` (``attempt`` is 0-indexed).

    A ``Retry-After`` that cannot be used as a delay falls back to the
    exponential term: the HTTP-date form (not parseable as a float), a
    negative number, or NaN. Without that guard a negative/NaN header would
    yield a negative/NaN delay, which ``time.sleep`` rejects with
    ``ValueError`` in the middle of a retry loop.

    Args:
        attempt: 0-indexed attempt number that just failed.
        base: exponential base delay and upper bound of the uniform jitter.
        cap: maximum delay returned.
        retry_after: raw ``Retry-After`` header value, if any.
        rng: object exposing ``uniform(a, b)``; injectable for deterministic
            tests. Defaults to the ``random`` module.
    """
    wait: "float | None" = None
    if retry_after is not None:
        try:
            wait = float(retry_after)
        except (TypeError, ValueError):
            wait = None
        else:
            # ``not (wait >= 0)`` is true for negatives AND for NaN (every
            # comparison with NaN is false); +inf passes and is later capped.
            if not wait >= 0:
                wait = None
    if wait is None:
        wait = base * (2 ** attempt)
    jitter = (rng or _random).uniform(0, base)
    return min(wait + jitter, cap)
117
+
118
+
119
def request_with_retry(
    url: str,
    *,
    method: str = "GET",
    params: "dict | None" = None,
    session: "requests.Session | None" = None,
    timeout: float = 15.0,
    max_attempts: int = 3,
    backoff_base: float = 1.0,
    backoff_cap: float = 30.0,
    transient_status: Iterable[int] = DEFAULT_TRANSIENT_STATUS,
    retry_network: bool = True,
    honor_retry_after: bool = True,
    scrub: Callable[[object], str] = scrub_api_keys,
    logger: "_logging.Logger | None" = None,
    label: str = "",
    sleep: Callable[[float], None] = _time.sleep,
) -> requests.Response:
    """``method`` ``url`` with bounded backoff + jitter on the transient
    class, returning the final :class:`requests.Response`.

    Retries:
      * responses whose status is in ``transient_status`` (default 429 + 5xx),
        honoring ``Retry-After`` when ``honor_retry_after``; and
      * (when ``retry_network``) ``requests.Timeout`` / ``ConnectionError``.

    Terminal behavior:
      * a transient-status response that survives ``max_attempts`` is
        **returned** — the caller decides whether to ``raise_for_status`` or
        special-case it (e.g. a 403, which is NOT in the transient set, is
        returned immediately for the caller to convert); and
      * an exhausted NETWORK error (or a non-transient ``RequestException``
        such as a bad URL) raises :class:`HttpRetryError`.

    ``scrub`` is applied to the request target written to the warning log
    (``label``, or ``url`` when no label is given — a URL may embed an api
    key) and to the message of the raised :class:`HttpRetryError`.
    ``session`` lets a caller reuse a session (e.g. one carrying auth query
    params). ``sleep`` is injectable for tests.

    Raises:
        ValueError: if ``max_attempts`` is < 1.
        HttpRetryError: on an exhausted / non-retried network error or a
            non-transient ``requests.RequestException``; the original
            exception is chained as ``__cause__``.
    """
    if max_attempts < 1:
        raise ValueError(f"max_attempts must be >= 1, got {max_attempts}")
    log = logger or _DEFAULT_LOGGER
    transient = frozenset(transient_status)
    requester = (session or requests).request
    # Scrub once up front: with an empty label the raw URL is what gets
    # logged, and it can carry ``api_key=`` / ``apiKey=`` in its querystring.
    target = scrub(label or url)

    def _failure(attempts_made: int, exc: BaseException) -> HttpRetryError:
        # HttpRetryError already applies the default scrubber; re-scrub its
        # message with the caller-supplied ``scrub`` so a custom scrubber is
        # honored too (a no-op for the idempotent default).
        err = HttpRetryError(label, attempts_made, exc)
        err.args = (scrub(str(err)),)
        return err

    resp: "requests.Response | None" = None
    for attempt in range(max_attempts):
        last = attempt == max_attempts - 1
        try:
            resp = requester(method, url, params=params or {}, timeout=timeout)
        except (requests.Timeout, requests.ConnectionError) as exc:
            if not retry_network or last:
                raise _failure(attempt + 1, exc) from exc
            delay = backoff_delay(attempt, base=backoff_base, cap=backoff_cap)
            log.warning(
                "%s transient %s — backing off %.1fs (attempt %d/%d)",
                target, type(exc).__name__, delay, attempt + 1, max_attempts,
            )
            sleep(delay)
            continue
        except requests.RequestException as exc:
            # Non-transient (bad URL / too many redirects / invalid schema) —
            # retrying a deterministic error is pointless; fail loud now.
            raise _failure(attempt + 1, exc) from exc

        if resp.status_code in transient and not last:
            retry_after = resp.headers.get("Retry-After") if honor_retry_after else None
            delay = backoff_delay(
                attempt, base=backoff_base, cap=backoff_cap, retry_after=retry_after,
            )
            log.warning(
                "%s HTTP %d — backing off %.1fs (attempt %d/%d)",
                target, resp.status_code, delay, attempt + 1, max_attempts,
            )
            sleep(delay)
            continue
        # Non-transient status, or a transient status on the final attempt:
        # hand the response back for the caller to interpret.
        return resp

    # Not reached in practice: the final iteration always returns or raises.
    # Kept so type checkers see a Response on every path; resp is non-None
    # because max_attempts >= 1 guarantees at least one loop iteration.
    assert resp is not None
    return resp
@@ -0,0 +1,332 @@
1
+ """Cross-sectional (Fama-MacBeth) factor risk model — the "Option A" estimator.
2
+
3
+ Complements ``quant.factor_risk`` (the "Option B" time-series factor-ETF
4
+ estimator). Both produce the inputs to the same Σ = B·F·Bᵀ + D structural
5
+ covariance consumed by ``quant.factor_risk.portfolio_risk`` / ``tracking_error``;
6
+ they differ only in how the factor returns ``f_t`` and the factor covariance
7
+ ``F`` are estimated:
8
+
9
+ - **Option B** (``factor_risk.estimate_factor_model``) — regress each holding's
10
+ return series on a small set of *given* factor return series (market +
11
+ style-ETF spreads). Loadings ``B`` are the regression betas. numpy-only.
12
+ - **Option A** (here) — take *exogenous* per-ticker factor loadings ``B`` (e.g.
13
+ fundamentals-derived style exposures) and infer the factor returns ``f_t`` by
14
+ a cross-sectional OLS at each date (Fama-MacBeth 1973):
15
+
16
+ r_t = B_{t-1} · f_t + ε_t
17
+
18
+ Stacking ``f_t`` over a rolling window gives a (T × K) factor-return panel;
19
+ ``F`` is its (Ledoit-Wolf-shrunk) covariance and ``D`` the per-ticker
20
+ time-series variance of the residuals ε. This is the universe-wide Barra-lite
21
+ build.
22
+
23
+ **Dependencies:** pandas (always) + scikit-learn (lazy, only for the
24
+ ``ledoit_wolf``/``oas`` shrinkage estimators). Install ``alpha-engine-lib[quant-xs]``.
25
+ Kept in its own module so the numpy-only ``factor_risk``/``risk_measures``/etc.
26
+ consumers don't pull pandas+sklearn.
27
+
28
+ References:
29
+ - Fama & MacBeth 1973 "Risk, Return, and Equilibrium: Empirical Tests"
30
+ (JPE 81(3)) — cross-sectional-regression construction of factor returns
31
+ - Grinold & Kahn 2000, _Active Portfolio Management_, Ch. 3 — canonical
32
+ structural factor risk model
33
+ - Menchero, Orr & Wang 2011 "The Barra US Equity Model (USE4)
34
+ Methodology Notes" — operational reference
35
+ """
36
+
37
+ from __future__ import annotations
38
+
39
+ import logging
40
+ from typing import Iterable
41
+
42
+ import numpy as np
43
+ import pandas as pd
44
+
45
log = logging.getLogger(__name__)


# Surplus of valid rows required over the number of regressors: a date is only
# regressed when it has at least K_eff + _MIN_OBS_OVER_K usable observations.
_MIN_OBS_OVER_K = 5


def cross_sectional_factor_returns(
    returns_t: np.ndarray,
    loadings_prev: np.ndarray,
    *,
    include_intercept: bool = True,
) -> tuple[np.ndarray, np.ndarray]:
    """Solve r_t = B_{t-1} · f_t + ε_t for one date via OLS.

    Args:
        returns_t: (N,) realized returns at time t (flattened with ``ravel``).
        loadings_prev: (N, K) factor loadings at time t-1.
        include_intercept: if True, a column of 1s is prepended to the
            loadings (the "market" factor return), so f_t[0] is the
            intercept and f_t[1:] are the per-factor slopes. Default True.

    Returns:
        (f_t, residuals):
          • f_t: (K_eff,) factor return vector — length K+1 with
            intercept, K without.
          • residuals: (N,) per-ticker ε_t. NaN at every row that was
            excluded from the fit, so positions stay aligned with the
            caller's universe.

    Raises:
        ValueError: if ``loadings_prev`` is not 2-D, or ``returns_t`` does not
            have exactly one entry per loadings row.

    Rows with a non-finite value (NaN or ±inf) in r_t or in any column of
    B_{t-1} are excluded from the regression. If fewer than
    K_eff + ``_MIN_OBS_OVER_K`` valid rows remain, both outputs are all-NaN.
    A rank-deficient design is not an error: the minimum-norm solution is
    returned and a warning is logged.
    """
    returns_t = np.asarray(returns_t, dtype=np.float64).ravel()
    loadings_prev = np.asarray(loadings_prev, dtype=np.float64)
    if loadings_prev.ndim != 2:
        raise ValueError(
            f"loadings_prev must be 2-D (N × K); got shape {loadings_prev.shape}"
        )
    N, K = loadings_prev.shape
    if returns_t.shape != (N,):
        raise ValueError(
            f"returns_t shape {returns_t.shape} != ({N},) matching loadings rows"
        )

    if include_intercept:
        B = np.column_stack([np.ones(N), loadings_prev])
    else:
        B = loadings_prev
    K_eff = B.shape[1]

    # A row takes part in the fit only if its return and all of its
    # regressors are finite.
    valid = np.isfinite(returns_t) & np.isfinite(B).all(axis=1)
    n_valid = int(valid.sum())
    if n_valid < K_eff + _MIN_OBS_OVER_K:
        return np.full(K_eff, np.nan), np.full(N, np.nan)

    r_valid = returns_t[valid]
    B_valid = B[valid]

    # lstsq is rank-robust: for a rank-deficient B it returns the
    # minimum-norm solution instead of raising. Surface that case in the log
    # so low-rank dates do not pass silently.
    f_t, _, rank, _ = np.linalg.lstsq(B_valid, r_valid, rcond=None)
    if rank < K_eff:
        log.warning(
            "cross_sectional_factor_returns: rank-deficient loadings "
            "(rank %d < %d regressors, %d valid rows) — using minimum-norm "
            "solution",
            rank, K_eff, n_valid,
        )

    residuals = np.full(N, np.nan)
    residuals[valid] = r_valid - B_valid @ f_t
    return f_t, residuals
114
+
115
+
116
def build_factor_returns_series(
    returns_panel: pd.DataFrame,
    loadings_by_date: dict[pd.Timestamp, pd.DataFrame],
    *,
    include_intercept: bool = True,
    factor_names: Iterable[str] | None = None,
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Run the cross-sectional regression r_t = B_{t-1} · f_t + ε_t for every
    date in ``returns_panel``.

    Args:
        returns_panel: (T × N) DataFrame indexed by date, columns are
            ticker names. r_t is the t-th row.
        loadings_by_date: mapping loading-date → (N × K) DataFrame of
            factor loadings, indexed by ticker with factor names as columns.
            For each return date t the most recent loading date strictly
            before t is used.
        include_intercept: prepends a market-factor column. See
            cross_sectional_factor_returns. Default True.
        factor_names: optional explicit order for the K factor columns.
            Each loadings frame is reindexed to this order. Default: the
            columns of the first frame yielded by ``loadings_by_date``.

    Returns:
        (factor_returns_df, residuals_df):
          • factor_returns_df: (T × K_eff) — index matches returns_panel
            dates; columns are ["market", *factor_names] when intercept
            is on, [*factor_names] when off.
          • residuals_df: (T × N) — same index/columns as returns_panel.
        Dates with no earlier loadings snapshot, or whose aligned loadings
        frame is empty, are left as all-NaN rows in both outputs. An empty
        ``returns_panel`` yields two empty DataFrames.

    Raises:
        ValueError: if ``factor_names`` is None and ``loadings_by_date`` is
            empty (the factor columns cannot be inferred).
    """
    if returns_panel.empty:
        return pd.DataFrame(), pd.DataFrame()

    dates = list(returns_panel.index)
    tickers = list(returns_panel.columns)

    # Resolve the canonical factor order from the first loadings frame when
    # the caller did not pin one.
    if factor_names is None:
        sample = next(iter(loadings_by_date.values()), None)
        if sample is None:
            raise ValueError("loadings_by_date is empty — nothing to regress against")
        factor_names = sample.columns
    factor_names = list(factor_names)

    col_names = (["market"] + factor_names) if include_intercept else factor_names

    f_panel = np.full((len(dates), len(col_names)), np.nan)
    eps_panel = np.full((len(dates), len(tickers)), np.nan)

    sorted_loading_dates = sorted(loadings_by_date)

    # Loadings are typically refreshed less often than returns, so many
    # consecutive dates resolve to the same snapshot. Keep the last aligned
    # matrix instead of re-running reindex + to_numpy for every date.
    cached_date = None
    cached_B = None

    for i, date_t in enumerate(dates):
        prev_date = _latest_loading_date_at_or_before(sorted_loading_dates, date_t)
        if prev_date is None:
            continue  # no loadings known yet at this date
        if cached_date is None or prev_date != cached_date:
            B_df = loadings_by_date[prev_date].reindex(
                index=tickers, columns=factor_names,
            )
            cached_B = None if B_df.empty else B_df.to_numpy(dtype=np.float64)
            cached_date = prev_date
        if cached_B is None:
            continue  # aligned loadings frame was empty
        r = returns_panel.iloc[i].to_numpy(dtype=np.float64)

        f_t, residuals = cross_sectional_factor_returns(
            r, cached_B, include_intercept=include_intercept,
        )
        f_panel[i] = f_t
        eps_panel[i] = residuals

    factor_returns_df = pd.DataFrame(f_panel, index=dates, columns=col_names)
    residuals_df = pd.DataFrame(eps_panel, index=dates, columns=tickers)
    return factor_returns_df, residuals_df
189
+
190
+
191
def _latest_loading_date_at_or_before(
    sorted_dates: list[pd.Timestamp], cutoff: pd.Timestamp,
) -> pd.Timestamp | None:
    """Return the last entry of ``sorted_dates`` that is strictly earlier than
    ``cutoff``, or None when no entry precedes it.

    Despite the function name, an entry equal to ``cutoff`` is NOT returned:
    at date t only loadings dated before t may be used. ``sorted_dates`` must
    be in ascending order.
    """
    from bisect import bisect_left

    # bisect_left gives the count of entries strictly below the cutoff.
    n_before = bisect_left(sorted_dates, cutoff)
    return sorted_dates[n_before - 1] if n_before else None
201
+
202
+
203
def estimate_factor_covariance(
    factor_returns_df: pd.DataFrame,
    *,
    shrinkage: str = "ledoit_wolf",
    min_obs: int = 30,
) -> pd.DataFrame:
    """Estimate F = Cov(f_t) over the factor-return panel.

    Rows containing any NaN (incomplete regressions) are dropped before
    estimation.

    Args:
        factor_returns_df: (T × K_eff) factor-return panel from
            build_factor_returns_series.
        shrinkage: estimator name. "ledoit_wolf" (default) and "oas" use the
            scikit-learn estimators (imported lazily); "sample" uses
            ``np.cov`` with its default normalization.
        min_obs: minimum number of clean rows required. Below this floor a
            warning is logged and an all-NaN F is returned, so downstream
            consumers see NaN rather than a silently wrong matrix.

    Returns:
        F: (K_eff × K_eff) DataFrame, index + columns are the factor names.

    Raises:
        ValueError: if ``shrinkage`` is not one of the supported names (only
            checked once the ``min_obs`` floor has been met).
    """
    clean = factor_returns_df.dropna()
    K = factor_returns_df.shape[1]
    cols = list(factor_returns_df.columns)
    if len(clean) < min_obs:
        log.warning(
            "estimate_factor_covariance: only %d clean rows (need ≥%d) — "
            "returning all-NaN F", len(clean), min_obs,
        )
        return pd.DataFrame(np.full((K, K), np.nan), index=cols, columns=cols)

    X = clean.to_numpy()
    if shrinkage == "ledoit_wolf":
        from sklearn.covariance import LedoitWolf
        F = LedoitWolf().fit(X).covariance_
    elif shrinkage == "oas":
        from sklearn.covariance import OAS
        F = OAS().fit(X).covariance_
    elif shrinkage == "sample":
        # np.cov collapses to a 0-d array for a single variable, which
        # pd.DataFrame rejects; force the (K × K) shape.
        F = np.atleast_2d(np.cov(X, rowvar=False))
    else:
        raise ValueError(f"Unknown shrinkage: {shrinkage!r}")
    return pd.DataFrame(F, index=cols, columns=cols)
248
+
249
+
250
def estimate_idiosyncratic_variance(
    residuals_df: pd.DataFrame,
    *,
    min_obs: int = 30,
) -> pd.Series:
    """Per-ticker D_{ii} = Var(ε_{i,t}) — diagonal of the residual cov.

    Tickers with fewer than ``min_obs`` non-NaN residual rows are emitted
    as NaN rather than a variance estimated from too little data.

    Args:
        residuals_df: (T × N) residual panel from
            build_factor_returns_series.
        min_obs: minimum non-NaN observations per ticker.

    Returns:
        D: (N,) float64 Series indexed by ``residuals_df.columns``.
    """
    # Column-wise reductions replace a per-ticker Python loop; both skip NaN.
    n_obs = residuals_df.count()
    # Population variance (ddof=0, i.e. divide by the number of observations).
    variances = residuals_df.var(ddof=0)
    # Blank out tickers that do not meet the observation floor.
    return variances.where(n_obs >= min_obs).astype(np.float64)
279
+
280
+
281
def build_factor_risk_model(
    returns_panel: pd.DataFrame,
    loadings_by_date: dict[pd.Timestamp, pd.DataFrame],
    *,
    include_intercept: bool = True,
    cov_shrinkage: str = "ledoit_wolf",
    min_cov_obs: int = 30,
    min_idio_obs: int = 30,
    factor_names: Iterable[str] | None = None,
) -> dict:
    """End-to-end builder: cross-sectional regressions → F + D.

    Args:
        returns_panel: (T × N) returns, passed to build_factor_returns_series.
        loadings_by_date: loading-date → (N × K) loadings frame, passed to
            build_factor_returns_series.
        include_intercept: whether to add the "market" intercept column.
        cov_shrinkage: estimator name forwarded to
            estimate_factor_covariance as ``shrinkage``.
        min_cov_obs: ``min_obs`` floor for estimate_factor_covariance.
        min_idio_obs: ``min_obs`` floor for estimate_idiosyncratic_variance.
        factor_names: optional explicit factor column order, forwarded to
            build_factor_returns_series. Default None keeps that function's
            own default.

    Returns a dict with keys:
      • "factor_returns": (T × K_eff) DataFrame
      • "residuals":      (T × N) DataFrame
      • "F":              (K_eff × K_eff) DataFrame
      • "D":              (N,) Series
      • "metadata":       dict with n_dates, n_clean_dates, K_eff,
                          n_tickers, cov_shrinkage, include_intercept
    """
    factor_returns, residuals = build_factor_returns_series(
        returns_panel, loadings_by_date,
        include_intercept=include_intercept,
        factor_names=factor_names,
    )
    F = estimate_factor_covariance(
        factor_returns, shrinkage=cov_shrinkage, min_obs=min_cov_obs,
    )
    D = estimate_idiosyncratic_variance(residuals, min_obs=min_idio_obs)

    # "Clean" dates are those whose factor-return row has no NaN.
    n_clean = int(factor_returns.dropna().shape[0])
    metadata = {
        "n_dates": int(factor_returns.shape[0]),
        "n_clean_dates": n_clean,
        "K_eff": int(factor_returns.shape[1]),
        "n_tickers": int(returns_panel.shape[1]),
        "cov_shrinkage": cov_shrinkage,
        "include_intercept": bool(include_intercept),
    }
    return {
        "factor_returns": factor_returns,
        "residuals": residuals,
        "F": F,
        "D": D,
        "metadata": metadata,
    }
324
+
325
+
326
# Public API of this module; the underscore-prefixed helper and constant are
# deliberately left out.
__all__ = [
    "cross_sectional_factor_returns",
    "build_factor_returns_series",
    "estimate_factor_covariance",
    "estimate_idiosyncratic_variance",
    "build_factor_risk_model",
]