alpha-engine-lib 0.46.0__tar.gz → 0.47.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/PKG-INFO +7 -2
  2. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/README.md +2 -1
  3. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/pyproject.toml +6 -1
  4. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/__init__.py +1 -1
  5. alpha_engine_lib-0.47.0/src/alpha_engine_lib/quant/factor_risk_xs.py +332 -0
  6. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib.egg-info/PKG-INFO +7 -2
  7. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib.egg-info/SOURCES.txt +2 -0
  8. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib.egg-info/requires.txt +5 -0
  9. alpha_engine_lib-0.47.0/tests/test_quant_factor_risk_xs.py +413 -0
  10. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/setup.cfg +0 -0
  11. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/agent_schemas.py +0 -0
  12. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/alerts.py +0 -0
  13. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/anthropic_payload.py +0 -0
  14. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/arcticdb.py +0 -0
  15. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/artifact_freshness.py +0 -0
  16. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/collector_results.py +0 -0
  17. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/cost.py +0 -0
  18. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/dates.py +0 -0
  19. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/decision_capture.py +0 -0
  20. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/ec2_spot.py +0 -0
  21. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/email_sender.py +0 -0
  22. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/eval_artifacts.py +0 -0
  23. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/locks.py +0 -0
  24. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/logging.py +0 -0
  25. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/model_pricing.yaml +0 -0
  26. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/pillars.py +0 -0
  27. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/pipeline_status/__init__.py +0 -0
  28. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/pipeline_status/read.py +0 -0
  29. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/pipeline_status/registry.py +0 -0
  30. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/pipeline_status/templates.py +0 -0
  31. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/preflight.py +0 -0
  32. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/quant/__init__.py +0 -0
  33. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/quant/attribution.py +0 -0
  34. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/quant/factor_risk.py +0 -0
  35. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/quant/returns.py +0 -0
  36. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/quant/risk_measures.py +0 -0
  37. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/quant/riskstats.py +0 -0
  38. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/rag/__init__.py +0 -0
  39. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/rag/db.py +0 -0
  40. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/rag/embeddings.py +0 -0
  41. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/rag/migrations/0001_content_tsv.sql +0 -0
  42. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/rag/rerank.py +0 -0
  43. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/rag/retrieval.py +0 -0
  44. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/rag/schema.sql +0 -0
  45. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/reconcile.py +0 -0
  46. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/secrets.py +0 -0
  47. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/sources/__init__.py +0 -0
  48. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/sources/protocols.py +0 -0
  49. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/ssm_dispatcher.py +0 -0
  50. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/ssm_log_capture.py +0 -0
  51. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/telegram.py +0 -0
  52. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/trading_calendar.py +0 -0
  53. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/transparency.py +0 -0
  54. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/transparency_inventory.yaml +0 -0
  55. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib/universe.py +0 -0
  56. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib.egg-info/dependency_links.txt +0 -0
  57. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/src/alpha_engine_lib.egg-info/top_level.txt +0 -0
  58. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_agent_schemas.py +0 -0
  59. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_alerts.py +0 -0
  60. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_anthropic_payload.py +0 -0
  61. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_arcticdb.py +0 -0
  62. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_artifact_freshness.py +0 -0
  63. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_collector_results.py +0 -0
  64. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_cost.py +0 -0
  65. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_dates.py +0 -0
  66. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_decision_capture.py +0 -0
  67. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_ec2_spot.py +0 -0
  68. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_email_sender.py +0 -0
  69. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_eval_artifacts.py +0 -0
  70. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_locks.py +0 -0
  71. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_logging.py +0 -0
  72. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_pillars.py +0 -0
  73. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_pipeline_status_read.py +0 -0
  74. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_pipeline_status_registry.py +0 -0
  75. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_pipeline_status_templates.py +0 -0
  76. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_preflight.py +0 -0
  77. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_quant_attribution.py +0 -0
  78. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_quant_factor_risk.py +0 -0
  79. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_quant_returns.py +0 -0
  80. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_quant_risk_measures.py +0 -0
  81. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_quant_riskstats.py +0 -0
  82. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_rag.py +0 -0
  83. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_rag_rerank.py +0 -0
  84. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_rag_retrieval_hybrid.py +0 -0
  85. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_reconcile.py +0 -0
  86. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_secrets.py +0 -0
  87. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_sources_protocols.py +0 -0
  88. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_ssm_dispatcher.py +0 -0
  89. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_ssm_log_capture.py +0 -0
  90. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_telegram.py +0 -0
  91. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_trading_calendar.py +0 -0
  92. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_transparency.py +0 -0
  93. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_universe.py +0 -0
  94. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_version_bump_workflow.py +0 -0
  95. {alpha_engine_lib-0.46.0 → alpha_engine_lib-0.47.0}/tests/test_version_pin.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: alpha-engine-lib
3
- Version: 0.46.0
3
+ Version: 0.47.0
4
4
  Summary: Shared utilities for the Alpha Engine modules: preflight, logging, ArcticDB, dates, decision capture, cost telemetry, Anthropic payload chokepoint, artifact freshness, RAG, agent schemas, SSM secrets, Telegram + SNS alerts, EC2 spot resilience, SSM log-capture, SSM dispatcher, Step-Functions execution-state projection, and S3-conditional-PUT writer locks. Full surface documented in README.
5
5
  Author: Brian McMahon
6
6
  License: Proprietary
@@ -16,6 +16,10 @@ Requires-Dist: arcticdb>=6.11; extra == "arcticdb"
16
16
  Requires-Dist: pandas>=2.0; extra == "arcticdb"
17
17
  Provides-Extra: quant
18
18
  Requires-Dist: numpy>=1.24; extra == "quant"
19
+ Provides-Extra: quant-xs
20
+ Requires-Dist: numpy>=1.24; extra == "quant-xs"
21
+ Requires-Dist: pandas>=2.0; extra == "quant-xs"
22
+ Requires-Dist: scikit-learn>=1.0; extra == "quant-xs"
19
23
  Provides-Extra: flow-doctor
20
24
  Requires-Dist: flow-doctor[diagnosis,s3]<0.5.0,>=0.4.0; extra == "flow-doctor"
21
25
  Provides-Extra: rag
@@ -254,7 +258,8 @@ Rotates across `(instance_type × subnet)` combinations on `InsufficientInstance
254
258
 
255
259
  The shared institutional-analytics engine: pure, front-end- and data-source-agnostic functions that *describe and measure* a portfolio (performance, risk, attribution) with **no advisory logic** — it sits on the "analytics, not advice" side of the line. Lifted from robodashboard's `analytics/` after the 2026-06-03 cross-repo leverage audit, so both the alpha-engine fleet and robodashboard consume one engine instead of parallel reimplementations. Import the submodule you need (the package keeps no eager imports, so the stdlib-only modules import without numpy):
256
260
 
257
- - **`quant.factor_risk`** — statistical factor risk model `Σ = B·F·Bᵀ + D`: `estimate_factor_model` (time-series factor-ETF / Fama-MacBeth loadings), `portfolio_risk` (ex-ante vol + factor/idio split + per-factor variance contribution), `tracking_error`, `benchmark_exposure`, and a numpy-only `ledoit_wolf_cov` (no sklearn). The estimator-agnostic consumption core (`portfolio_risk`/`tracking_error`) consumes any `FactorRiskModel` (B, F, D). **Needs numpy** — `pip install "alpha-engine-lib[quant]"`.
261
+ - **`quant.factor_risk`** — statistical factor risk model `Σ = B·F·Bᵀ + D`, **Option B** (time-series factor-ETF estimator): `estimate_factor_model` (regress holdings on given factor return series), `portfolio_risk` (ex-ante vol + factor/idio split + per-factor variance contribution), `tracking_error`, `benchmark_exposure`, and a numpy-only `ledoit_wolf_cov` (no sklearn). The estimator-agnostic consumption core (`portfolio_risk`/`tracking_error`) consumes any `FactorRiskModel` (B, F, D). **Needs numpy** — `pip install "alpha-engine-lib[quant]"`.
262
+ - **`quant.factor_risk_xs`** — same `Σ = B·F·Bᵀ + D` model, **Option A** (universe-wide cross-sectional Fama-MacBeth estimator): take *exogenous* per-ticker loadings `B` and infer factor returns `f_t` via a cross-sectional OLS at each date → `F`/`D` (`build_factor_risk_model`, `cross_sectional_factor_returns`, `estimate_factor_covariance`, `estimate_idiosyncratic_variance`). **Needs pandas + scikit-learn** — `pip install "alpha-engine-lib[quant-xs]"` (kept separate so numpy-only consumers stay light).
258
263
  - **`quant.risk_measures`** — parametric (Gaussian, Acklam inverse-normal, no scipy) + historical VaR & CVaR, as positive loss fractions at a horizon (stdlib).
259
264
  - **`quant.riskstats`** — `volatility`, `sharpe_ratio`, `sortino_ratio`, `max_drawdown` (stdlib).
260
265
  - **`quant.returns`** — `xirr` (money-weighted, Newton + bisection), `time_weighted_return` (GIPS), `cumulative_return`, `annualize` (stdlib).
@@ -223,7 +223,8 @@ Rotates across `(instance_type × subnet)` combinations on `InsufficientInstance
223
223
 
224
224
  The shared institutional-analytics engine: pure, front-end- and data-source-agnostic functions that *describe and measure* a portfolio (performance, risk, attribution) with **no advisory logic** — it sits on the "analytics, not advice" side of the line. Lifted from robodashboard's `analytics/` after the 2026-06-03 cross-repo leverage audit, so both the alpha-engine fleet and robodashboard consume one engine instead of parallel reimplementations. Import the submodule you need (the package keeps no eager imports, so the stdlib-only modules import without numpy):
225
225
 
226
- - **`quant.factor_risk`** — statistical factor risk model `Σ = B·F·Bᵀ + D`: `estimate_factor_model` (time-series factor-ETF / Fama-MacBeth loadings), `portfolio_risk` (ex-ante vol + factor/idio split + per-factor variance contribution), `tracking_error`, `benchmark_exposure`, and a numpy-only `ledoit_wolf_cov` (no sklearn). The estimator-agnostic consumption core (`portfolio_risk`/`tracking_error`) consumes any `FactorRiskModel` (B, F, D). **Needs numpy** — `pip install "alpha-engine-lib[quant]"`.
226
+ - **`quant.factor_risk`** — statistical factor risk model `Σ = B·F·Bᵀ + D`, **Option B** (time-series factor-ETF estimator): `estimate_factor_model` (regress holdings on given factor return series), `portfolio_risk` (ex-ante vol + factor/idio split + per-factor variance contribution), `tracking_error`, `benchmark_exposure`, and a numpy-only `ledoit_wolf_cov` (no sklearn). The estimator-agnostic consumption core (`portfolio_risk`/`tracking_error`) consumes any `FactorRiskModel` (B, F, D). **Needs numpy** — `pip install "alpha-engine-lib[quant]"`.
227
+ - **`quant.factor_risk_xs`** — same `Σ = B·F·Bᵀ + D` model, **Option A** (universe-wide cross-sectional Fama-MacBeth estimator): take *exogenous* per-ticker loadings `B` and infer factor returns `f_t` via a cross-sectional OLS at each date → `F`/`D` (`build_factor_risk_model`, `cross_sectional_factor_returns`, `estimate_factor_covariance`, `estimate_idiosyncratic_variance`). **Needs pandas + scikit-learn** — `pip install "alpha-engine-lib[quant-xs]"` (kept separate so numpy-only consumers stay light).
227
228
  - **`quant.risk_measures`** — parametric (Gaussian, Acklam inverse-normal, no scipy) + historical VaR & CVaR, as positive loss fractions at a horizon (stdlib).
228
229
  - **`quant.riskstats`** — `volatility`, `sharpe_ratio`, `sortino_ratio`, `max_drawdown` (stdlib).
229
230
  - **`quant.returns`** — `xirr` (money-weighted, Newton + bisection), `time_weighted_return` (GIPS), `cumulative_return`, `annualize` (stdlib).
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "alpha-engine-lib"
7
- version = "0.46.0"
7
+ version = "0.47.0"
8
8
  description = "Shared utilities for the Alpha Engine modules: preflight, logging, ArcticDB, dates, decision capture, cost telemetry, Anthropic payload chokepoint, artifact freshness, RAG, agent schemas, SSM secrets, Telegram + SNS alerts, EC2 spot resilience, SSM log-capture, SSM dispatcher, Step-Functions execution-state projection, and S3-conditional-PUT writer locks. Full surface documented in README."
9
9
  readme = "README.md"
10
10
  # EC2 still runs Python 3.9 on the always-on micro instance (boto3 drops
@@ -34,6 +34,11 @@ arcticdb = ["arcticdb>=6.11", "pandas>=2.0"]
34
34
  # factor-risk module needs numpy; the VaR/CVaR, riskstats, returns, and
35
35
  # attribution modules are pure stdlib and import without this extra.
36
36
  quant = ["numpy>=1.24"]
37
+ # Cross-sectional (Fama-MacBeth) factor risk model — quant.factor_risk_xs.
38
+ # Needs pandas (always) + scikit-learn (LedoitWolf/OAS shrinkage). Kept
39
+ # separate from [quant] so the numpy-only consumers (e.g. robodashboard)
40
+ # don't pull pandas+sklearn.
41
+ quant-xs = ["numpy>=1.24", "pandas>=2.0", "scikit-learn>=1.0"]
37
42
  flow_doctor = ["flow-doctor[diagnosis,s3]>=0.4.0,<0.5.0"]
38
43
  rag = [
39
44
  "psycopg2-binary>=2.9",
@@ -1,3 +1,3 @@
1
1
  """alpha-engine-lib — shared utilities for Alpha Engine modules."""
2
2
 
3
- __version__ = "0.46.0"
3
+ __version__ = "0.47.0"
@@ -0,0 +1,332 @@
1
+ """Cross-sectional (Fama-MacBeth) factor risk model — the "Option A" estimator.
2
+
3
+ Complements ``quant.factor_risk`` (the "Option B" time-series factor-ETF
4
+ estimator). Both produce the inputs to the same Σ = B·F·Bᵀ + D structural
5
+ covariance consumed by ``quant.factor_risk.portfolio_risk`` / ``tracking_error``;
6
+ they differ only in how the factor returns ``f_t`` and the factor covariance
7
+ ``F`` are estimated:
8
+
9
+ - **Option B** (``factor_risk.estimate_factor_model``) — regress each holding's
10
+ return series on a small set of *given* factor return series (market +
11
+ style-ETF spreads). Loadings ``B`` are the regression betas. numpy-only.
12
+ - **Option A** (here) — take *exogenous* per-ticker factor loadings ``B`` (e.g.
13
+ fundamentals-derived style exposures) and infer the factor returns ``f_t`` by
14
+ a cross-sectional OLS at each date (Fama-MacBeth 1973):
15
+
16
+ r_t = B_{t-1} · f_t + ε_t
17
+
18
+ Stacking ``f_t`` over a rolling window gives a (T × K) factor-return panel;
19
+ ``F`` is its (Ledoit-Wolf-shrunk) covariance and ``D`` the per-ticker
20
+ time-series variance of the residuals ε. This is the universe-wide Barra-lite
21
+ build.
22
+
23
+ **Dependencies:** pandas (always) + scikit-learn (lazy, only for the
24
+ ``ledoit_wolf``/``oas`` shrinkage estimators). Install ``alpha-engine-lib[quant-xs]``.
25
+ Kept in its own module so the numpy-only ``factor_risk``/``risk_measures``/etc.
26
+ consumers don't pull pandas+sklearn.
27
+
28
+ References:
29
+ - Fama & MacBeth 1973 "Risk, Return, and Equilibrium: Empirical Tests"
30
+ (JPE 81(3)) — cross-sectional-regression construction of factor returns
31
+ - Grinold & Kahn 2000, _Active Portfolio Management_, Ch. 3 — canonical
32
+ structural factor risk model
33
+ - Menchero, Orr & Wang 2011 "The Barra US Equity Model (USE4)
34
+ Methodology Notes" — operational reference
35
+ """
36
+
37
+ from __future__ import annotations
38
+
39
+ import logging
40
+ from typing import Iterable
41
+
42
+ import numpy as np
43
+ import pandas as pd
44
+
45
+ log = logging.getLogger(__name__)
46
+
47
+
48
_MIN_OBS_OVER_K = 5  # require ≥ K + 5 valid observations for a stable regression


def cross_sectional_factor_returns(
    returns_t: np.ndarray,
    loadings_prev: np.ndarray,
    *,
    include_intercept: bool = True,
) -> tuple[np.ndarray, np.ndarray]:
    """Run the single-date cross-sectional OLS r_t = B_{t-1} · f_t + ε_t.

    Args:
        returns_t: realized returns at time t; flattened to shape (N,).
        loadings_prev: (N, K) factor loadings as of t-1.
        include_intercept: when true, a column of ones is prepended to the
            loadings so the first coefficient is the regression intercept
            (the "market" factor return) and the remaining K coefficients
            are the per-factor slopes.

    Returns:
        ``(f_t, residuals)`` where ``f_t`` has length K+1 with the
        intercept (K without) and ``residuals`` has length N. Residuals
        are NaN at every row that was excluded from the fit, so the vector
        stays positionally aligned with the input universe.

    Raises:
        ValueError: if ``loadings_prev`` is not 2-D, or ``returns_t`` does
            not have exactly one entry per loadings row.

    A row is used only if its return and every entry of its design row are
    finite. When fewer than ``K_eff + _MIN_OBS_OVER_K`` usable rows remain,
    both outputs are returned as all-NaN.
    """
    r = np.asarray(returns_t, dtype=np.float64).ravel()
    exposures = np.asarray(loadings_prev, dtype=np.float64)
    if exposures.ndim != 2:
        raise ValueError(
            f"loadings_prev must be 2-D (N × K); got shape {exposures.shape}"
        )
    n_assets = exposures.shape[0]
    if r.shape != (n_assets,):
        raise ValueError(
            f"returns_t shape {r.shape} != ({n_assets},) matching loadings rows"
        )

    design = (
        np.column_stack([np.ones(n_assets), exposures])
        if include_intercept
        else exposures
    )
    k_eff = design.shape[1]

    usable = np.isfinite(r) & np.isfinite(design).all(axis=1)
    residuals = np.full(n_assets, np.nan)
    if int(usable.sum()) < k_eff + _MIN_OBS_OVER_K:
        return np.full(k_eff, np.nan), residuals

    y = r[usable]
    X = design[usable]

    # lstsq tolerates a rank-deficient design (it returns the minimum-norm
    # solution). The rank it reports is discarded here, so no diagnostic is
    # surfaced; callers that care about low-rank dates must check for
    # themselves.
    coef = np.linalg.lstsq(X, y, rcond=None)[0]

    residuals[usable] = y - X @ coef
    return coef, residuals
114
+
115
+
116
def build_factor_returns_series(
    returns_panel: pd.DataFrame,
    loadings_by_date: dict[pd.Timestamp, pd.DataFrame],
    *,
    include_intercept: bool = True,
    factor_names: Iterable[str] | None = None,
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Run the cross-sectional regression for every date of a returns panel.

    For each row ``t`` of ``returns_panel`` the loadings frame with the
    latest key strictly before ``t`` is selected, aligned to the panel's
    tickers and the canonical factor order, and passed to
    ``cross_sectional_factor_returns``.

    Args:
        returns_panel: (T × N) DataFrame indexed by date with one column
            per ticker; row ``t`` is r_t.
        loadings_by_date: mapping from loading date to an (N × K) DataFrame
            indexed by ticker with factor-name columns.
        include_intercept: forwarded to ``cross_sectional_factor_returns``;
            adds a leading "market" column to the factor-return output.
        factor_names: explicit factor column order. When omitted, the
            columns of the first frame yielded by ``loadings_by_date`` are
            used.

    Returns:
        ``(factor_returns_df, residuals_df)``:
          • factor_returns_df — one row per panel date; columns are
            ``["market", *factor_names]`` with the intercept, otherwise
            ``factor_names``.
          • residuals_df — one row per panel date, one column per ticker.
        Dates with no earlier loadings frame, or whose aligned loadings
        frame is empty, stay all-NaN. An empty ``returns_panel`` yields two
        empty DataFrames.

    Raises:
        ValueError: if ``factor_names`` is omitted and ``loadings_by_date``
            provides no frame to take the factor order from.
    """
    if returns_panel.empty:
        return pd.DataFrame(), pd.DataFrame()

    # Materialise as plain lists: the output frames are indexed by these
    # lists rather than by the panel's own Index objects.
    panel_dates = list(returns_panel.index)
    tickers = list(returns_panel.columns)

    # Canonical factor order defaults to the first loadings frame available.
    if factor_names is None:
        first_frame = next(iter(loadings_by_date.values()), None)
        if first_frame is None:
            raise ValueError("loadings_by_date is empty — nothing to regress against")
        factor_names = list(first_frame.columns)
    factor_names = list(factor_names)

    out_columns = ["market", *factor_names] if include_intercept else factor_names

    # Pre-fill with NaN so skipped dates need no special handling.
    factor_block = np.full((len(panel_dates), len(out_columns)), np.nan)
    resid_block = np.full((len(panel_dates), len(tickers)), np.nan)

    known_loading_dates = sorted(loadings_by_date)

    for row, as_of in enumerate(panel_dates):
        loading_date = _latest_loading_date_at_or_before(known_loading_dates, as_of)
        if loading_date is None:
            continue
        exposures_df = loadings_by_date[loading_date].reindex(
            index=tickers, columns=factor_names
        )
        if exposures_df.empty:
            continue
        exposures = exposures_df.to_numpy(dtype=np.float64)
        realized = returns_panel.iloc[row].to_numpy(dtype=np.float64)

        factor_block[row], resid_block[row] = cross_sectional_factor_returns(
            realized, exposures, include_intercept=include_intercept,
        )

    return (
        pd.DataFrame(factor_block, index=panel_dates, columns=out_columns),
        pd.DataFrame(resid_block, index=panel_dates, columns=tickers),
    )
189
+
190
+
191
+ def _latest_loading_date_at_or_before(
192
+ sorted_dates: list[pd.Timestamp], cutoff: pd.Timestamp,
193
+ ) -> pd.Timestamp | None:
194
+ """Bisect for the latest loading-date strictly < cutoff (informationally
195
+ safe: at date t we only know loadings as of date t-1)."""
196
+ import bisect
197
+ idx = bisect.bisect_left(sorted_dates, cutoff)
198
+ if idx == 0:
199
+ return None
200
+ return sorted_dates[idx - 1]
201
+
202
+
203
def estimate_factor_covariance(
    factor_returns_df: pd.DataFrame,
    *,
    shrinkage: str = "ledoit_wolf",
    min_obs: int = 30,
) -> pd.DataFrame:
    """Estimate the factor covariance F = Cov(f_t) from a factor-return panel.

    Rows containing any NaN (dates whose regression was skipped) are dropped
    before estimation.

    Args:
        factor_returns_df: (T × K_eff) factor-return panel, e.g. the first
            output of ``build_factor_returns_series``.
        shrinkage: estimator name — ``"ledoit_wolf"`` (default) and
            ``"oas"`` use the scikit-learn estimators of the same name
            (imported lazily, only when selected); ``"sample"`` uses
            ``np.cov`` with its default normalisation.
        min_obs: minimum number of NaN-free rows required. Below this floor
            a warning is logged and an all-NaN matrix is returned, so an
            insufficient build is visible downstream instead of looking
            like a valid (e.g. zero) covariance.

    Returns:
        (K_eff × K_eff) DataFrame whose index and columns are the factor
        names of ``factor_returns_df``.

    Raises:
        ValueError: if ``shrinkage`` is not a supported name. The name is
            only checked once the ``min_obs`` floor has been met.
    """
    cols = list(factor_returns_df.columns)
    n_factors = len(cols)
    clean = factor_returns_df.dropna()
    if len(clean) < min_obs:
        log.warning(
            "estimate_factor_covariance: only %d clean rows (need ≥%d) — "
            "returning all-NaN F", len(clean), min_obs,
        )
        return pd.DataFrame(
            np.full((n_factors, n_factors), np.nan), index=cols, columns=cols
        )

    observations = clean.to_numpy()
    if shrinkage == "ledoit_wolf":
        from sklearn.covariance import LedoitWolf
        F = LedoitWolf().fit(observations).covariance_
    elif shrinkage == "oas":
        from sklearn.covariance import OAS
        F = OAS().fit(observations).covariance_
    elif shrinkage == "sample":
        # np.cov collapses a single-variable input to a 0-d scalar, which
        # the DataFrame constructor rejects; atleast_2d restores the (1, 1)
        # shape so a one-factor panel (e.g. intercept-only) works too.
        F = np.atleast_2d(np.cov(observations, rowvar=False))
    else:
        raise ValueError(f"Unknown shrinkage: {shrinkage!r}")
    return pd.DataFrame(F, index=cols, columns=cols)
248
+
249
+
250
def estimate_idiosyncratic_variance(
    residuals_df: pd.DataFrame,
    *,
    min_obs: int = 30,
) -> pd.Series:
    """Per-ticker idiosyncratic variance D_{ii} = Var(ε_{i,t}).

    Args:
        residuals_df: (T × N) residual panel, e.g. the second output of
            ``build_factor_returns_series``; NaN marks a missing residual.
        min_obs: minimum number of non-NaN residuals a ticker needs.

    Returns:
        float64 Series with one entry per column of ``residuals_df`` (same
        labels, same order). Tickers with fewer than ``min_obs`` non-NaN
        residuals are NaN rather than a number, so callers can tell
        "insufficient history" apart from a genuine variance.
    """
    # Column-wise reductions instead of a per-ticker Python loop: same
    # values, but it scales to a universe-wide panel and does not depend on
    # label-based lookups (which break when column labels repeat).
    enough_history = residuals_df.count().to_numpy() >= min_obs
    # Population variance (ddof=0, i.e. divide by the observation count);
    # NaNs are skipped by pandas.
    variances = residuals_df.var(ddof=0).astype(np.float64)
    # The mask is a plain array, so it is applied positionally.
    return variances.where(enough_history)
279
+
280
+
281
def build_factor_risk_model(
    returns_panel: pd.DataFrame,
    loadings_by_date: dict[pd.Timestamp, pd.DataFrame],
    *,
    include_intercept: bool = True,
    cov_shrinkage: str = "ledoit_wolf",
    min_cov_obs: int = 30,
    min_idio_obs: int = 30,
) -> dict:
    """Build F and D end to end from a returns panel and dated loadings.

    Chains ``build_factor_returns_series`` → ``estimate_factor_covariance``
    → ``estimate_idiosyncratic_variance``.

    Args:
        returns_panel: (T × N) returns, dates × tickers.
        loadings_by_date: loading date → (N × K) loadings frame.
        include_intercept: forwarded to ``build_factor_returns_series``.
        cov_shrinkage: forwarded to ``estimate_factor_covariance`` as
            ``shrinkage``.
        min_cov_obs: forwarded to ``estimate_factor_covariance`` as
            ``min_obs``.
        min_idio_obs: forwarded to ``estimate_idiosyncratic_variance`` as
            ``min_obs``.

    Returns:
        dict with keys:
          • "factor_returns": (T × K_eff) DataFrame
          • "residuals": (T × N) DataFrame
          • "F": (K_eff × K_eff) DataFrame
          • "D": (N,) Series
          • "metadata": dict with n_dates, n_clean_dates (factor-return
            rows without any NaN), K_eff, n_tickers, cov_shrinkage and
            include_intercept
    """
    f_panel, eps_panel = build_factor_returns_series(
        returns_panel,
        loadings_by_date,
        include_intercept=include_intercept,
    )
    factor_cov = estimate_factor_covariance(
        f_panel,
        shrinkage=cov_shrinkage,
        min_obs=min_cov_obs,
    )
    idio_var = estimate_idiosyncratic_variance(eps_panel, min_obs=min_idio_obs)

    n_dates, k_eff = f_panel.shape
    return {
        "factor_returns": f_panel,
        "residuals": eps_panel,
        "F": factor_cov,
        "D": idio_var,
        "metadata": {
            "n_dates": int(n_dates),
            "n_clean_dates": int(f_panel.dropna().shape[0]),
            "K_eff": int(k_eff),
            "n_tickers": int(returns_panel.shape[1]),
            "cov_shrinkage": cov_shrinkage,
            "include_intercept": bool(include_intercept),
        },
    }
324
+
325
+
326
+ __all__ = [
327
+ "cross_sectional_factor_returns",
328
+ "build_factor_returns_series",
329
+ "estimate_factor_covariance",
330
+ "estimate_idiosyncratic_variance",
331
+ "build_factor_risk_model",
332
+ ]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: alpha-engine-lib
3
- Version: 0.46.0
3
+ Version: 0.47.0
4
4
  Summary: Shared utilities for the Alpha Engine modules: preflight, logging, ArcticDB, dates, decision capture, cost telemetry, Anthropic payload chokepoint, artifact freshness, RAG, agent schemas, SSM secrets, Telegram + SNS alerts, EC2 spot resilience, SSM log-capture, SSM dispatcher, Step-Functions execution-state projection, and S3-conditional-PUT writer locks. Full surface documented in README.
5
5
  Author: Brian McMahon
6
6
  License: Proprietary
@@ -16,6 +16,10 @@ Requires-Dist: arcticdb>=6.11; extra == "arcticdb"
16
16
  Requires-Dist: pandas>=2.0; extra == "arcticdb"
17
17
  Provides-Extra: quant
18
18
  Requires-Dist: numpy>=1.24; extra == "quant"
19
+ Provides-Extra: quant-xs
20
+ Requires-Dist: numpy>=1.24; extra == "quant-xs"
21
+ Requires-Dist: pandas>=2.0; extra == "quant-xs"
22
+ Requires-Dist: scikit-learn>=1.0; extra == "quant-xs"
19
23
  Provides-Extra: flow-doctor
20
24
  Requires-Dist: flow-doctor[diagnosis,s3]<0.5.0,>=0.4.0; extra == "flow-doctor"
21
25
  Provides-Extra: rag
@@ -254,7 +258,8 @@ Rotates across `(instance_type × subnet)` combinations on `InsufficientInstance
254
258
 
255
259
  The shared institutional-analytics engine: pure, front-end- and data-source-agnostic functions that *describe and measure* a portfolio (performance, risk, attribution) with **no advisory logic** — it sits on the "analytics, not advice" side of the line. Lifted from robodashboard's `analytics/` after the 2026-06-03 cross-repo leverage audit, so both the alpha-engine fleet and robodashboard consume one engine instead of parallel reimplementations. Import the submodule you need (the package keeps no eager imports, so the stdlib-only modules import without numpy):
256
260
 
257
- - **`quant.factor_risk`** — statistical factor risk model `Σ = B·F·Bᵀ + D`: `estimate_factor_model` (time-series factor-ETF / Fama-MacBeth loadings), `portfolio_risk` (ex-ante vol + factor/idio split + per-factor variance contribution), `tracking_error`, `benchmark_exposure`, and a numpy-only `ledoit_wolf_cov` (no sklearn). The estimator-agnostic consumption core (`portfolio_risk`/`tracking_error`) consumes any `FactorRiskModel` (B, F, D). **Needs numpy** — `pip install "alpha-engine-lib[quant]"`.
261
+ - **`quant.factor_risk`** — statistical factor risk model `Σ = B·F·Bᵀ + D`, **Option B** (time-series factor-ETF estimator): `estimate_factor_model` (regress holdings on given factor return series), `portfolio_risk` (ex-ante vol + factor/idio split + per-factor variance contribution), `tracking_error`, `benchmark_exposure`, and a numpy-only `ledoit_wolf_cov` (no sklearn). The estimator-agnostic consumption core (`portfolio_risk`/`tracking_error`) consumes any `FactorRiskModel` (B, F, D). **Needs numpy** — `pip install "alpha-engine-lib[quant]"`.
262
+ - **`quant.factor_risk_xs`** — same `Σ = B·F·Bᵀ + D` model, **Option A** (universe-wide cross-sectional Fama-MacBeth estimator): take *exogenous* per-ticker loadings `B` and infer factor returns `f_t` via a cross-sectional OLS at each date → `F`/`D` (`build_factor_risk_model`, `cross_sectional_factor_returns`, `estimate_factor_covariance`, `estimate_idiosyncratic_variance`). **Needs pandas + scikit-learn** — `pip install "alpha-engine-lib[quant-xs]"` (kept separate so numpy-only consumers stay light).
258
263
  - **`quant.risk_measures`** — parametric (Gaussian, Acklam inverse-normal, no scipy) + historical VaR & CVaR, as positive loss fractions at a horizon (stdlib).
259
264
  - **`quant.riskstats`** — `volatility`, `sharpe_ratio`, `sortino_ratio`, `max_drawdown` (stdlib).
260
265
  - **`quant.returns`** — `xirr` (money-weighted, Newton + bisection), `time_weighted_return` (GIPS), `cumulative_return`, `annualize` (stdlib).
@@ -39,6 +39,7 @@ src/alpha_engine_lib/pipeline_status/templates.py
39
39
  src/alpha_engine_lib/quant/__init__.py
40
40
  src/alpha_engine_lib/quant/attribution.py
41
41
  src/alpha_engine_lib/quant/factor_risk.py
42
+ src/alpha_engine_lib/quant/factor_risk_xs.py
42
43
  src/alpha_engine_lib/quant/returns.py
43
44
  src/alpha_engine_lib/quant/risk_measures.py
44
45
  src/alpha_engine_lib/quant/riskstats.py
@@ -72,6 +73,7 @@ tests/test_pipeline_status_templates.py
72
73
  tests/test_preflight.py
73
74
  tests/test_quant_attribution.py
74
75
  tests/test_quant_factor_risk.py
76
+ tests/test_quant_factor_risk_xs.py
75
77
  tests/test_quant_returns.py
76
78
  tests/test_quant_risk_measures.py
77
79
  tests/test_quant_riskstats.py
@@ -21,6 +21,11 @@ flow-doctor[diagnosis,s3]<0.5.0,>=0.4.0
21
21
  [quant]
22
22
  numpy>=1.24
23
23
 
24
+ [quant-xs]
25
+ numpy>=1.24
26
+ pandas>=2.0
27
+ scikit-learn>=1.0
28
+
24
29
  [rag]
25
30
  psycopg2-binary>=2.9
26
31
  pgvector>=0.2
@@ -0,0 +1,413 @@
1
+ """Cross-sectional (Fama-MacBeth) factor-risk model — Barra-style F + D.
2
+
3
+ Tests the cross-sectional-regression primitives that turn an exogenous
4
+ factor-loading matrix B into F (factor-return covariance) and D (per-ticker
5
+ idiosyncratic variance) — the inputs to a Σ = B·F·Bᵀ + D risk decomposition.
6
+
7
+ Load-bearing property: when synthetic data is generated from a known
8
+ true F, the estimator should recover it within sampling error. The
9
+ recovery test is the institutional gate — without it, a silent
10
+ miscalibration would propagate into a downstream risk estimate.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import pytest
16
+
17
+ # factor_risk_xs is the [quant-xs] extra (pandas always; sklearn for the default
18
+ # LedoitWolf/OAS shrinkage). Skip the module cleanly when they're absent.
19
+ np = pytest.importorskip("numpy")
20
+ pd = pytest.importorskip("pandas")
21
+ pytest.importorskip("sklearn")
22
+
23
+ from alpha_engine_lib.quant.factor_risk_xs import ( # noqa: E402 (after importorskip guard)
24
+ build_factor_returns_series,
25
+ build_factor_risk_model,
26
+ cross_sectional_factor_returns,
27
+ estimate_factor_covariance,
28
+ estimate_idiosyncratic_variance,
29
+ )
30
+
31
+
32
+ # ─── Helpers ────────────────────────────────────────────────────────────────
33
+
34
+
35
+ def _synthetic_panel(
36
+ N: int = 30, K: int = 4, T: int = 250, seed: int = 0,
37
+ true_F_diag: float = 0.0004, true_D_scale: float = 0.0009,
38
+ market_factor_var: float = 0.0001,
39
+ ):
40
+ """Generate a synthetic factor-model panel with known true F + D.
41
+
42
+ True model: r_t = market_t + B · f_t + ε_t, where market_t ~ N(0, market_factor_var),
43
+ f_t ~ N(0, true_F_diag · I), ε_t ~ N(0, diag(D)), D_i ~ Uniform(0.5, 1.5) · true_D_scale.
44
+ """
45
+ rng = np.random.default_rng(seed)
46
+ dates = pd.date_range("2024-01-01", periods=T, freq="B")
47
+ tickers = [f"T{i:02d}" for i in range(N)]
48
+ factor_names = [f"f{k}" for k in range(K)]
49
+
50
+ # Stationary z-scored loadings (mean ≈ 0, std ≈ 1 per factor)
51
+ B_raw = rng.normal(0, 1, size=(N, K))
52
+ B_raw = (B_raw - B_raw.mean(axis=0)) / B_raw.std(axis=0)
53
+
54
+ true_F = np.eye(K) * true_F_diag
55
+ true_D = rng.uniform(0.5 * true_D_scale, 1.5 * true_D_scale, N)
56
+
57
+ loadings_by_date = {d: pd.DataFrame(B_raw, index=tickers, columns=factor_names)
58
+ for d in dates}
59
+ returns_panel = np.zeros((T, N))
60
+ for i in range(T):
61
+ m_t = float(rng.normal(0, np.sqrt(market_factor_var)))
62
+ f_t = rng.multivariate_normal(np.zeros(K), true_F)
63
+ eps_t = rng.normal(0, np.sqrt(true_D), N)
64
+ returns_panel[i] = m_t + B_raw @ f_t + eps_t
65
+
66
+ returns_df = pd.DataFrame(returns_panel, index=dates, columns=tickers)
67
+ return {
68
+ "returns_df": returns_df,
69
+ "loadings_by_date": loadings_by_date,
70
+ "B_true": B_raw,
71
+ "true_F_diag": true_F_diag,
72
+ "true_D": true_D,
73
+ "factor_names": factor_names,
74
+ "tickers": tickers,
75
+ }
76
+
77
+
78
+ # ─── cross_sectional_factor_returns ─────────────────────────────────────────
79
+
80
+
81
+ class TestCrossSectionalFactorReturns:
82
+ def test_recovers_known_factor_returns_no_intercept(self):
83
+ """Exact construction: r = B·f_true → OLS must recover f_true exactly
84
+ (zero residuals when no noise + no intercept needed)."""
85
+ rng = np.random.default_rng(1)
86
+ N, K = 50, 5
87
+ B = rng.normal(0, 1, size=(N, K))
88
+ f_true = np.array([0.01, -0.02, 0.005, 0.015, -0.008])
89
+ r = B @ f_true
90
+
91
+ f_hat, residuals = cross_sectional_factor_returns(
92
+ r, B, include_intercept=False,
93
+ )
94
+ np.testing.assert_allclose(f_hat, f_true, atol=1e-10)
95
+ # Residuals are zero up to numerical noise
96
+ assert np.max(np.abs(residuals)) < 1e-9
97
+
98
+ def test_with_intercept_recovers_market_plus_factors(self):
99
+ """r = m + B·f → (K + 1)-element solution (5 here) with intercept first."""
100
+ rng = np.random.default_rng(2)
101
+ N, K = 50, 4
102
+ B = rng.normal(0, 1, size=(N, K))
103
+ B = B - B.mean(axis=0) # z-scored loadings have mean 0
104
+ market = 0.005
105
+ f_true = np.array([0.01, -0.02, 0.005, 0.015])
106
+ r = market + B @ f_true
107
+
108
+ f_hat, _ = cross_sectional_factor_returns(
109
+ r, B, include_intercept=True,
110
+ )
111
+ assert f_hat.shape == (K + 1,)
112
+ assert f_hat[0] == pytest.approx(market, abs=1e-10)
113
+ np.testing.assert_allclose(f_hat[1:], f_true, atol=1e-10)
114
+
115
+ def test_handles_noise_with_finite_error(self):
116
+ """Adding noise → OLS finds the right *direction* but residuals
117
+ absorb the noise. Sanity: f_hat is close to f_true; residual std
118
+ is close to the noise std."""
119
+ rng = np.random.default_rng(3)
120
+ N, K = 100, 4
121
+ B = rng.normal(0, 1, size=(N, K))
122
+ f_true = np.array([0.01, -0.02, 0.005, 0.015])
123
+ noise = rng.normal(0, 0.02, N)
124
+ r = B @ f_true + noise
125
+
126
+ f_hat, residuals = cross_sectional_factor_returns(
127
+ r, B, include_intercept=False,
128
+ )
129
+ # Each estimated coefficient within ~4 standard errors of the truth (SE ≈ 0.02 / √100 = 0.002)
130
+ np.testing.assert_allclose(f_hat, f_true, atol=0.008)
131
+ # Residual std should be close to the input noise std
132
+ assert abs(float(np.std(residuals)) - 0.02) < 0.005
133
+
134
+ def test_nan_rows_dropped(self):
135
+ rng = np.random.default_rng(4)
136
+ N, K = 50, 3
137
+ B = rng.normal(0, 1, size=(N, K))
138
+ f_true = np.array([0.01, -0.01, 0.005])
139
+ r = B @ f_true
140
+ # Inject NaN
141
+ r_with_nan = r.copy()
142
+ r_with_nan[0:5] = np.nan
143
+ B_with_nan = B.copy()
144
+ B_with_nan[10:12, 0] = np.nan
145
+
146
+ f_hat, residuals = cross_sectional_factor_returns(
147
+ r_with_nan, B_with_nan, include_intercept=False,
148
+ )
149
+ np.testing.assert_allclose(f_hat, f_true, atol=1e-9)
150
+ # Residuals for NaN-input rows must be NaN
151
+ assert np.all(np.isnan(residuals[0:5]))
152
+ assert np.all(np.isnan(residuals[10:12]))
153
+
154
+ def test_too_few_observations_returns_nan(self):
155
+ """K + 5 observation buffer prevents unstable solves."""
156
+ rng = np.random.default_rng(5)
157
+ N, K = 6, 4 # 6 rows vs. 4 factors + intercept = 5 regressors; the K + 5 buffer needs ≥ 10 rows
158
+ B = rng.normal(0, 1, size=(N, K))
159
+ r = rng.normal(0, 0.01, N)
160
+
161
+ f_hat, residuals = cross_sectional_factor_returns(
162
+ r, B, include_intercept=True,
163
+ )
164
+ assert np.all(np.isnan(f_hat))
165
+ assert np.all(np.isnan(residuals))
166
+
167
+ def test_wrong_shape_raises(self):
168
+ rng = np.random.default_rng(6)
169
+ with pytest.raises(ValueError, match="loadings_prev must be 2-D"):
170
+ cross_sectional_factor_returns(np.zeros(10), np.zeros(10))
171
+ with pytest.raises(ValueError, match="returns_t shape"):
172
+ cross_sectional_factor_returns(np.zeros(11), rng.normal(0, 1, (10, 3)))
173
+
174
+ def test_rank_deficient_loadings_returns_minimum_norm_solution(self):
175
+ """A perfectly collinear factor column shouldn't crash — lstsq
176
+ returns the minimum-norm solution. Verifies the no-crash contract."""
177
+ N, K = 30, 3
178
+ rng = np.random.default_rng(7)
179
+ B = rng.normal(0, 1, size=(N, K))
180
+ B[:, 2] = B[:, 0] # Column 2 == Column 0 → rank 2, not 3
181
+ r = rng.normal(0, 0.01, N)
182
+ # Should not raise
183
+ f_hat, residuals = cross_sectional_factor_returns(
184
+ r, B, include_intercept=False,
185
+ )
186
+ # All-finite — solver succeeded
187
+ assert np.all(np.isfinite(f_hat))
188
+
189
+
190
+ # ─── build_factor_returns_series ────────────────────────────────────────────
191
+
192
+
193
+ class TestBuildFactorReturnsSeries:
194
+ def test_emits_factor_returns_and_residuals_panels(self):
195
+ data = _synthetic_panel(N=30, K=4, T=100)
196
+ f_df, eps_df = build_factor_returns_series(
197
+ data["returns_df"], data["loadings_by_date"],
198
+ )
199
+ # T rows; K + 1 columns (intercept on by default)
200
+ assert f_df.shape == (100, 5)
201
+ assert eps_df.shape == (100, 30)
202
+ # First date has no prior loadings → all NaN
203
+ assert f_df.iloc[0].isna().all()
204
+ # Subsequent dates have factor returns
205
+ assert not f_df.iloc[10].isna().any()
206
+
207
+ def test_first_date_has_no_prior_loadings(self):
208
+ """Informational safety: at date t we may only use loadings at
209
+ strictly earlier dates (t-1 or older)."""
210
+ data = _synthetic_panel(T=10)
211
+ f_df, _ = build_factor_returns_series(
212
+ data["returns_df"], data["loadings_by_date"],
213
+ )
214
+ assert f_df.iloc[0].isna().all()
215
+
216
+ def test_factor_names_argument_pins_order(self):
217
+ data = _synthetic_panel(K=4)
218
+ custom_order = ["f3", "f1", "f0", "f2"]
219
+ f_df, _ = build_factor_returns_series(
220
+ data["returns_df"], data["loadings_by_date"],
221
+ factor_names=custom_order,
222
+ )
223
+ # market column first (intercept on), then custom order
224
+ assert list(f_df.columns) == ["market"] + custom_order
225
+
226
+ def test_include_intercept_false_skips_market_column(self):
227
+ data = _synthetic_panel(K=4)
228
+ f_df, _ = build_factor_returns_series(
229
+ data["returns_df"], data["loadings_by_date"],
230
+ include_intercept=False,
231
+ )
232
+ # Only K columns
233
+ assert f_df.shape[1] == 4
234
+ assert "market" not in f_df.columns
235
+
236
+ def test_empty_returns_panel_returns_empty(self):
237
+ f_df, eps_df = build_factor_returns_series(pd.DataFrame(), {})
238
+ assert f_df.empty
239
+ assert eps_df.empty
240
+
241
+ def test_empty_loadings_raises(self):
242
+ returns_df = pd.DataFrame(np.zeros((5, 3)), columns=["A", "B", "C"])
243
+ with pytest.raises(ValueError, match="loadings_by_date is empty"):
244
+ build_factor_returns_series(returns_df, {})
245
+
246
+
247
+ # ─── estimate_factor_covariance ─────────────────────────────────────────────
248
+
249
+
250
+ class TestEstimateFactorCovariance:
251
+ def test_recovers_known_diagonal_F(self):
252
+ """The load-bearing recovery test: when the synthetic data is
253
+ generated with diagonal F = 0.0004 · I, the estimator (with
254
+ plenty of samples) should produce a roughly-diagonal F with
255
+ diagonal values in the ballpark of 0.0004."""
256
+ data = _synthetic_panel(N=50, K=4, T=500, seed=11, true_F_diag=0.0004)
257
+ f_df, _ = build_factor_returns_series(
258
+ data["returns_df"], data["loadings_by_date"],
259
+ include_intercept=False,
260
+ )
261
+ F = estimate_factor_covariance(f_df, shrinkage="sample")
262
+ # f_df's first row is NaN (no prior loadings); nothing is dropped here, so this relies on estimate_factor_covariance tolerating it.
263
+ diag = np.diag(F.values)
264
+ # LW would compress diag toward mean; use sample for the recovery test.
265
+ # Accept a wide band, [1e-4, 1e-3] (0.25×–2.5× the true 0.0004), to absorb finite-sample noise.
266
+ for d in diag:
267
+ assert 0.0001 < d < 0.001, (
268
+ f"Diagonal entry {d:.6f} outside plausible range [1e-4, 1e-3] "
269
+ f"around true 0.0004"
270
+ )
271
+
272
+ def test_ledoit_wolf_returns_psd_matrix(self):
273
+ data = _synthetic_panel(N=30, K=4, T=200, seed=12)
274
+ f_df, _ = build_factor_returns_series(
275
+ data["returns_df"], data["loadings_by_date"],
276
+ )
277
+ F = estimate_factor_covariance(f_df, shrinkage="ledoit_wolf")
278
+ eigvals = np.linalg.eigvalsh(F.values)
279
+ assert eigvals.min() >= -1e-10, (
280
+ f"LW F must be PSD; got min eigval={eigvals.min()}"
281
+ )
282
+
283
+ def test_oas_estimator_works(self):
284
+ data = _synthetic_panel(N=30, K=4, T=200, seed=13)
285
+ f_df, _ = build_factor_returns_series(
286
+ data["returns_df"], data["loadings_by_date"],
287
+ )
288
+ F = estimate_factor_covariance(f_df, shrinkage="oas")
289
+ assert F.shape == (5, 5) # K + intercept
290
+ eigvals = np.linalg.eigvalsh(F.values)
291
+ assert eigvals.min() >= -1e-10
292
+
293
+ def test_insufficient_data_returns_nan_F(self):
294
+ """Below min_obs → all-NaN F so caller knows the build is bad."""
295
+ f_df = pd.DataFrame(np.random.normal(0, 0.01, (10, 4)),
296
+ columns=["a", "b", "c", "d"])
297
+ F = estimate_factor_covariance(f_df, min_obs=30)
298
+ assert F.shape == (4, 4)
299
+ assert F.isna().all().all()
300
+
301
+ def test_unknown_shrinkage_raises(self):
302
+ data = _synthetic_panel(T=100)
303
+ f_df, _ = build_factor_returns_series(
304
+ data["returns_df"], data["loadings_by_date"],
305
+ )
306
+ with pytest.raises(ValueError, match="Unknown shrinkage"):
307
+ estimate_factor_covariance(f_df, shrinkage="not-a-real-estimator")
308
+
309
+
310
+ # ─── estimate_idiosyncratic_variance ────────────────────────────────────────
311
+
312
+
313
+ class TestEstimateIdiosyncraticVariance:
314
+ def test_recovers_per_ticker_idio_variance(self):
315
+ """Recovery: synthetic D is uniform between 0.5*scale and 1.5*scale;
316
+ the estimator's mean across tickers should match the true mean
317
+ within sampling error."""
318
+ data = _synthetic_panel(N=40, K=4, T=400, seed=21, true_D_scale=0.0009)
319
+ _, eps_df = build_factor_returns_series(
320
+ data["returns_df"], data["loadings_by_date"],
321
+ )
322
+ D = estimate_idiosyncratic_variance(eps_df)
323
+ # Mean of recovered D close to mean of true D
324
+ true_mean = float(data["true_D"].mean())
325
+ rec_mean = float(D.dropna().mean())
326
+ # Within 30% relative — finite-sample + finite-K-factor noise
327
+ assert abs(rec_mean - true_mean) / true_mean < 0.3, (
328
+ f"Mean idio variance: recovered {rec_mean:.6f} vs true {true_mean:.6f}"
329
+ )
330
+
331
+ def test_all_positive_or_nan(self):
332
+ data = _synthetic_panel(N=30, K=4, T=200, seed=22)
333
+ _, eps_df = build_factor_returns_series(
334
+ data["returns_df"], data["loadings_by_date"],
335
+ )
336
+ D = estimate_idiosyncratic_variance(eps_df)
337
+ finite = D.dropna()
338
+ assert len(finite) > 0
339
+ assert (finite > 0).all()
340
+
341
+ def test_min_obs_skips_thin_tickers(self):
342
+ """A ticker with <min_obs non-NaN residuals → NaN in D, not 0."""
343
+ rng = np.random.default_rng(23)
344
+ T, N = 200, 4
345
+ eps = rng.normal(0, 0.01, size=(T, N))
346
+ eps[:190, 0] = np.nan # ticker 0 has only 10 non-NaN obs
347
+ eps_df = pd.DataFrame(eps, columns=[f"T{i}" for i in range(N)])
348
+ D = estimate_idiosyncratic_variance(eps_df, min_obs=30)
349
+ assert np.isnan(D.iloc[0])
350
+ for i in range(1, N):
351
+ assert np.isfinite(D.iloc[i])
352
+
353
+
354
+ # ─── build_factor_risk_model (end-to-end) ────────────────────────────────────
355
+
356
+
357
+ class TestBuildFactorRiskModel:
358
+ def test_end_to_end_produces_F_and_D_with_metadata(self):
359
+ data = _synthetic_panel(N=30, K=4, T=200, seed=31)
360
+ out = build_factor_risk_model(
361
+ data["returns_df"], data["loadings_by_date"],
362
+ )
363
+ assert "F" in out and "D" in out and "metadata" in out
364
+ meta = out["metadata"]
365
+ assert meta["n_dates"] == 200
366
+ assert meta["n_clean_dates"] == 199 # first date NaN
367
+ assert meta["K_eff"] == 5 # 4 factors + intercept
368
+ assert meta["n_tickers"] == 30
369
+
370
+ def test_F_is_K_eff_x_K_eff_dataframe(self):
371
+ data = _synthetic_panel(N=20, K=3, T=150, seed=32)
372
+ out = build_factor_risk_model(
373
+ data["returns_df"], data["loadings_by_date"],
374
+ )
375
+ assert out["F"].shape == (4, 4)
376
+ # Indexed by factor names with "market" first
377
+ assert list(out["F"].columns) == ["market", "f0", "f1", "f2"]
378
+ assert list(out["F"].index) == ["market", "f0", "f1", "f2"]
379
+
380
+ def test_D_indexed_by_ticker(self):
381
+ data = _synthetic_panel(N=20, K=3, T=150, seed=33)
382
+ out = build_factor_risk_model(
383
+ data["returns_df"], data["loadings_by_date"],
384
+ )
385
+ assert list(out["D"].index) == data["tickers"]
386
+ assert (out["D"].dropna() > 0).all()
387
+
388
+ def test_can_disable_intercept(self):
389
+ data = _synthetic_panel(N=20, K=3, T=150, seed=34)
390
+ out = build_factor_risk_model(
391
+ data["returns_df"], data["loadings_by_date"],
392
+ include_intercept=False,
393
+ )
394
+ assert out["metadata"]["K_eff"] == 3
395
+ assert "market" not in out["F"].columns
396
+
397
+ def test_reconstructed_Sigma_is_PSD(self):
398
+ """The whole point: Σ = B·F·Bᵀ + D must be PSD so the executor's
399
+ cvxpy solver can ingest it. Verify on the synthetic recovery case
400
+ (no intercept — caller assembles a B that matches the F shape)."""
401
+ data = _synthetic_panel(N=25, K=4, T=300, seed=35)
402
+ out = build_factor_risk_model(
403
+ data["returns_df"], data["loadings_by_date"],
404
+ include_intercept=False,
405
+ )
406
+ B = data["B_true"] # (N, K)
407
+ F = out["F"].values # (K, K)
408
+ D = out["D"].fillna(out["D"].dropna().mean()).values # (N,)
409
+ Sigma = B @ F @ B.T + np.diag(D)
410
+ eigvals = np.linalg.eigvalsh(Sigma)
411
+ assert eigvals.min() >= -1e-10, (
412
+ f"Reconstructed Σ must be PSD; got min eigval={eigvals.min()}"
413
+ )