factrix 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. factrix/__init__.py +122 -0
  2. factrix/_analysis_config.py +235 -0
  3. factrix/_axis.py +57 -0
  4. factrix/_codes.py +194 -0
  5. factrix/_describe.py +471 -0
  6. factrix/_errors.py +89 -0
  7. factrix/_evaluate.py +99 -0
  8. factrix/_logging.py +45 -0
  9. factrix/_metric_index.py +229 -0
  10. factrix/_multi_factor.py +169 -0
  11. factrix/_ols.py +78 -0
  12. factrix/_procedures.py +711 -0
  13. factrix/_profile.py +103 -0
  14. factrix/_registry.py +176 -0
  15. factrix/_stats/__init__.py +511 -0
  16. factrix/_stats/constants.py +61 -0
  17. factrix/_types.py +120 -0
  18. factrix/_validators.py +109 -0
  19. factrix/adapt.py +139 -0
  20. factrix/datasets.py +275 -0
  21. factrix/llms-full.txt +325 -0
  22. factrix/llms.txt +19 -0
  23. factrix/metrics/__init__.py +148 -0
  24. factrix/metrics/_helpers.py +343 -0
  25. factrix/metrics/caar.py +497 -0
  26. factrix/metrics/clustering.py +101 -0
  27. factrix/metrics/concentration.py +195 -0
  28. factrix/metrics/corrado.py +139 -0
  29. factrix/metrics/event_horizon.py +326 -0
  30. factrix/metrics/event_quality.py +409 -0
  31. factrix/metrics/fama_macbeth.py +592 -0
  32. factrix/metrics/hit_rate.py +97 -0
  33. factrix/metrics/ic.py +612 -0
  34. factrix/metrics/mfe_mae.py +304 -0
  35. factrix/metrics/monotonicity.py +146 -0
  36. factrix/metrics/oos.py +209 -0
  37. factrix/metrics/quantile.py +416 -0
  38. factrix/metrics/spanning.py +406 -0
  39. factrix/metrics/tradability.py +508 -0
  40. factrix/metrics/trend.py +151 -0
  41. factrix/metrics/ts_asymmetry.py +245 -0
  42. factrix/metrics/ts_beta.py +436 -0
  43. factrix/metrics/ts_quantile.py +221 -0
  44. factrix/multi_factor.py +11 -0
  45. factrix/preprocess/__init__.py +1 -0
  46. factrix/preprocess/normalize.py +74 -0
  47. factrix/preprocess/orthogonalize.py +176 -0
  48. factrix/preprocess/returns.py +104 -0
  49. factrix/stats/__init__.py +18 -0
  50. factrix/stats/bootstrap.py +143 -0
  51. factrix/stats/multiple_testing.py +153 -0
  52. factrix-0.8.0.dist-info/METADATA +139 -0
  53. factrix-0.8.0.dist-info/RECORD +56 -0
  54. factrix-0.8.0.dist-info/WHEEL +5 -0
  55. factrix-0.8.0.dist-info/licenses/LICENSE +218 -0
  56. factrix-0.8.0.dist-info/top_level.txt +1 -0
factrix/__init__.py ADDED
@@ -0,0 +1,122 @@
1
+ """factrix — Single-factor evaluation toolkit (v0.5).
2
+
3
+ Three orthogonal user-facing axes — ``FactorScope``, ``Signal``,
4
+ ``Metric`` — plus an evaluate-time-derived ``Mode`` define the analysis
5
+ cell. Construct a config via the four type-safe factories on
6
+ ``AnalysisConfig``, dispatch via ``evaluate()``, inspect via the
7
+ returned ``FactorProfile``, and aggregate across factors with
8
+ ``multi_factor.bhy`` for FDR-corrected screening.
9
+
10
+ Single-factor::
11
+
12
+ import factrix as fl
13
+
14
+ cfg = fl.AnalysisConfig.individual_continuous(metric=fl.Metric.IC)
15
+ profile = fl.evaluate(panel, cfg)
16
+ print(profile.verdict(), profile.primary_p)
17
+ print(profile.diagnose())
18
+
19
+ Batch + BHY::
20
+
21
+ profiles = [fl.evaluate(panel, cfg) for cfg in candidate_configs]
22
+ survivors = fl.multi_factor.bhy(profiles, threshold=0.05)
23
+
24
+ Schema reflection::
25
+
26
+ print(fl.describe_analysis_modes())
27
+ print(fl.suggest_config(panel))
28
+
29
+ LLM agent reference: ``llms-full.txt`` covers concepts, public API, and
30
+ typical usage patterns in a single fetch. Two access paths::
31
+
32
+ # Web — deployed at the docs site root
33
+ https://awwesomeman.github.io/factrix/llms-full.txt
34
+
35
+ # Local — shipped inside the wheel as package data
36
+ import importlib.resources
37
+ text = importlib.resources.files("factrix").joinpath("llms-full.txt").read_text()
38
+ """
39
+
40
+ from factrix import datasets, multi_factor
41
+ from factrix._analysis_config import AnalysisConfig
42
+ from factrix._axis import ( # noqa: F401 Mode re-exported for namespace access; intentionally not in __all__
43
+ FactorScope,
44
+ Metric,
45
+ Mode,
46
+ Signal,
47
+ )
48
+ from factrix._codes import InfoCode, StatCode, Verdict, WarningCode
49
+ from factrix._describe import (
50
+ SuggestConfigResult,
51
+ describe_analysis_modes,
52
+ list_metrics,
53
+ suggest_config,
54
+ )
55
+ from factrix._errors import (
56
+ ConfigError,
57
+ FactrixError,
58
+ IncompatibleAxisError,
59
+ InsufficientSampleError,
60
+ MissingConfigError,
61
+ ModeAxisError,
62
+ )
63
+ from factrix._evaluate import _evaluate as _evaluate
64
+ from factrix._profile import FactorProfile
65
+ from factrix._types import MetricOutput
66
+
67
+
68
def evaluate(raw, config=None, /):
    """Run the analysis cell selected by ``config`` on ``raw``.

    Thin public wrapper over the private ``_evaluate`` dispatcher.
    The only logic it adds is catching the common onboarding miss —
    calling ``evaluate(panel)`` without a config — and turning it into
    a friendly :class:`MissingConfigError` that points the user at
    :func:`suggest_config` and the Get Started guide.
    """
    if config is not None:
        return _evaluate(raw, config)
    # No config supplied: fail loudly with actionable guidance rather
    # than letting the dispatcher blow up on a None key.
    raise MissingConfigError(
        "evaluate() requires an AnalysisConfig. "
        "Call factrix.suggest_config(raw) for a recommendation, "
        "or see the Get Started guide: "
        "https://awwesomeman.github.io/factrix/getting-started/"
    )
84
+
85
+
86
# Keep in sync with the wheel / dist-info metadata version.
__version__ = "0.8.0"

# Explicit public API surface; star-imports and doc tooling key off this.
__all__ = [
    # Configuration
    "AnalysisConfig",
    # Axis enums (Mode intentionally NOT exported — it is derived at
    # evaluate-time from N and read off profile.mode, never set by user
    # code; review fix UX-7. Still importable from factrix._axis.)
    "FactorScope",
    "Metric",
    "Signal",
    # Code enums
    "InfoCode",
    "StatCode",
    "Verdict",
    "WarningCode",
    # Errors
    "ConfigError",
    "FactrixError",
    "IncompatibleAxisError",
    "InsufficientSampleError",
    "MissingConfigError",
    "ModeAxisError",
    # Profile + dispatch
    "FactorProfile",
    "MetricOutput",
    "evaluate",
    # Introspection
    "SuggestConfigResult",
    "describe_analysis_modes",
    "list_metrics",
    "suggest_config",
    # Multi-factor namespace
    "multi_factor",
    # Synthetic panels
    "datasets",
]
@@ -0,0 +1,235 @@
1
+ """v0.5 ``AnalysisConfig`` — three-axis orthogonal factor analysis spec (§4).
2
+
3
+ The user-facing surface is the four factory methods + ``from_dict`` /
4
+ ``to_dict``; ``__post_init__`` is the single source of truth for axis
5
+ validation, reachable from every path that produces an ``AnalysisConfig``.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from collections.abc import Callable
11
+ from dataclasses import dataclass
12
+ from typing import Any, Self
13
+
14
+ from factrix._axis import FactorScope, Metric, Mode, Signal
15
+ from factrix._errors import IncompatibleAxisError
16
+ from factrix._registry import matches_user_axis
17
+
18
# Nearest-legal cell suggested when an evaluate-time mode/sample check
# fails (§4.5 A4). Keyed by ``(scope, signal, mode)`` — ``mode`` here is
# the evaluate-time-derived axis, never user-set; values are
# zero-arg factories so cycles via ``AnalysisConfig`` resolve lazily
# (factory call sites need ``AnalysisConfig`` defined; lazy lambdas
# defer the lookup until raise time, after class definition).
#
# Intentionally narrow — every other legal user triple has a registered
# PANEL *and* TIMESERIES procedure, so ``_evaluate`` never reaches the
# fallback path for them. Only ``(INDIVIDUAL, CONTINUOUS, *)`` lacks a
# TIMESERIES cell (no cross-sectional dispersion at N=1 → IC and per-date
# OLS undefined, §5.5). Adding a TIMESERIES-less cell → add one entry; do
# not encode the suggestion at the ``raise`` site.
_FALLBACK_MAP: dict[
    tuple[FactorScope, Signal, Mode],
    Callable[[], AnalysisConfig],
] = {
    (
        FactorScope.INDIVIDUAL,
        Signal.CONTINUOUS,
        Mode.TIMESERIES,
    ): lambda: AnalysisConfig.common_continuous(),
}
40
+
41
+
42
+ def _validate_axis_compat(
43
+ scope: FactorScope,
44
+ signal: Signal,
45
+ metric: Metric | None,
46
+ ) -> None:
47
+ """Raise ``IncompatibleAxisError`` if the triple is not a legal cell.
48
+
49
+ Reverse-queries the registry SSOT (§4.4 A1) — any registered
50
+ ``_DispatchKey`` whose ``(signal, metric)`` matches and whose scope
51
+ either equals ``scope`` or is the collapse sentinel admits the
52
+ triple. Called from ``AnalysisConfig.__post_init__`` so every
53
+ construction path (factory, direct, ``from_dict``) hits one gate.
54
+ """
55
+ if matches_user_axis(scope, signal, metric):
56
+ return
57
+ metric_repr = metric.value if metric is not None else None
58
+ # UX-8 from review: lead with the actionable factory list, leave the
59
+ # tuple enumeration as a parenthetical for users debugging by hand.
60
+ raise IncompatibleAxisError(
61
+ f"({scope.value}, {signal.value}, {metric_repr}) is not a legal "
62
+ "analysis cell. Use one of the four factory methods:\n"
63
+ " AnalysisConfig.individual_continuous(metric=Metric.IC|Metric.FM)\n"
64
+ " AnalysisConfig.individual_sparse()\n"
65
+ " AnalysisConfig.common_continuous()\n"
66
+ " AnalysisConfig.common_sparse()\n"
67
+ "(legal tuples: (individual, continuous, ic), "
68
+ "(individual, continuous, fm), (individual, sparse, None), "
69
+ "(common, continuous, None), (common, sparse, None).)"
70
+ )
71
+
72
+
73
@dataclass(frozen=True, slots=True)
class AnalysisConfig:
    """Three-axis spec for a single-factor analysis.

    Construct via the four factory methods (the supported public API);
    direct construction works but bypasses no validation — every path
    runs through ``__post_init__``.

    Attributes:
        scope: Factor scope axis. ``INDIVIDUAL`` = per-asset factor;
            ``COMMON`` = single broadcast value per date.
        signal: Signal type axis. ``CONTINUOUS`` = real-valued;
            ``SPARSE`` = ``{-1, 0, +1}`` trigger.
        metric: Procedure metric axis. Only populated for
            ``(INDIVIDUAL, CONTINUOUS, *)`` cells (``IC`` or ``FM``);
            ``None`` elsewhere.
        forward_periods: Forward-return horizon in **rows** of the
            panel's time axis, not calendar time. factrix never
            inspects ``date`` dtype or spacing; the caller owns
            frequency and regular spacing. ``forward_periods=5``
            therefore means 5 trading days on a daily panel, 5 weeks
            on a weekly panel, 5 minutes on a 1-min bar panel.

    Raises:
        IncompatibleAxisError: If the ``(scope, signal, metric)``
            triple is not a legal cell.
        ValueError: If ``forward_periods`` is not a positive integer.
    """

    scope: FactorScope
    signal: Signal
    metric: Metric | None
    forward_periods: int = 5

    def __post_init__(self) -> None:
        # Single validation gate for every construction path.
        _validate_axis_compat(self.scope, self.signal, self.metric)
        # Review fix: the axis gate previously let a zero/negative (or
        # non-integer) horizon construct silently, producing nonsense
        # downstream — a forward-return window must span at least one
        # row of the time axis. bool is excluded explicitly because it
        # subclasses int.
        if (
            isinstance(self.forward_periods, bool)
            or not isinstance(self.forward_periods, int)
            or self.forward_periods < 1
        ):
            raise ValueError(
                "forward_periods must be a positive integer (rows of "
                f"the time axis), got {self.forward_periods!r}"
            )

    @classmethod
    def individual_continuous(
        cls,
        *,
        metric: Metric = Metric.IC,
        forward_periods: int = 5,
    ) -> Self:
        """Per-(date, asset) continuous factor.

        Args:
            metric: ``IC`` for rank predictive ordering; ``FM`` for
                unit-of-exposure premium (Fama-MacBeth λ).
            forward_periods: Forward-return horizon (rows of the time
                axis).

        Returns:
            A validated ``AnalysisConfig`` for the
            ``(INDIVIDUAL, CONTINUOUS, metric)`` cell.
        """
        return cls(
            FactorScope.INDIVIDUAL,
            Signal.CONTINUOUS,
            metric,
            forward_periods=forward_periods,
        )

    @classmethod
    def individual_sparse(cls, *, forward_periods: int = 5) -> Self:
        """Per-(date, asset) sparse trigger (``{-1, 0, +1}``).

        PANEL canonical procedure is the CAAR cross-event t-test;
        TIMESERIES (N=1) collapses to a dummy regression with NW HAC
        SE.

        Args:
            forward_periods: Forward-return horizon (rows of the time
                axis).

        Returns:
            A validated ``AnalysisConfig`` for the
            ``(INDIVIDUAL, SPARSE, None)`` cell.
        """
        return cls(
            FactorScope.INDIVIDUAL,
            Signal.SPARSE,
            None,
            forward_periods=forward_periods,
        )

    @classmethod
    def common_continuous(cls, *, forward_periods: int = 5) -> Self:
        """Broadcast continuous factor (e.g. VIX).

        Canonical procedure is the per-asset β estimate followed by a
        cross-asset t-test on ``E[β]``.

        Args:
            forward_periods: Forward-return horizon (rows of the time
                axis).

        Returns:
            A validated ``AnalysisConfig`` for the
            ``(COMMON, CONTINUOUS, None)`` cell.
        """
        return cls(
            FactorScope.COMMON,
            Signal.CONTINUOUS,
            None,
            forward_periods=forward_periods,
        )

    @classmethod
    def common_sparse(cls, *, forward_periods: int = 5) -> Self:
        """Broadcast sparse trigger (FOMC, policy, index rebalance).

        PANEL canonical: per-asset β on dummy + cross-asset t-test.
        TIMESERIES (N=1): TS dummy regression + NW HAC SE.

        Args:
            forward_periods: Forward-return horizon (rows of the time
                axis).

        Returns:
            A validated ``AnalysisConfig`` for the
            ``(COMMON, SPARSE, None)`` cell.
        """
        return cls(
            FactorScope.COMMON,
            Signal.SPARSE,
            None,
            forward_periods=forward_periods,
        )

    def to_dict(self) -> dict[str, Any]:
        """Serialise to a JSON-compatible dict.

        Returns:
            A dict with string-valued enums and integer
            ``forward_periods``, suitable for JSON serialisation.
        """
        return {
            "scope": self.scope.value,
            "signal": self.signal.value,
            "metric": self.metric.value if self.metric is not None else None,
            "forward_periods": self.forward_periods,
        }

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> Self:
        """Reconstruct from ``to_dict``'s output.

        Goes through ``__post_init__``, so an invalid triple raises
        ``IncompatibleAxisError`` instead of silently constructing.

        Args:
            d: Mapping in the shape produced by ``to_dict``.

        Returns:
            A validated ``AnalysisConfig``.

        Raises:
            IncompatibleAxisError: If the ``(scope, signal, metric)``
                triple is not a legal cell.
            ValueError: If ``forward_periods`` is present but not a
                positive integer.
        """
        m = d.get("metric")
        return cls(
            scope=FactorScope(d["scope"]),
            signal=Signal(d["signal"]),
            metric=Metric(m) if m is not None else None,
            forward_periods=d.get("forward_periods", 5),
        )
factrix/_axis.py ADDED
@@ -0,0 +1,57 @@
1
+ """v0.5 analysis-axis enums (§4.1 of refactor_api.md).
2
+
3
+ Three orthogonal user-facing axes describe an analysis cell:
4
+
5
+ - ``FactorScope`` — does the factor vary per-asset (``INDIVIDUAL``) or
6
+ carry a single value broadcast to every asset (``COMMON``)?
7
+ - ``Signal`` — continuous numeric exposure (``CONTINUOUS``) vs.
8
+ ``{0, R}`` event triggers (``SPARSE`` — zero on non-event entries,
9
+ arbitrary real magnitude otherwise; canonical example ``{-1, 0, +1}``)?
10
+ - ``Metric`` — procedure-canonical scalar (``IC`` or ``FM``).
11
+ Only meaningful for ``INDIVIDUAL × CONTINUOUS``; ``None`` for the
12
+ remaining cells.
13
+
14
+ ``Mode`` is the fourth axis used by registry keys / dispatch but is not
15
+ user-set: it is derived from ``N`` at evaluate-time and surfaced as
16
+ ``Profile.mode`` for downstream pattern-match.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ from enum import StrEnum
22
+
23
+
24
+ class FactorScope(StrEnum):
25
+ """Does each asset have its own factor value, or do all share one?"""
26
+
27
+ INDIVIDUAL = "individual"
28
+ COMMON = "common"
29
+
30
+
31
+ class Signal(StrEnum):
32
+ """Continuous numeric exposure vs. ``{0, R}`` sparse event trigger.
33
+
34
+ Sparse columns are zero on non-event entries with arbitrary real
35
+ magnitude otherwise (canonical example ``{-1, 0, +1}``).
36
+ """
37
+
38
+ CONTINUOUS = "continuous"
39
+ SPARSE = "sparse"
40
+
41
+
42
+ class Metric(StrEnum):
43
+ """Procedure-canonical scalar for ``INDIVIDUAL × CONTINUOUS`` cells."""
44
+
45
+ IC = "ic"
46
+ FM = "fm"
47
+
48
+
49
+ class Mode(StrEnum):
50
+ """Sample regime, derived from ``N`` at evaluate-time.
51
+
52
+ ``PANEL`` — ``N >= 2`` (multi-asset / multi-event panel).
53
+ ``TIMESERIES`` — ``N == 1`` (single-asset time series).
54
+ """
55
+
56
+ PANEL = "panel"
57
+ TIMESERIES = "timeseries"
factrix/_codes.py ADDED
@@ -0,0 +1,194 @@
1
+ """v0.5 enum codes for warnings, info notes, cell stats, and verdicts.
2
+
3
+ ``WarningCode`` / ``InfoCode`` / ``StatCode`` follow the ``*Code`` suffix
4
+ invariant (§7.5).
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from enum import StrEnum
10
+
11
+
12
+ class WarningCode(StrEnum):
13
+ """Procedure-degradation flags (replaces v3 ``DegradedMode``).
14
+
15
+ Each value carries a one-line ``description`` gloss for
16
+ ``profile.diagnose()`` consumers (review fix UX-4) — pure metadata,
17
+ StrEnum value identity is unchanged.
18
+ """
19
+
20
+ UNRELIABLE_SE_SHORT_PERIODS = "unreliable_se_short_periods"
21
+ EVENT_WINDOW_OVERLAP = "event_window_overlap"
22
+ # Fired when ADF p > 0.1 on a CONTINUOUS factor (Stambaugh-style
23
+ # persistent-regressor flag, §5.2 / §7.3). Not raised for SPARSE.
24
+ PERSISTENT_REGRESSOR = "persistent_regressor"
25
+ SERIAL_CORRELATION_DETECTED = "serial_correlation_detected"
26
+ # Two-tier cross-asset N guards for PANEL common_continuous. Mirrors
27
+ # the n_periods two-tier (UNRELIABLE_SE_SHORT_PERIODS) but the axis
28
+ # never raises — cross-asset t-test on E[β] is well-defined for N≥2.
29
+ SMALL_CROSS_SECTION_N = "small_cross_section_n"
30
+ BORDERLINE_CROSS_SECTION_N = "borderline_cross_section_n"
31
+ # Fired by the (COMMON, SPARSE, PANEL) procedure when the broadcast
32
+ # dummy carries MIN_BROADCAST_EVENTS_HARD ≤ n_events <
33
+ # MIN_BROADCAST_EVENTS_WARN. Per-asset β is identifiable but
34
+ # the cross-event averaging is too thin for asymptotic t to be
35
+ # trusted. Below the HARD floor raises InsufficientSampleError instead.
36
+ SPARSE_COMMON_FEW_EVENTS = "sparse_common_few_events"
37
+ # Fired when a sparse ``factor`` column carries mixed signs but is
38
+ # not a clean ±1 ternary (e.g. ``{-2.5, 0, +1.3}``). The CAAR /
39
+ # sparse-panel statistic is the magnitude-weighted Sefcik-Thompson
40
+ # (1986) variant, which differs from the textbook MacKinlay (1997)
41
+ # signed CAAR at finite samples when negative- and positive-leg
42
+ # vols disagree. ``{-1, 0, +1}`` does not trigger — sign and weight
43
+ # semantics coincide numerically. All-non-negative columns
44
+ # (``{0, 1}`` / ``{0, R≥0}``) do not trigger — no flip ambiguity.
45
+ SPARSE_MAGNITUDE_WEIGHTED = "sparse_magnitude_weighted"
46
+ # Fired by ``caar`` (significance test) when the per-event-date series
47
+ # length sits in ``[MIN_EVENTS_HARD, MIN_EVENTS_WARN)`` — the t-stat
48
+ # is returned but the Brown-Warner (1985) convention treats sub-30
49
+ # event-date counts as power-thin for the asymptotic t-distribution.
50
+ # Below the HARD floor the primitive short-circuits to NaN instead.
51
+ FEW_EVENTS_BROWN_WARNER = "few_events_brown_warner"
52
+ # Fired by ``top_concentration`` when the per-date ratio series sits
53
+ # in ``[MIN_PORTFOLIO_PERIODS_HARD, MIN_PORTFOLIO_PERIODS_WARN)`` —
54
+ # the one-sided t-test on the diversification ratio is returned but
55
+ # ``df = n - 1 < 19`` inflates t_crit relative to the asymptotic
56
+ # cutoff. Below the HARD floor the primitive short-circuits to NaN.
57
+ BORDERLINE_PORTFOLIO_PERIODS = "borderline_portfolio_periods"
58
+
59
+ @property
60
+ def description(self) -> str:
61
+ return _WARNING_DESCRIPTIONS[self]
62
+
63
+
64
+ _WARNING_DESCRIPTIONS: dict[WarningCode, str] = {}
65
+
66
+
67
+ _WARNING_DESCRIPTIONS.update(
68
+ {
69
+ WarningCode.UNRELIABLE_SE_SHORT_PERIODS: "n_periods is below the WARN floor (~30); NW HAC SE may be biased. "
70
+ "Reused across panel time-series guards (MIN_PERIODS_WARN) and "
71
+ "primitive inference (MIN_FM_PERIODS_WARN); both default to 30.",
72
+ WarningCode.EVENT_WINDOW_OVERLAP: "Adjacent events sit within forward_periods; AR windows overlap.",
73
+ WarningCode.PERSISTENT_REGRESSOR: "ADF p > 0.10 on the continuous factor; β may carry Stambaugh bias.",
74
+ WarningCode.SERIAL_CORRELATION_DETECTED: "Ljung-Box p < 0.05 on residuals; NW lag may be under-set.",
75
+ WarningCode.SMALL_CROSS_SECTION_N: "PANEL cross-asset t-test with n_assets < MIN_ASSETS (10); "
76
+ "df=n_assets-1 too low — t_crit at n_assets=3 ≈ 4.30 "
77
+ "(+119% vs asymptotic 1.96).",
78
+ WarningCode.BORDERLINE_CROSS_SECTION_N: "PANEL cross-asset t-test with MIN_ASSETS ≤ n_assets < "
79
+ "MIN_ASSETS_WARN (10..29); residual t_crit inflation "
80
+ "5–15% — read borderline p-values cautiously.",
81
+ WarningCode.SPARSE_COMMON_FEW_EVENTS: "(COMMON, SPARSE, PANEL) broadcast dummy has "
82
+ "MIN_BROADCAST_EVENTS_HARD ≤ n_events < MIN_BROADCAST_EVENTS_WARN "
83
+ "(5..19); per-asset β estimable but cross-event averaging too thin "
84
+ "for asymptotic t.",
85
+ WarningCode.SPARSE_MAGNITUDE_WEIGHTED: "Sparse factor column is mixed-sign and not a "
86
+ "clean ±1 ternary; statistic is magnitude-weighted (Sefcik-Thompson) "
87
+ "rather than textbook MacKinlay signed CAAR — apply .sign() before "
88
+ "calling for sign-flip semantics.",
89
+ WarningCode.FEW_EVENTS_BROWN_WARNER: "CAAR significance test with MIN_EVENTS_HARD ≤ "
90
+ "n_event_dates < MIN_EVENTS_WARN (4..29); t-stat returned but "
91
+ "Brown-Warner (1985) convention treats sub-30 events as power-thin "
92
+ "for the asymptotic t-distribution — read borderline p-values cautiously.",
93
+ WarningCode.BORDERLINE_PORTFOLIO_PERIODS: "top_concentration with MIN_PORTFOLIO_PERIODS_HARD "
94
+ "≤ n_periods < MIN_PORTFOLIO_PERIODS_WARN (3..19); one-sided t-test "
95
+ "on the per-date diversification ratio is returned but df=n-1 inflates "
96
+ "t_crit relative to the asymptotic cutoff.",
97
+ }
98
+ )
99
+
100
+
101
def cross_section_tier(n_assets: int) -> WarningCode | None:
    """Map an inference-stage cross-asset N to its warning tier, if any.

    The argument is the **inference-stage** N — the count of assets
    actually entering the cross-asset test, not the panel-union
    ``FactorProfile.n_assets`` surface field. For ``(COMMON, *, None,
    PANEL)`` cells the two differ: ``compute_ts_betas`` drops assets
    with fewer than ``MIN_TS_OBS`` non-null observations, so the union
    can be materially larger than the post-filter count that drives
    ``primary_p``'s ``dof = N - 1``. Callers (``suggest_config``,
    ``_compute_common_panel``) therefore pre-filter before calling.

    Tiers are mutually exclusive — SMALL is strictly more severe than
    BORDERLINE — so callers can membership-check the more severe code
    without an else branch. Returns ``None`` at ``n_assets ≥
    MIN_ASSETS_WARN`` (clean) or ``n_assets < 2`` (PANEL impossible
    by upstream mode routing; defensive).
    """
    # Local import keeps the module import graph acyclic.
    from factrix._stats.constants import MIN_ASSETS, MIN_ASSETS_WARN

    # Clean sample or defensively-impossible N: nothing to flag.
    if n_assets < 2 or n_assets >= MIN_ASSETS_WARN:
        return None
    if n_assets < MIN_ASSETS:
        return WarningCode.SMALL_CROSS_SECTION_N
    return WarningCode.BORDERLINE_CROSS_SECTION_N
126
+
127
+
128
+ class InfoCode(StrEnum):
129
+ """Neutral facts surfaced to the caller — not warnings, not errors."""
130
+
131
+ SCOPE_AXIS_COLLAPSED = "scope_axis_collapsed"
132
+
133
+ @property
134
+ def description(self) -> str:
135
+ return _INFO_DESCRIPTIONS[self]
136
+
137
+
138
+ _INFO_DESCRIPTIONS: dict[InfoCode, str] = {
139
+ InfoCode.SCOPE_AXIS_COLLAPSED: "N=1 collapsed scope axis; routed via _SCOPE_COLLAPSED sentinel.",
140
+ }
141
+
142
+
143
+ class StatCode(StrEnum):
144
+ """Cell-specific scalar stats keyed in ``FactorProfile.stats``.
145
+
146
+ Adding a new metric → add an enum value here + populate it in the
147
+ procedure. Profile schema does not grow. Stats fall in three
148
+ families:
149
+
150
+ - **p-values**: identifier ends in ``_p`` (``IC_P`` / ``FM_LAMBDA_P``
151
+ / ``TS_BETA_P`` / ``CAAR_P`` plus the diagnostic-only
152
+ ``FACTOR_ADF_P`` / ``LJUNG_BOX_P``). ``is_p_value`` returns
153
+ ``True``. These are the only codes ``multi_factor.bhy`` will
154
+ accept as a ``gate=`` override (BHY step-up requires probabilities
155
+ — feeding it t-stats yields nonsense FDR control).
156
+ - **t-stats** / effect-size means / lag counts / HHI: ``is_p_value``
157
+ returns ``False``. ``profile.verdict(gate=...)`` accepts these
158
+ (the comparison is generic ``value < threshold`` — interpretation
159
+ is the caller's call) but ``bhy(gate=...)`` rejects them.
160
+ """
161
+
162
+ IC_MEAN = "ic_mean"
163
+ IC_T_NW = "ic_t_nw"
164
+ IC_P = "ic_p"
165
+ FM_LAMBDA_MEAN = "fm_lambda_mean"
166
+ FM_LAMBDA_T_NW = "fm_lambda_t_nw"
167
+ FM_LAMBDA_P = "fm_lambda_p"
168
+ TS_BETA = "ts_beta"
169
+ TS_BETA_T_NW = "ts_beta_t_nw"
170
+ TS_BETA_P = "ts_beta_p"
171
+ CAAR_MEAN = "caar_mean"
172
+ CAAR_T_NW = "caar_t_nw"
173
+ CAAR_P = "caar_p"
174
+ FACTOR_ADF_P = "factor_adf_p"
175
+ LJUNG_BOX_P = "ljung_box_p"
176
+ EVENT_TEMPORAL_HHI = "event_temporal_hhi"
177
+ NW_LAGS_USED = "nw_lags_used"
178
+
179
+ @property
180
+ def is_p_value(self) -> bool:
181
+ """``True`` iff this stat is a probability in [0, 1].
182
+
183
+ Used by ``multi_factor.bhy`` to gatekeep the ``gate=`` override
184
+ — BHY step-up math requires p-values, so feeding a t-stat would
185
+ silently corrupt FDR control.
186
+ """
187
+ return self.value.endswith("_p")
188
+
189
+
190
+ class Verdict(StrEnum):
191
+ """Procedure-canonical pass/fail outcome of ``Profile.verdict()``."""
192
+
193
+ PASS = "pass"
194
+ FAIL = "fail"