factrix 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- factrix/__init__.py +122 -0
- factrix/_analysis_config.py +235 -0
- factrix/_axis.py +57 -0
- factrix/_codes.py +194 -0
- factrix/_describe.py +471 -0
- factrix/_errors.py +89 -0
- factrix/_evaluate.py +99 -0
- factrix/_logging.py +45 -0
- factrix/_metric_index.py +229 -0
- factrix/_multi_factor.py +169 -0
- factrix/_ols.py +78 -0
- factrix/_procedures.py +711 -0
- factrix/_profile.py +103 -0
- factrix/_registry.py +176 -0
- factrix/_stats/__init__.py +511 -0
- factrix/_stats/constants.py +61 -0
- factrix/_types.py +120 -0
- factrix/_validators.py +109 -0
- factrix/adapt.py +139 -0
- factrix/datasets.py +275 -0
- factrix/llms-full.txt +325 -0
- factrix/llms.txt +19 -0
- factrix/metrics/__init__.py +148 -0
- factrix/metrics/_helpers.py +343 -0
- factrix/metrics/caar.py +497 -0
- factrix/metrics/clustering.py +101 -0
- factrix/metrics/concentration.py +195 -0
- factrix/metrics/corrado.py +139 -0
- factrix/metrics/event_horizon.py +326 -0
- factrix/metrics/event_quality.py +409 -0
- factrix/metrics/fama_macbeth.py +592 -0
- factrix/metrics/hit_rate.py +97 -0
- factrix/metrics/ic.py +612 -0
- factrix/metrics/mfe_mae.py +304 -0
- factrix/metrics/monotonicity.py +146 -0
- factrix/metrics/oos.py +209 -0
- factrix/metrics/quantile.py +416 -0
- factrix/metrics/spanning.py +406 -0
- factrix/metrics/tradability.py +508 -0
- factrix/metrics/trend.py +151 -0
- factrix/metrics/ts_asymmetry.py +245 -0
- factrix/metrics/ts_beta.py +436 -0
- factrix/metrics/ts_quantile.py +221 -0
- factrix/multi_factor.py +11 -0
- factrix/preprocess/__init__.py +1 -0
- factrix/preprocess/normalize.py +74 -0
- factrix/preprocess/orthogonalize.py +176 -0
- factrix/preprocess/returns.py +104 -0
- factrix/stats/__init__.py +18 -0
- factrix/stats/bootstrap.py +143 -0
- factrix/stats/multiple_testing.py +153 -0
- factrix-0.8.0.dist-info/METADATA +139 -0
- factrix-0.8.0.dist-info/RECORD +56 -0
- factrix-0.8.0.dist-info/WHEEL +5 -0
- factrix-0.8.0.dist-info/licenses/LICENSE +218 -0
- factrix-0.8.0.dist-info/top_level.txt +1 -0
factrix/__init__.py
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
"""factrix — Single-factor evaluation toolkit (v0.5).
|
|
2
|
+
|
|
3
|
+
Three orthogonal user-facing axes — ``FactorScope``, ``Signal``,
|
|
4
|
+
``Metric`` — plus an evaluate-time-derived ``Mode`` define the analysis
|
|
5
|
+
cell. Construct a config via the four type-safe factories on
|
|
6
|
+
``AnalysisConfig``, dispatch via ``evaluate()``, inspect via the
|
|
7
|
+
returned ``FactorProfile``, and aggregate across factors with
|
|
8
|
+
``multi_factor.bhy`` for FDR-corrected screening.
|
|
9
|
+
|
|
10
|
+
Single-factor::
|
|
11
|
+
|
|
12
|
+
import factrix as fl
|
|
13
|
+
|
|
14
|
+
cfg = fl.AnalysisConfig.individual_continuous(metric=fl.Metric.IC)
|
|
15
|
+
profile = fl.evaluate(panel, cfg)
|
|
16
|
+
print(profile.verdict(), profile.primary_p)
|
|
17
|
+
print(profile.diagnose())
|
|
18
|
+
|
|
19
|
+
Batch + BHY::
|
|
20
|
+
|
|
21
|
+
profiles = [fl.evaluate(panel, cfg) for cfg in candidate_configs]
|
|
22
|
+
survivors = fl.multi_factor.bhy(profiles, threshold=0.05)
|
|
23
|
+
|
|
24
|
+
Schema reflection::
|
|
25
|
+
|
|
26
|
+
print(fl.describe_analysis_modes())
|
|
27
|
+
print(fl.suggest_config(panel))
|
|
28
|
+
|
|
29
|
+
LLM agent reference: ``llms-full.txt`` covers concepts, public API, and
|
|
30
|
+
typical usage patterns in a single fetch. Two access paths::
|
|
31
|
+
|
|
32
|
+
# Web — deployed at the docs site root
|
|
33
|
+
https://awwesomeman.github.io/factrix/llms-full.txt
|
|
34
|
+
|
|
35
|
+
# Local — shipped inside the wheel as package data
|
|
36
|
+
import importlib.resources
|
|
37
|
+
text = importlib.resources.files("factrix").joinpath("llms-full.txt").read_text()
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
from factrix import datasets, multi_factor
|
|
41
|
+
from factrix._analysis_config import AnalysisConfig
|
|
42
|
+
from factrix._axis import ( # noqa: F401 Mode re-exported for namespace access; intentionally not in __all__
|
|
43
|
+
FactorScope,
|
|
44
|
+
Metric,
|
|
45
|
+
Mode,
|
|
46
|
+
Signal,
|
|
47
|
+
)
|
|
48
|
+
from factrix._codes import InfoCode, StatCode, Verdict, WarningCode
|
|
49
|
+
from factrix._describe import (
|
|
50
|
+
SuggestConfigResult,
|
|
51
|
+
describe_analysis_modes,
|
|
52
|
+
list_metrics,
|
|
53
|
+
suggest_config,
|
|
54
|
+
)
|
|
55
|
+
from factrix._errors import (
|
|
56
|
+
ConfigError,
|
|
57
|
+
FactrixError,
|
|
58
|
+
IncompatibleAxisError,
|
|
59
|
+
InsufficientSampleError,
|
|
60
|
+
MissingConfigError,
|
|
61
|
+
ModeAxisError,
|
|
62
|
+
)
|
|
63
|
+
from factrix._evaluate import _evaluate as _evaluate
|
|
64
|
+
from factrix._profile import FactorProfile
|
|
65
|
+
from factrix._types import MetricOutput
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def evaluate(raw, config=None, /):
    """Run ``raw`` through the analysis cell that ``config`` selects.

    Public, positional-only front door to the private ``_evaluate``
    dispatcher. The only behaviour it adds is catching the usual
    first-use slip — calling ``evaluate(panel)`` with no config — and
    reporting it as a :class:`MissingConfigError` whose message points
    at :func:`suggest_config` and the Get Started guide.

    Args:
        raw: Input data, handed to ``_evaluate`` untouched.
        config: The ``AnalysisConfig`` to dispatch on. The default
            ``None`` is rejected.

    Returns:
        Whatever ``_evaluate(raw, config)`` returns.

    Raises:
        MissingConfigError: If ``config`` is ``None``.
    """
    if config is not None:
        return _evaluate(raw, config)

    # No config supplied: explain how to obtain one instead of failing
    # somewhere deep inside the dispatcher.
    onboarding_hint = (
        "evaluate() requires an AnalysisConfig. "
        "Call factrix.suggest_config(raw) for a recommendation, "
        "or see the Get Started guide: "
        "https://awwesomeman.github.io/factrix/getting-started/"
    )
    raise MissingConfigError(onboarding_hint)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
__version__ = "0.8.0"

# Names exported by ``from factrix import *``, grouped by role.
__all__ = [
    # -- configuration ----------------------------------------------------
    "AnalysisConfig",
    # -- axis enums -------------------------------------------------------
    # ``Mode`` is deliberately absent: it is derived at evaluate-time from
    # N and read off ``profile.mode``, never set by user code (review fix
    # UX-7). It remains importable from ``factrix._axis``.
    "FactorScope",
    "Metric",
    "Signal",
    # -- code enums -------------------------------------------------------
    "InfoCode",
    "StatCode",
    "Verdict",
    "WarningCode",
    # -- errors -----------------------------------------------------------
    "ConfigError",
    "FactrixError",
    "IncompatibleAxisError",
    "InsufficientSampleError",
    "MissingConfigError",
    "ModeAxisError",
    # -- profile + dispatch -----------------------------------------------
    "FactorProfile",
    "MetricOutput",
    "evaluate",
    # -- introspection ----------------------------------------------------
    "SuggestConfigResult",
    "describe_analysis_modes",
    "list_metrics",
    "suggest_config",
    # -- multi-factor namespace -------------------------------------------
    "multi_factor",
    # -- synthetic panels -------------------------------------------------
    "datasets",
]
|
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
"""v0.5 ``AnalysisConfig`` — three-axis orthogonal factor analysis spec (§4).
|
|
2
|
+
|
|
3
|
+
The user-facing surface is the four factory methods + ``from_dict`` /
|
|
4
|
+
``to_dict``; ``__post_init__`` is the single source of truth for axis
|
|
5
|
+
validation, reachable from every path that produces an ``AnalysisConfig``.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from collections.abc import Callable
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
from typing import Any, Self
|
|
13
|
+
|
|
14
|
+
from factrix._axis import FactorScope, Metric, Mode, Signal
|
|
15
|
+
from factrix._errors import IncompatibleAxisError
|
|
16
|
+
from factrix._registry import matches_user_axis
|
|
17
|
+
|
|
18
|
+
# Suggestion table consulted when an evaluate-time mode/sample check fails
# (§4.5 A4): maps a ``(scope, signal, mode)`` key to the nearest legal cell.
#
# Values are zero-argument factories rather than ready-made configs. The
# table is built before ``AnalysisConfig`` exists, so each lambda postpones
# the name lookup until raise time, by which point the class is defined.
#
# The table is intentionally narrow. Every other legal user triple has both
# a PANEL and a TIMESERIES procedure registered, so ``_evaluate`` never
# needs a fallback for them. Only ``(INDIVIDUAL, CONTINUOUS, *)`` has no
# TIMESERIES cell: with N=1 there is no cross-sectional dispersion, so IC
# and per-date OLS are undefined (§5.5). When another TIMESERIES-less cell
# appears, add one entry here — do not hard-code the suggestion at the
# ``raise`` site.
_FALLBACK_MAP: dict[
    tuple[FactorScope, Signal, Mode],
    Callable[[], AnalysisConfig],
] = {
    (FactorScope.INDIVIDUAL, Signal.CONTINUOUS, Mode.TIMESERIES): (
        lambda: AnalysisConfig.common_continuous()
    ),
}
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _validate_axis_compat(
    scope: FactorScope,
    signal: Signal,
    metric: Metric | None,
) -> None:
    """Reject ``(scope, signal, metric)`` triples that name no legal cell.

    Legality is decided by reverse-querying the registry SSOT (§4.4 A1)
    through ``matches_user_axis``: a triple is admitted when some
    registered ``_DispatchKey`` matches its ``(signal, metric)`` and has
    a scope that equals ``scope`` or is the collapse sentinel.
    ``AnalysisConfig.__post_init__`` calls this, so factories, direct
    construction and ``from_dict`` all pass through the same gate.

    Args:
        scope: Factor scope axis of the candidate cell.
        signal: Signal type axis of the candidate cell.
        metric: Metric axis, or ``None`` for cells that carry no metric.

    Raises:
        IncompatibleAxisError: If the registry admits no such triple.
    """
    if not matches_user_axis(scope, signal, metric):
        metric_repr = None if metric is None else metric.value
        offending_cell = f"({scope.value}, {signal.value}, {metric_repr})"
        # UX-8 from review: put the actionable factory list first and keep
        # the raw tuple enumeration as a trailing parenthetical for people
        # debugging by hand.
        raise IncompatibleAxisError(
            f"{offending_cell} is not a legal "
            "analysis cell. Use one of the four factory methods:\n"
            "  AnalysisConfig.individual_continuous(metric=Metric.IC|Metric.FM)\n"
            "  AnalysisConfig.individual_sparse()\n"
            "  AnalysisConfig.common_continuous()\n"
            "  AnalysisConfig.common_sparse()\n"
            "(legal tuples: (individual, continuous, ic), "
            "(individual, continuous, fm), (individual, sparse, None), "
            "(common, continuous, None), (common, sparse, None).)"
        )
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@dataclass(frozen=True, slots=True)
class AnalysisConfig:
    """Immutable three-axis description of one single-factor analysis.

    The four factory classmethods are the supported public API. Calling
    the constructor directly is also safe: it is validated exactly like
    the factories, because every construction path ends in
    ``__post_init__``.

    Attributes:
        scope: Factor scope axis. ``INDIVIDUAL`` means the factor has a
            value per asset; ``COMMON`` means one value per date,
            broadcast to every asset.
        signal: Signal type axis. ``CONTINUOUS`` is a real-valued
            exposure; ``SPARSE`` is an event trigger that is zero on
            non-event entries (canonical example ``{-1, 0, +1}``).
        metric: Procedure metric axis. Set only for
            ``(INDIVIDUAL, CONTINUOUS, *)`` cells, where it is ``IC`` or
            ``FM``; ``None`` for every other cell.
        forward_periods: Forward-return horizon counted in **rows** of
            the panel's time axis, not in calendar time. factrix never
            inspects the ``date`` dtype or spacing, so frequency and
            regular spacing are the caller's responsibility.
            ``forward_periods=5`` is 5 trading days on a daily panel,
            5 weeks on a weekly panel, 5 minutes on a 1-min bar panel.
    """

    scope: FactorScope
    signal: Signal
    metric: Metric | None
    forward_periods: int = 5

    def __post_init__(self) -> None:
        """Check the ``(scope, signal, metric)`` triple on every construction."""
        _validate_axis_compat(self.scope, self.signal, self.metric)

    @classmethod
    def individual_continuous(
        cls,
        *,
        metric: Metric = Metric.IC,
        forward_periods: int = 5,
    ) -> Self:
        """Build the config for a continuous factor valued per (date, asset).

        Args:
            metric: ``IC`` targets rank predictive ordering; ``FM``
                targets the unit-of-exposure premium (Fama-MacBeth λ).
            forward_periods: Forward-return horizon, in rows of the time
                axis.

        Returns:
            A validated ``AnalysisConfig`` for the
            ``(INDIVIDUAL, CONTINUOUS, metric)`` cell.
        """
        return cls(
            scope=FactorScope.INDIVIDUAL,
            signal=Signal.CONTINUOUS,
            metric=metric,
            forward_periods=forward_periods,
        )

    @classmethod
    def individual_sparse(cls, *, forward_periods: int = 5) -> Self:
        """Build the config for a sparse trigger valued per (date, asset).

        The trigger is of the ``{-1, 0, +1}`` kind. In PANEL mode the
        canonical procedure is the CAAR cross-event t-test; in
        TIMESERIES mode (N=1) it collapses to a dummy regression with
        NW HAC SE.

        Args:
            forward_periods: Forward-return horizon, in rows of the time
                axis.

        Returns:
            A validated ``AnalysisConfig`` for the
            ``(INDIVIDUAL, SPARSE, None)`` cell.
        """
        return cls(
            scope=FactorScope.INDIVIDUAL,
            signal=Signal.SPARSE,
            metric=None,
            forward_periods=forward_periods,
        )

    @classmethod
    def common_continuous(cls, *, forward_periods: int = 5) -> Self:
        """Build the config for a broadcast continuous factor (e.g. VIX).

        The canonical procedure estimates β per asset and then runs a
        cross-asset t-test on ``E[β]``.

        Args:
            forward_periods: Forward-return horizon, in rows of the time
                axis.

        Returns:
            A validated ``AnalysisConfig`` for the
            ``(COMMON, CONTINUOUS, None)`` cell.
        """
        return cls(
            scope=FactorScope.COMMON,
            signal=Signal.CONTINUOUS,
            metric=None,
            forward_periods=forward_periods,
        )

    @classmethod
    def common_sparse(cls, *, forward_periods: int = 5) -> Self:
        """Build the config for a broadcast sparse trigger.

        Typical examples are FOMC dates, policy events and index
        rebalances. PANEL canonical procedure: per-asset β on the dummy
        followed by a cross-asset t-test. TIMESERIES (N=1): time-series
        dummy regression with NW HAC SE.

        Args:
            forward_periods: Forward-return horizon, in rows of the time
                axis.

        Returns:
            A validated ``AnalysisConfig`` for the
            ``(COMMON, SPARSE, None)`` cell.
        """
        return cls(
            scope=FactorScope.COMMON,
            signal=Signal.SPARSE,
            metric=None,
            forward_periods=forward_periods,
        )

    def to_dict(self) -> dict[str, Any]:
        """Dump the config as a JSON-compatible dict.

        Returns:
            A dict with keys ``scope``, ``signal``, ``metric`` and
            ``forward_periods``. Enums are emitted as their string
            values (``metric`` is ``None`` when unset) and
            ``forward_periods`` is passed through as stored.
        """
        return {
            "scope": self.scope.value,
            "signal": self.signal.value,
            "metric": None if self.metric is None else self.metric.value,
            "forward_periods": self.forward_periods,
        }

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> Self:
        """Rebuild a config from the output of ``to_dict``.

        Construction goes through ``__post_init__``, so an illegal
        triple raises ``IncompatibleAxisError`` rather than producing a
        silently invalid config.

        Args:
            d: Mapping shaped like ``to_dict``'s output. ``metric`` and
                ``forward_periods`` are optional (defaults ``None`` and
                ``5``); ``scope`` and ``signal`` are required.

        Returns:
            A validated ``AnalysisConfig``.

        Raises:
            IncompatibleAxisError: If the ``(scope, signal, metric)``
                triple is not a legal cell.
        """
        raw_metric = d.get("metric")
        scope = FactorScope(d["scope"])
        signal = Signal(d["signal"])
        metric = None if raw_metric is None else Metric(raw_metric)
        horizon = d.get("forward_periods", 5)
        return cls(
            scope=scope,
            signal=signal,
            metric=metric,
            forward_periods=horizon,
        )
|
factrix/_axis.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""v0.5 analysis-axis enums (§4.1 of refactor_api.md).
|
|
2
|
+
|
|
3
|
+
Three orthogonal user-facing axes describe an analysis cell:
|
|
4
|
+
|
|
5
|
+
- ``FactorScope`` — does the factor vary per-asset (``INDIVIDUAL``) or
|
|
6
|
+
carry a single value broadcast to every asset (``COMMON``)?
|
|
7
|
+
- ``Signal`` — continuous numeric exposure (``CONTINUOUS``) vs.
|
|
8
|
+
``{0, R}`` event triggers (``SPARSE`` — zero on non-event entries,
|
|
9
|
+
arbitrary real magnitude otherwise; canonical example ``{-1, 0, +1}``)?
|
|
10
|
+
- ``Metric`` — procedure-canonical scalar (``IC`` or ``FM``).
|
|
11
|
+
Only meaningful for ``INDIVIDUAL × CONTINUOUS``; ``None`` for the
|
|
12
|
+
remaining cells.
|
|
13
|
+
|
|
14
|
+
``Mode`` is the fourth axis used by registry keys / dispatch but is not
|
|
15
|
+
user-set: it is derived from ``N`` at evaluate-time and surfaced as
|
|
16
|
+
``Profile.mode`` for downstream pattern-match.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
from enum import StrEnum
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class FactorScope(StrEnum):
|
|
25
|
+
"""Does each asset have its own factor value, or do all share one?"""
|
|
26
|
+
|
|
27
|
+
INDIVIDUAL = "individual"
|
|
28
|
+
COMMON = "common"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class Signal(StrEnum):
|
|
32
|
+
"""Continuous numeric exposure vs. ``{0, R}`` sparse event trigger.
|
|
33
|
+
|
|
34
|
+
Sparse columns are zero on non-event entries with arbitrary real
|
|
35
|
+
magnitude otherwise (canonical example ``{-1, 0, +1}``).
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
CONTINUOUS = "continuous"
|
|
39
|
+
SPARSE = "sparse"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class Metric(StrEnum):
|
|
43
|
+
"""Procedure-canonical scalar for ``INDIVIDUAL × CONTINUOUS`` cells."""
|
|
44
|
+
|
|
45
|
+
IC = "ic"
|
|
46
|
+
FM = "fm"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class Mode(StrEnum):
|
|
50
|
+
"""Sample regime, derived from ``N`` at evaluate-time.
|
|
51
|
+
|
|
52
|
+
``PANEL`` — ``N >= 2`` (multi-asset / multi-event panel).
|
|
53
|
+
``TIMESERIES`` — ``N == 1`` (single-asset time series).
|
|
54
|
+
"""
|
|
55
|
+
|
|
56
|
+
PANEL = "panel"
|
|
57
|
+
TIMESERIES = "timeseries"
|
factrix/_codes.py
ADDED
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
"""v0.5 enum codes for warnings, info notes, cell stats, and verdicts.
|
|
2
|
+
|
|
3
|
+
``WarningCode`` / ``InfoCode`` / ``StatCode`` follow the ``*Code`` suffix
|
|
4
|
+
invariant (§7.5).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from enum import StrEnum
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class WarningCode(StrEnum):
|
|
13
|
+
"""Procedure-degradation flags (replaces v3 ``DegradedMode``).
|
|
14
|
+
|
|
15
|
+
Each value carries a one-line ``description`` gloss for
|
|
16
|
+
``profile.diagnose()`` consumers (review fix UX-4) — pure metadata,
|
|
17
|
+
StrEnum value identity is unchanged.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
UNRELIABLE_SE_SHORT_PERIODS = "unreliable_se_short_periods"
|
|
21
|
+
EVENT_WINDOW_OVERLAP = "event_window_overlap"
|
|
22
|
+
# Fired when ADF p > 0.1 on a CONTINUOUS factor (Stambaugh-style
|
|
23
|
+
# persistent-regressor flag, §5.2 / §7.3). Not raised for SPARSE.
|
|
24
|
+
PERSISTENT_REGRESSOR = "persistent_regressor"
|
|
25
|
+
SERIAL_CORRELATION_DETECTED = "serial_correlation_detected"
|
|
26
|
+
# Two-tier cross-asset N guards for PANEL common_continuous. Mirrors
|
|
27
|
+
# the n_periods two-tier (UNRELIABLE_SE_SHORT_PERIODS) but the axis
|
|
28
|
+
# never raises — cross-asset t-test on E[β] is well-defined for N≥2.
|
|
29
|
+
SMALL_CROSS_SECTION_N = "small_cross_section_n"
|
|
30
|
+
BORDERLINE_CROSS_SECTION_N = "borderline_cross_section_n"
|
|
31
|
+
# Fired by the (COMMON, SPARSE, PANEL) procedure when the broadcast
|
|
32
|
+
# dummy carries MIN_BROADCAST_EVENTS_HARD ≤ n_events <
|
|
33
|
+
# MIN_BROADCAST_EVENTS_WARN. Per-asset β is identifiable but
|
|
34
|
+
# the cross-event averaging is too thin for asymptotic t to be
|
|
35
|
+
# trusted. Below the HARD floor raises InsufficientSampleError instead.
|
|
36
|
+
SPARSE_COMMON_FEW_EVENTS = "sparse_common_few_events"
|
|
37
|
+
# Fired when a sparse ``factor`` column carries mixed signs but is
|
|
38
|
+
# not a clean ±1 ternary (e.g. ``{-2.5, 0, +1.3}``). The CAAR /
|
|
39
|
+
# sparse-panel statistic is the magnitude-weighted Sefcik-Thompson
|
|
40
|
+
# (1986) variant, which differs from the textbook MacKinlay (1997)
|
|
41
|
+
# signed CAAR at finite samples when negative- and positive-leg
|
|
42
|
+
# vols disagree. ``{-1, 0, +1}`` does not trigger — sign and weight
|
|
43
|
+
# semantics coincide numerically. All-non-negative columns
|
|
44
|
+
# (``{0, 1}`` / ``{0, R≥0}``) do not trigger — no flip ambiguity.
|
|
45
|
+
SPARSE_MAGNITUDE_WEIGHTED = "sparse_magnitude_weighted"
|
|
46
|
+
# Fired by ``caar`` (significance test) when the per-event-date series
|
|
47
|
+
# length sits in ``[MIN_EVENTS_HARD, MIN_EVENTS_WARN)`` — the t-stat
|
|
48
|
+
# is returned but the Brown-Warner (1985) convention treats sub-30
|
|
49
|
+
# event-date counts as power-thin for the asymptotic t-distribution.
|
|
50
|
+
# Below the HARD floor the primitive short-circuits to NaN instead.
|
|
51
|
+
FEW_EVENTS_BROWN_WARNER = "few_events_brown_warner"
|
|
52
|
+
# Fired by ``top_concentration`` when the per-date ratio series sits
|
|
53
|
+
# in ``[MIN_PORTFOLIO_PERIODS_HARD, MIN_PORTFOLIO_PERIODS_WARN)`` —
|
|
54
|
+
# the one-sided t-test on the diversification ratio is returned but
|
|
55
|
+
# ``df = n - 1 < 19`` inflates t_crit relative to the asymptotic
|
|
56
|
+
# cutoff. Below the HARD floor the primitive short-circuits to NaN.
|
|
57
|
+
BORDERLINE_PORTFOLIO_PERIODS = "borderline_portfolio_periods"
|
|
58
|
+
|
|
59
|
+
@property
|
|
60
|
+
def description(self) -> str:
|
|
61
|
+
return _WARNING_DESCRIPTIONS[self]
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
_WARNING_DESCRIPTIONS: dict[WarningCode, str] = {}
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
_WARNING_DESCRIPTIONS.update(
|
|
68
|
+
{
|
|
69
|
+
WarningCode.UNRELIABLE_SE_SHORT_PERIODS: "n_periods is below the WARN floor (~30); NW HAC SE may be biased. "
|
|
70
|
+
"Reused across panel time-series guards (MIN_PERIODS_WARN) and "
|
|
71
|
+
"primitive inference (MIN_FM_PERIODS_WARN); both default to 30.",
|
|
72
|
+
WarningCode.EVENT_WINDOW_OVERLAP: "Adjacent events sit within forward_periods; AR windows overlap.",
|
|
73
|
+
WarningCode.PERSISTENT_REGRESSOR: "ADF p > 0.10 on the continuous factor; β may carry Stambaugh bias.",
|
|
74
|
+
WarningCode.SERIAL_CORRELATION_DETECTED: "Ljung-Box p < 0.05 on residuals; NW lag may be under-set.",
|
|
75
|
+
WarningCode.SMALL_CROSS_SECTION_N: "PANEL cross-asset t-test with n_assets < MIN_ASSETS (10); "
|
|
76
|
+
"df=n_assets-1 too low — t_crit at n_assets=3 ≈ 4.30 "
|
|
77
|
+
"(+119% vs asymptotic 1.96).",
|
|
78
|
+
WarningCode.BORDERLINE_CROSS_SECTION_N: "PANEL cross-asset t-test with MIN_ASSETS ≤ n_assets < "
|
|
79
|
+
"MIN_ASSETS_WARN (10..29); residual t_crit inflation "
|
|
80
|
+
"5–15% — read borderline p-values cautiously.",
|
|
81
|
+
WarningCode.SPARSE_COMMON_FEW_EVENTS: "(COMMON, SPARSE, PANEL) broadcast dummy has "
|
|
82
|
+
"MIN_BROADCAST_EVENTS_HARD ≤ n_events < MIN_BROADCAST_EVENTS_WARN "
|
|
83
|
+
"(5..19); per-asset β estimable but cross-event averaging too thin "
|
|
84
|
+
"for asymptotic t.",
|
|
85
|
+
WarningCode.SPARSE_MAGNITUDE_WEIGHTED: "Sparse factor column is mixed-sign and not a "
|
|
86
|
+
"clean ±1 ternary; statistic is magnitude-weighted (Sefcik-Thompson) "
|
|
87
|
+
"rather than textbook MacKinlay signed CAAR — apply .sign() before "
|
|
88
|
+
"calling for sign-flip semantics.",
|
|
89
|
+
WarningCode.FEW_EVENTS_BROWN_WARNER: "CAAR significance test with MIN_EVENTS_HARD ≤ "
|
|
90
|
+
"n_event_dates < MIN_EVENTS_WARN (4..29); t-stat returned but "
|
|
91
|
+
"Brown-Warner (1985) convention treats sub-30 events as power-thin "
|
|
92
|
+
"for the asymptotic t-distribution — read borderline p-values cautiously.",
|
|
93
|
+
WarningCode.BORDERLINE_PORTFOLIO_PERIODS: "top_concentration with MIN_PORTFOLIO_PERIODS_HARD "
|
|
94
|
+
"≤ n_periods < MIN_PORTFOLIO_PERIODS_WARN (3..19); one-sided t-test "
|
|
95
|
+
"on the per-date diversification ratio is returned but df=n-1 inflates "
|
|
96
|
+
"t_crit relative to the asymptotic cutoff.",
|
|
97
|
+
}
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def cross_section_tier(n_assets: int) -> WarningCode | None:
    """Pick the cross-section warning tier for an inference-stage asset count.

    ``n_assets`` must be the **inference-stage** N: the number of assets
    that actually enter the cross-asset test, not the panel-union
    ``FactorProfile.n_assets`` surface field. The two differ for
    ``(COMMON, *, None, PANEL)`` cells, where ``compute_ts_betas`` drops
    assets with fewer than ``MIN_TS_OBS`` non-null observations; the
    union can then be materially larger than the post-filter count
    behind ``primary_p``'s ``dof = N - 1``. Callers (``suggest_config``,
    ``_compute_common_panel``) therefore pre-filter before calling.

    The tiers are mutually exclusive, with SMALL strictly more severe
    than BORDERLINE, so a caller can membership-check the more severe
    code without an else branch.

    Args:
        n_assets: Inference-stage count of assets.

    Returns:
        ``WarningCode.SMALL_CROSS_SECTION_N`` for
        ``2 <= n_assets < MIN_ASSETS``;
        ``WarningCode.BORDERLINE_CROSS_SECTION_N`` for
        ``MIN_ASSETS <= n_assets < MIN_ASSETS_WARN``; otherwise ``None``
        — either the count is clean (``n_assets ≥ MIN_ASSETS_WARN``) or
        it is below 2, which upstream mode routing makes impossible for
        PANEL and is handled here only defensively.
    """
    from factrix._stats.constants import MIN_ASSETS, MIN_ASSETS_WARN

    in_small_tier = 2 <= n_assets < MIN_ASSETS
    in_borderline_tier = MIN_ASSETS <= n_assets < MIN_ASSETS_WARN

    if in_small_tier:
        return WarningCode.SMALL_CROSS_SECTION_N
    return WarningCode.BORDERLINE_CROSS_SECTION_N if in_borderline_tier else None
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class InfoCode(StrEnum):
|
|
129
|
+
"""Neutral facts surfaced to the caller — not warnings, not errors."""
|
|
130
|
+
|
|
131
|
+
SCOPE_AXIS_COLLAPSED = "scope_axis_collapsed"
|
|
132
|
+
|
|
133
|
+
@property
|
|
134
|
+
def description(self) -> str:
|
|
135
|
+
return _INFO_DESCRIPTIONS[self]
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
_INFO_DESCRIPTIONS: dict[InfoCode, str] = {
|
|
139
|
+
InfoCode.SCOPE_AXIS_COLLAPSED: "N=1 collapsed scope axis; routed via _SCOPE_COLLAPSED sentinel.",
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
class StatCode(StrEnum):
|
|
144
|
+
"""Cell-specific scalar stats keyed in ``FactorProfile.stats``.
|
|
145
|
+
|
|
146
|
+
Adding a new metric → add an enum value here + populate it in the
|
|
147
|
+
procedure. Profile schema does not grow. Stats fall in three
|
|
148
|
+
families:
|
|
149
|
+
|
|
150
|
+
- **p-values**: identifier ends in ``_p`` (``IC_P`` / ``FM_LAMBDA_P``
|
|
151
|
+
/ ``TS_BETA_P`` / ``CAAR_P`` plus the diagnostic-only
|
|
152
|
+
``FACTOR_ADF_P`` / ``LJUNG_BOX_P``). ``is_p_value`` returns
|
|
153
|
+
``True``. These are the only codes ``multi_factor.bhy`` will
|
|
154
|
+
accept as a ``gate=`` override (BHY step-up requires probabilities
|
|
155
|
+
— feeding it t-stats yields nonsense FDR control).
|
|
156
|
+
- **t-stats** / effect-size means / lag counts / HHI: ``is_p_value``
|
|
157
|
+
returns ``False``. ``profile.verdict(gate=...)`` accepts these
|
|
158
|
+
(the comparison is generic ``value < threshold`` — interpretation
|
|
159
|
+
is the caller's call) but ``bhy(gate=...)`` rejects them.
|
|
160
|
+
"""
|
|
161
|
+
|
|
162
|
+
IC_MEAN = "ic_mean"
|
|
163
|
+
IC_T_NW = "ic_t_nw"
|
|
164
|
+
IC_P = "ic_p"
|
|
165
|
+
FM_LAMBDA_MEAN = "fm_lambda_mean"
|
|
166
|
+
FM_LAMBDA_T_NW = "fm_lambda_t_nw"
|
|
167
|
+
FM_LAMBDA_P = "fm_lambda_p"
|
|
168
|
+
TS_BETA = "ts_beta"
|
|
169
|
+
TS_BETA_T_NW = "ts_beta_t_nw"
|
|
170
|
+
TS_BETA_P = "ts_beta_p"
|
|
171
|
+
CAAR_MEAN = "caar_mean"
|
|
172
|
+
CAAR_T_NW = "caar_t_nw"
|
|
173
|
+
CAAR_P = "caar_p"
|
|
174
|
+
FACTOR_ADF_P = "factor_adf_p"
|
|
175
|
+
LJUNG_BOX_P = "ljung_box_p"
|
|
176
|
+
EVENT_TEMPORAL_HHI = "event_temporal_hhi"
|
|
177
|
+
NW_LAGS_USED = "nw_lags_used"
|
|
178
|
+
|
|
179
|
+
@property
|
|
180
|
+
def is_p_value(self) -> bool:
|
|
181
|
+
"""``True`` iff this stat is a probability in [0, 1].
|
|
182
|
+
|
|
183
|
+
Used by ``multi_factor.bhy`` to gatekeep the ``gate=`` override
|
|
184
|
+
— BHY step-up math requires p-values, so feeding a t-stat would
|
|
185
|
+
silently corrupt FDR control.
|
|
186
|
+
"""
|
|
187
|
+
return self.value.endswith("_p")
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
class Verdict(StrEnum):
|
|
191
|
+
"""Procedure-canonical pass/fail outcome of ``Profile.verdict()``."""
|
|
192
|
+
|
|
193
|
+
PASS = "pass"
|
|
194
|
+
FAIL = "fail"
|