favar 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- favar/__init__.py +8 -0
- favar/factor.py +105 -0
- favar/model.py +240 -0
- favar/order_selection.py +71 -0
- favar/results.py +332 -0
- favar/summary.py +160 -0
- favar-0.1.0.dist-info/METADATA +631 -0
- favar-0.1.0.dist-info/RECORD +11 -0
- favar-0.1.0.dist-info/WHEEL +5 -0
- favar-0.1.0.dist-info/licenses/LICENSE +21 -0
- favar-0.1.0.dist-info/top_level.txt +1 -0
favar/__init__.py
ADDED
favar/factor.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""Factor extraction and input preparation helpers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
import pandas as pd
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def as_float_dataframe(data, name: str) -> pd.DataFrame:
    """Coerce *data* into a float-valued DataFrame with unique column labels.

    Parameters
    ----------
    data : DataFrame or array_like
        Anything accepted by the ``pd.DataFrame`` constructor. An existing
        DataFrame is used as-is (it is copied, never modified).
    name : str
        Label used in error messages to identify the offending argument.

    Returns
    -------
    DataFrame
        A new frame whose columns have all been cast to ``float``.

    Raises
    ------
    ValueError
        If the frame is not two-dimensional, has no columns, has duplicate
        column names, or holds values that cannot be converted to numbers.
    """
    frame = data if isinstance(data, pd.DataFrame) else pd.DataFrame(data)

    if frame.ndim != 2:
        raise ValueError(f"{name} must be two-dimensional.")
    n_columns = frame.shape[1]
    if n_columns == 0:
        raise ValueError(f"{name} must contain at least one column.")
    if frame.columns.has_duplicates:
        raise ValueError(f"{name} has duplicate column names.")

    # Work on a copy so the caller's object is never touched.
    converted = frame.copy()
    try:
        numeric = converted.apply(pd.to_numeric, errors="raise")
        converted = numeric.astype(float)
    except Exception as exc:  # pragma: no cover - pandas exception details vary
        raise ValueError(f"{name} must contain only numeric values.") from exc
    return converted
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def align_panels(x: pd.DataFrame, y: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Put X and Y on a common row index and refuse panels with gaps.

    When the two indexes already match, the inputs are returned unchanged.
    Otherwise both frames are restricted to the index labels they share
    (inner join along the rows).

    Raises
    ------
    ValueError
        If the frames share no index labels, end up with different lengths,
        or contain missing values once aligned.
    """
    indexes_match = x.index.equals(y.index)
    if not indexes_match:
        x, y = x.align(y, join="inner", axis=0)
        if not len(x):
            raise ValueError("X and Y do not share any index values.")

    if len(x) != len(y):
        raise ValueError("X and Y must have the same number of observations.")

    # X is checked first so its message takes precedence when both have gaps.
    checks = (
        (x, "X contains missing values after alignment."),
        (y, "Y contains missing values after alignment."),
    )
    for panel, message in checks:
        if bool(panel.isna().to_numpy().any()):
            raise ValueError(message)
    return x, y
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def standardize_frame(x: pd.DataFrame) -> tuple[pd.DataFrame, np.ndarray, np.ndarray]:
    """Center and scale every column using the population (ddof=0) deviation.

    Returns
    -------
    standardized : DataFrame
        Same labels as *x*, each column with its mean removed and divided by
        its scale.
    mean : ndarray
        Column means of *x*.
    std : ndarray
        Column scales actually used; constant columns report ``1.0`` so they
        are centered without dividing by zero.
    """
    raw = x.to_numpy(dtype=float)
    center = raw.mean(axis=0)
    spread = raw.std(axis=0, ddof=0)
    # A zero deviation means a constant column: use unit scale instead.
    scale = np.where(spread == 0, 1.0, spread)
    scaled = (raw - center) / scale
    result = pd.DataFrame(scaled, columns=x.columns, index=x.index)
    return result, center, scale
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def principal_components(z, k_factors: int) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Compute the leading principal components of a panel via the SVD.

    Parameters
    ----------
    z : array_like
        Standardized information panel with shape ``(nobs, nseries)``.
    k_factors : int
        How many components to keep; must lie in
        ``1 .. min(nobs, nseries)``.

    Returns
    -------
    factors : ndarray
        Component scores ``U S / sqrt(nseries)`` for the kept components
        (the BBE normalization).
    loadings : ndarray
        Right singular vectors of the kept components, one per column.
    explained_variance_ratio : ndarray
        Squared singular value of each kept component divided by the sum of
        all squared singular values (all zeros when that sum is zero).

    Raises
    ------
    ValueError
        If *z* is not two-dimensional or *k_factors* is out of range.
    """
    panel = np.asarray(z, dtype=float)
    if panel.ndim != 2:
        raise ValueError("z must be two-dimensional.")

    nobs, nseries = panel.shape
    if k_factors < 1:
        raise ValueError("k_factors must be at least 1.")
    max_rank = min(nobs, nseries)
    if k_factors > max_rank:
        raise ValueError("k_factors cannot exceed min(nobs, nseries).")

    left, singular, right_t = np.linalg.svd(panel, full_matrices=False)
    kept = singular[:k_factors]

    scores = left[:, :k_factors] * kept / np.sqrt(nseries)
    directions = right_t[:k_factors].T

    energy = np.sum(singular**2)
    if energy > 0:
        share = (kept ** 2) / energy
    else:
        # Degenerate all-zero panel: no variation to attribute.
        share = np.zeros(k_factors)
    return scores, directions, share
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def validate_slow_columns(
    x_columns: pd.Index, slow_columns: list[str] | None, k_factors: int
) -> list[str]:
    """Validate slow-moving column names and return the effective list.

    Parameters
    ----------
    x_columns : Index
        Column labels of the information panel.
    slow_columns : sequence[str] or None
        Requested slow-moving columns. ``None`` selects every panel column.
        Repeated names are collapsed to their first occurrence so that a
        duplicated entry cannot satisfy the ``k_factors`` count on its own
        or select the same series twice.
    k_factors : int
        Number of factors that will be extracted from the slow panel.

    Returns
    -------
    list[str]
        Distinct slow-moving column names, in the order first given.

    Raises
    ------
    ValueError
        If a requested name is not a panel column, or fewer than
        ``k_factors`` distinct columns remain.
    """
    if slow_columns is None:
        slow = list(x_columns)
    else:
        # dict.fromkeys keeps first-seen order while dropping repeats; it also
        # consumes the input exactly once, so one-shot iterables work.
        slow = list(dict.fromkeys(slow_columns))
        missing = [col for col in slow if col not in x_columns]
        if missing:
            raise ValueError(f"slow_columns contains unknown columns: {missing}")

    if len(slow) < k_factors:
        raise ValueError("slow_columns must contain at least k_factors columns.")
    return slow
|
favar/model.py
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
"""FAVAR model classes."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
import pandas as pd
|
|
7
|
+
from statsmodels.tsa.api import VAR
|
|
8
|
+
|
|
9
|
+
from .factor import (
|
|
10
|
+
align_panels,
|
|
11
|
+
as_float_dataframe,
|
|
12
|
+
principal_components,
|
|
13
|
+
standardize_frame,
|
|
14
|
+
validate_slow_columns,
|
|
15
|
+
)
|
|
16
|
+
from .order_selection import FAVAROrderSelection
|
|
17
|
+
from .results import FAVARResults
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class FAVAR:
    """Factor-Augmented VAR estimated with the BBE two-step procedure.

    Parameters
    ----------
    X : DataFrame or array_like
        Large information panel with shape ``(nobs, nseries)``.
    Y : DataFrame or array_like
        Observed variables included directly in the VAR. Must contain
        ``policy_var``. Column names ``F1`` .. ``F<k_factors>`` are reserved
        for the estimated factors and must not appear in ``Y``.
    policy_var : str
        Policy instrument ordered last for recursive identification.
    k_factors : int, default 3
        Number of factors extracted from ``X``.
    slow_columns : sequence[str], optional
        Columns of ``X`` treated as slow-moving. If omitted, all X columns are
        used. Supplying this list is recommended for monetary-policy studies.
    standardize : bool, default True
        Standardize ``X`` before principal components.

    Raises
    ------
    ValueError
        If ``X``/``Y`` fail validation or alignment, or ``policy_var`` is not
        a column of ``Y``.
    """

    def __init__(
        self,
        X,
        Y,
        policy_var: str,
        k_factors: int = 3,
        slow_columns: list[str] | None = None,
        standardize: bool = True,
    ):
        self.X = as_float_dataframe(X, "X")
        self.Y = as_float_dataframe(Y, "Y")
        self.X, self.Y = align_panels(self.X, self.Y)

        if policy_var not in self.Y.columns:
            raise ValueError("policy_var must be a column of Y.")
        self.policy_var = policy_var
        self.k_factors = int(k_factors)
        # Store a private copy so later changes to the caller's list have no effect.
        self.slow_columns = (
            list(slow_columns) if slow_columns is not None else None
        )
        self.standardize = bool(standardize)
        self._results: FAVARResults | None = None

    def _prepare_system(self):
        """Run the factor steps and build the augmented FAVAR system.

        Returns
        -------
        dict
            The VAR input frame (factors followed by Y with the policy
            variable last) plus every intermediate quantity of the factor
            step: raw and slow principal components, cleaning and measurement
            regression coefficients, and the standardization constants.

        Raises
        ------
        ValueError
            If a generated factor name is already a column of ``Y``, or the
            slow-column / factor-count validation fails.
        """
        X = self.X
        Y = self.Y

        factor_names = [f"F{i + 1}" for i in range(self.k_factors)]
        # A clash would give the VAR frame duplicate column labels and make
        # every later by-name lookup ambiguous, so refuse it up front.
        clashes = [name for name in factor_names if name in Y.columns]
        if clashes:
            raise ValueError(
                f"Y columns collide with generated factor names: {clashes}"
            )

        nobs, nseries = X.shape

        if self.standardize:
            Xs, x_mean, x_std = standardize_frame(X)
        else:
            # Identity transform: report zero shift and unit scale.
            Xs = X.copy()
            x_mean = np.zeros(nseries)
            x_std = np.ones(nseries)

        slow_columns = validate_slow_columns(
            X.columns, self.slow_columns, self.k_factors
        )

        # Step 1: principal components of the full panel and of the slow block.
        f0, pc_loadings, pc_var_ratio = principal_components(
            Xs.to_numpy(), self.k_factors
        )
        fslow, slow_pc_loadings, slow_pc_var_ratio = principal_components(
            Xs.loc[:, slow_columns].to_numpy(), self.k_factors
        )

        # Step 2: regress the full-panel components on a constant, the policy
        # variable and the slow components, then subtract only the estimated
        # policy contribution from the components.
        policy = Y[self.policy_var].to_numpy(dtype=float)
        cleaning_design = np.column_stack([np.ones(nobs), policy, fslow])
        cleaning_coef, *_ = np.linalg.lstsq(cleaning_design, f0, rcond=None)
        policy_cleaning_coef = cleaning_coef[1, :]  # row 1 = policy regressor
        factors = f0 - np.outer(policy, policy_cleaning_coef)

        # VAR ordering: factors first, remaining Y columns, policy variable last.
        y_order = [col for col in Y.columns if col != self.policy_var]
        y_order.append(self.policy_var)
        order = factor_names + y_order
        y_values = Y.loc[:, y_order].to_numpy(dtype=float)
        var_data = pd.DataFrame(
            np.column_stack([factors, y_values]),
            columns=order,
            index=X.index,
        )

        # Measurement regression: each (standardized) X series on a constant,
        # the cleaned factors and the ordered Y variables.
        measurement_design = np.column_stack([np.ones(nobs), factors, y_values])
        measurement_coef, *_ = np.linalg.lstsq(
            measurement_design, Xs.to_numpy(dtype=float), rcond=None
        )

        return {
            "var_data": var_data,
            "order": order,
            "factor_names": factor_names,
            "y_names": y_order,
            "x_names": list(X.columns),
            "slow_columns": slow_columns,
            "factors": factors,
            "principal_components": f0,
            "slow_principal_components": fslow,
            "pc_loadings": pc_loadings,
            "slow_pc_loadings": slow_pc_loadings,
            "explained_variance_ratio": pc_var_ratio,
            "slow_explained_variance_ratio": slow_pc_var_ratio,
            "cleaning_coefficients": cleaning_coef,
            "policy_cleaning_coefficients": policy_cleaning_coef,
            "measurement_intercept": measurement_coef[0, :],
            "measurement_loadings": measurement_coef[1:, :],
            "x_mean": x_mean,
            "x_std": x_std,
        }

    def select_order(self, maxlags: int = 12, trend: str = "c"):
        """Select the lag order of the augmented FAVAR system.

        Parameters
        ----------
        maxlags : int, default 12
            Largest lag order passed to the VAR order-selection routine.
        trend : str, default "c"
            Deterministic terms passed to the VAR order-selection routine.

        Returns
        -------
        FAVAROrderSelection
            Object with selected orders, a DataFrame representation, and a
            printable summary table.
        """
        prepared = self._prepare_system()
        var_model = VAR(prepared["var_data"])
        order_results = var_model.select_order(maxlags=maxlags, trend=trend)
        return FAVAROrderSelection(
            order_results,
            k_factors=self.k_factors,
            n_x=len(prepared["x_names"]),
            n_slow=len(prepared["slow_columns"]),
            policy_var=self.policy_var,
            maxlags=maxlags,
        )

    def fit(
        self,
        lags: int | None = 13,
        select_order: str | None = None,
        maxlags: int | None = None,
        trend: str = "c",
        verbose: bool = False,
    ):
        """Estimate the FAVAR.

        Parameters
        ----------
        lags : int, default 13
            Fixed VAR lag order when ``select_order`` is not given. When
            ``select_order`` is given, ``lags`` is used only as the search
            cap, and only if ``maxlags`` is omitted.
        select_order : {"aic", "bic", "hqic", "fpe"}, optional
            Information criterion used to select the VAR lag order.
        maxlags : int, optional
            Maximum lag considered when ``select_order`` is used. If omitted,
            ``lags`` is used.
        trend : {"c", "ct", "ctt", "n"}, default "c"
            Deterministic terms in the VAR step.
        verbose : bool, default False
            Print lag-selection details when available.

        Returns
        -------
        FAVARResults
            The fitted results; also cached on the model so the convenience
            methods (``summary``, ``forecast``, ...) can delegate to it.
        """
        prepared = self._prepare_system()
        var_model = VAR(prepared["var_data"])
        if select_order is None:
            var_results = var_model.fit(
                maxlags=lags, ic=None, trend=trend, verbose=verbose
            )
        else:
            lag_cap = maxlags if maxlags is not None else lags
            var_results = var_model.fit(
                maxlags=lag_cap, ic=select_order, trend=trend, verbose=verbose
            )

        self._results = FAVARResults(
            model=self,
            var_results=var_results,
            var_data=prepared["var_data"],
            order=prepared["order"],
            factor_names=prepared["factor_names"],
            y_names=prepared["y_names"],
            x_names=prepared["x_names"],
            slow_columns=prepared["slow_columns"],
            policy_var=self.policy_var,
            factors=prepared["factors"],
            principal_components=prepared["principal_components"],
            slow_principal_components=prepared["slow_principal_components"],
            pc_loadings=prepared["pc_loadings"],
            slow_pc_loadings=prepared["slow_pc_loadings"],
            explained_variance_ratio=prepared["explained_variance_ratio"],
            slow_explained_variance_ratio=prepared["slow_explained_variance_ratio"],
            cleaning_coefficients=prepared["cleaning_coefficients"],
            policy_cleaning_coefficients=prepared["policy_cleaning_coefficients"],
            measurement_intercept=prepared["measurement_intercept"],
            measurement_loadings=prepared["measurement_loadings"],
            x_mean=prepared["x_mean"],
            x_std=prepared["x_std"],
            index=self.X.index,
            standardize=self.standardize,
        )
        return self._results

    def _check_results(self) -> FAVARResults:
        """Return the cached results, raising ``RuntimeError`` before ``fit()``."""
        if self._results is None:
            raise RuntimeError("Call fit() before accessing results.")
        return self._results

    def summary(self):
        """Delegate to ``summary()`` of the fitted results."""
        return self._check_results().summary()

    def forecast(self, steps: int, alpha: float = 0.05, confidence_level=None):
        """Delegate to ``forecast()`` of the fitted results."""
        return self._check_results().forecast(
            steps, alpha=alpha, confidence_level=confidence_level
        )

    def impulse_response(self, periods: int = 10, shock: str | None = None, **kwargs):
        """Delegate to ``impulse_response()`` of the fitted results."""
        return self._check_results().impulse_response(periods, shock=shock, **kwargs)

    def panel_impulse_response(
        self, periods: int = 10, shock: str | None = None, **kwargs
    ):
        """Delegate to ``panel_impulse_response()`` of the fitted results."""
        return self._check_results().panel_impulse_response(
            periods, shock=shock, **kwargs
        )
|
favar/order_selection.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""Lag-order selection results for FAVAR models."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
from statsmodels.iolib.table import SimpleTable
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class FAVAROrderSelection:
    """Lag-order selection table for the augmented FAVAR system.

    Parameters
    ----------
    order_results
        Object exposing ``ics`` (criterion name -> sequence of values, one
        per evaluated lag) and ``selected_orders`` (criterion name -> chosen
        lag), as returned by the VAR order-selection step.
    k_factors, n_x, n_slow, policy_var
        Descriptive metadata of the FAVAR that produced the table; stored
        unchanged.
    maxlags : int or None
        Largest lag that was evaluated. When ``None`` the lag labels are
        inferred from ``order_results`` instead.
    """

    # Criteria shown in the table, in column order.
    _CRITERIA = ("aic", "bic", "fpe", "hqic")

    def __init__(
        self,
        order_results,
        k_factors: int,
        n_x: int,
        n_slow: int,
        policy_var: str,
        maxlags: int,
    ):
        self.order_results = order_results
        self.ics = order_results.ics
        self.selected_orders = dict(order_results.selected_orders)
        self.aic = self.selected_orders["aic"]
        self.bic = self.selected_orders["bic"]
        self.fpe = self.selected_orders["fpe"]
        self.hqic = self.selected_orders["hqic"]
        self.k_factors = k_factors
        self.n_x = n_x
        self.n_slow = n_slow
        self.policy_var = policy_var
        self.maxlags = maxlags

    def __repr__(self):
        return str(self.summary())

    def __str__(self):
        return (
            "<FAVAROrderSelection: "
            f"AIC={self.aic}, BIC={self.bic}, FPE={self.fpe}, HQIC={self.hqic}>"
        )

    def _lag_index(self, nrows: int) -> range:
        """Return the lag labels for the ``nrows`` rows of the criteria table."""
        if self.maxlags is not None:
            # The rows end at maxlags, so count back to the first lag.
            first = self.maxlags - nrows + 1
        else:
            # NOTE(review): assumes the selected order equals the position of
            # the criterion's minimum plus the first evaluated lag, which is
            # how statsmodels' VAR.select_order is understood to report it -
            # confirm against the installed statsmodels version.
            reference = list(self.ics["aic"])
            position = min(range(nrows), key=reference.__getitem__)
            first = int(self.aic) - position
        return range(first, first + nrows)

    def to_frame(self, mark_min: bool = False) -> pd.DataFrame:
        """Return the information criteria as a DataFrame.

        Parameters
        ----------
        mark_min : bool, default False
            If true, return the values as strings formatted with ``#.4g`` and
            append ``*`` to the entry of each criterion's selected lag.

        Returns
        -------
        DataFrame
            Rows are lag orders, columns are ``AIC``, ``BIC``, ``FPE`` and
            ``HQIC``.
        """
        columns = self._CRITERIA
        nrows = len(self.ics[columns[0]])
        frame = pd.DataFrame(
            {col.upper(): self.ics[col] for col in columns},
            index=self._lag_index(nrows),
        )
        if not mark_min:
            return frame

        # Format a whole column at a time instead of one .loc lookup per cell.
        marked = pd.DataFrame(
            {
                label: [f"{value:#.4g}" for value in frame[label]]
                for label in frame.columns
            },
            index=frame.index,
            dtype=object,
        )
        for col in columns:
            label = col.upper()
            selected = self.selected_orders[col]
            marked.at[selected, label] = f"{frame.at[selected, label]:#.4g}*"
        return marked

    def summary(self):
        """Return a printable order-selection table."""
        frame = self.to_frame(mark_min=True)
        table = SimpleTable(
            frame.to_numpy(dtype=object),
            headers=list(frame.columns),
            stubs=[str(i) for i in frame.index],
            title="FAVAR Lag Order Selection (* highlights the minimums)",
        )
        return table
|