favar 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
favar/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ """Factor-Augmented Vector Autoregression (FAVAR)."""
2
+
3
+ from .model import FAVAR
4
+ from .order_selection import FAVAROrderSelection
5
+ from .results import FAVARResults
6
+
7
+ __all__ = ["FAVAR", "FAVAROrderSelection", "FAVARResults"]
8
+ __version__ = "0.1.0"
favar/factor.py ADDED
@@ -0,0 +1,105 @@
1
+ """Factor extraction and input preparation helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+
8
+
9
def as_float_dataframe(data, name: str) -> pd.DataFrame:
    """Return *data* as a float DataFrame with unique column names.

    Parameters
    ----------
    data : DataFrame or array_like
        Input panel. Anything that is not already a DataFrame is passed to
        the ``pd.DataFrame`` constructor.
    name : str
        Label used in the error messages (for example ``"X"``).

    Returns
    -------
    DataFrame
        A new frame in which every column has been converted to ``float``.

    Raises
    ------
    ValueError
        If *data* has more than two dimensions, has no columns, has
        duplicate column names, or contains non-numeric values.
    """
    if not isinstance(data, pd.DataFrame):
        # A DataFrame is always two-dimensional, so dimensionality can only
        # be checked on the raw input, before it is converted.
        if getattr(data, "ndim", 2) > 2:
            raise ValueError(f"{name} must be two-dimensional.")
        data = pd.DataFrame(data)

    if data.shape[1] == 0:
        raise ValueError(f"{name} must contain at least one column.")
    if data.columns.has_duplicates:
        raise ValueError(f"{name} has duplicate column names.")

    try:
        # apply/astype build new objects, so the caller's frame is untouched.
        out = data.apply(pd.to_numeric, errors="raise").astype(float)
    except Exception as exc:  # pragma: no cover - pandas exception details vary
        raise ValueError(f"{name} must contain only numeric values.") from exc
    return out
27
+
28
+
29
def align_panels(x: pd.DataFrame, y: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Align X and Y on the time index and reject missing values.

    Parameters
    ----------
    x, y : DataFrame
        Panels to align on their row index.

    Returns
    -------
    tuple of DataFrame
        ``(x, y)`` restricted to the index values they share. The inputs
        are returned unchanged when their indexes are already equal.

    Raises
    ------
    ValueError
        If both panels carry a default positional index but differ in
        length, if they share no index values, or if either panel contains
        missing values after alignment.
    """
    if not x.index.equals(y.index):
        # Two default 0..n-1 indexes that differ can only differ in length.
        # An inner join would then silently drop the tail of the longer
        # panel, so this case is rejected instead of aligned.
        positional = all(
            isinstance(idx, pd.RangeIndex) and idx.start == 0 and idx.step == 1
            for idx in (x.index, y.index)
        )
        if positional:
            raise ValueError("X and Y must have the same number of observations.")

        x, y = x.align(y, join="inner", axis=0)
        if len(x) == 0:
            raise ValueError("X and Y do not share any index values.")

    if x.isna().any().any():
        raise ValueError("X contains missing values after alignment.")
    if y.isna().any().any():
        raise ValueError("Y contains missing values after alignment.")
    return x, y
43
+
44
+
45
def standardize_frame(x: pd.DataFrame) -> tuple[pd.DataFrame, np.ndarray, np.ndarray]:
    """Standardize columns with population standard deviations.

    Parameters
    ----------
    x : DataFrame
        Numeric panel to standardize column by column.

    Returns
    -------
    standardized : DataFrame
        ``(x - mean) / std`` with the columns and index of *x*. Constant
        columns are returned as zeros.
    mean : ndarray
        Column means.
    std : ndarray
        Column standard deviations (``ddof=0``), with ``1.0`` substituted
        for constant columns so the division is well defined.
    """
    values = x.to_numpy(dtype=float)
    mean = values.mean(axis=0)
    std = values.std(axis=0, ddof=0)

    # A constant column does not always produce an exact zero std: rounding
    # in the mean can leave a deviation of ~1e-17, which would scale the
    # column to +/-1. Detect constants by max == min as well.
    constant = std == 0
    if values.shape[0] > 0:
        constant |= values.max(axis=0) == values.min(axis=0)
    std[constant] = 1.0

    standardized = (values - mean) / std
    standardized[:, constant] = 0.0
    return pd.DataFrame(standardized, columns=x.columns, index=x.index), mean, std
53
+
54
+
55
def principal_components(z, k_factors: int) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Compute the leading principal components of a panel via an SVD.

    Parameters
    ----------
    z : array_like
        Information panel of shape ``(nobs, nseries)``; it is converted to
        a float array before decomposition.
    k_factors : int
        How many leading components to keep.

    Returns
    -------
    factors : ndarray
        Component scores ``U S / sqrt(nseries)`` for the kept components.
    loadings : ndarray
        Matching right singular vectors, one column per component.
    explained_variance_ratio : ndarray
        Squared singular value of each kept component divided by the sum
        of all squared singular values (zeros when that sum is zero).

    Raises
    ------
    ValueError
        If *z* is not two-dimensional or *k_factors* lies outside
        ``1..min(nobs, nseries)``.
    """
    panel = np.asarray(z, dtype=float)
    if panel.ndim != 2:
        raise ValueError("z must be two-dimensional.")

    n_rows, n_cols = panel.shape
    if k_factors < 1:
        raise ValueError("k_factors must be at least 1.")
    if k_factors > min(n_rows, n_cols):
        raise ValueError("k_factors cannot exceed min(nobs, nseries).")

    left, singular, right_t = np.linalg.svd(panel, full_matrices=False)
    keep = slice(None, k_factors)

    scores = left[:, keep] * singular[keep] / np.sqrt(n_cols)
    weights = right_t[keep].T

    energy = np.sum(singular**2)
    if energy > 0:
        share = (singular[keep] ** 2) / energy
    else:
        # An all-zero panel has no variation to attribute.
        share = np.zeros(k_factors)
    return scores, weights, share
89
+
90
+
91
def validate_slow_columns(
    x_columns: pd.Index, slow_columns: list[str] | None, k_factors: int
) -> list[str]:
    """Validate slow-moving column names and return the effective list.

    Parameters
    ----------
    x_columns : Index
        Column labels of the information panel.
    slow_columns : iterable of str or None
        Labels treated as slow-moving. ``None`` selects every column of
        the panel.
    k_factors : int
        Number of factors that will be extracted from the slow block.

    Returns
    -------
    list of str
        The slow-moving labels, in the order supplied.

    Raises
    ------
    ValueError
        If a label is not in *x_columns*, a label is listed more than
        once, or fewer than *k_factors* labels remain.
    """
    if slow_columns is None:
        slow = list(x_columns)
    else:
        # Materialise once so a one-shot iterable is not exhausted by the
        # membership scan before the list is built.
        slow = list(slow_columns)

        missing = [col for col in slow if col not in x_columns]
        if missing:
            raise ValueError(f"slow_columns contains unknown columns: {missing}")

        # A repeated label would be double-weighted in the slow-block PCA
        # and would count twice towards the k_factors minimum below.
        seen = set()
        repeated = []
        for col in slow:
            if col in seen and col not in repeated:
                repeated.append(col)
            seen.add(col)
        if repeated:
            raise ValueError(f"slow_columns contains duplicate columns: {repeated}")

    if len(slow) < k_factors:
        raise ValueError("slow_columns must contain at least k_factors columns.")
    return slow
favar/model.py ADDED
@@ -0,0 +1,240 @@
1
+ """FAVAR model classes."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+ from statsmodels.tsa.api import VAR
8
+
9
+ from .factor import (
10
+ align_panels,
11
+ as_float_dataframe,
12
+ principal_components,
13
+ standardize_frame,
14
+ validate_slow_columns,
15
+ )
16
+ from .order_selection import FAVAROrderSelection
17
+ from .results import FAVARResults
18
+
19
+
20
class FAVAR:
    """Factor-Augmented VAR estimated with the BBE two-step procedure.

    Parameters
    ----------
    X : DataFrame or array_like
        Large information panel with shape ``(nobs, nseries)``.
    Y : DataFrame or array_like
        Observed variables included directly in the VAR. Must contain
        ``policy_var`` and must not use the generated factor names
        (``F1``, ``F2``, ...) as column names.
    policy_var : str
        Policy instrument ordered last for recursive identification.
    k_factors : int, default 3
        Number of factors extracted from ``X``.
    slow_columns : sequence[str], optional
        Columns of ``X`` treated as slow-moving. If omitted, all X columns are
        used. Supplying this list is recommended for monetary-policy studies.
    standardize : bool, default True
        Standardize ``X`` before principal components.
    """

    def __init__(
        self,
        X,
        Y,
        policy_var: str,
        k_factors: int = 3,
        slow_columns: list[str] | None = None,
        standardize: bool = True,
    ):
        """Coerce and align the inputs and store the model configuration.

        Raises
        ------
        ValueError
            If ``policy_var`` is not a column of ``Y``, or if the input
            helpers reject ``X`` or ``Y``.
        """
        self.X = as_float_dataframe(X, "X")
        self.Y = as_float_dataframe(Y, "Y")
        self.X, self.Y = align_panels(self.X, self.Y)

        if policy_var not in self.Y.columns:
            raise ValueError("policy_var must be a column of Y.")
        self.policy_var = policy_var
        self.k_factors = int(k_factors)
        self.slow_columns = (
            list(slow_columns) if slow_columns is not None else None
        )
        self.standardize = bool(standardize)
        # Populated by fit(); the convenience wrappers below require it.
        self._results: FAVARResults | None = None

    def _prepare_system(self):
        """Run the factor steps and build the augmented FAVAR system.

        Returns
        -------
        dict
            The VAR input frame (``"var_data"``), the variable ordering, and
            every intermediate of the factor step (principal components,
            loadings, cleaning and measurement coefficients, and the
            standardization moments).

        Raises
        ------
        ValueError
            If a column of ``Y`` has the same name as a generated factor,
            or if the slow-column or factor-count validation fails.
        """
        X = self.X
        Y = self.Y
        nobs, nseries = X.shape

        factor_names = [f"F{i + 1}" for i in range(self.k_factors)]
        # The policy variable is always moved to the end of the ordering.
        y_order = [col for col in Y.columns if col != self.policy_var]
        y_order.append(self.policy_var)

        # Factors and Y share one VAR frame, so a Y column called "F1" would
        # give that frame two columns with the same name.
        clashes = [name for name in factor_names if name in y_order]
        if clashes:
            raise ValueError(
                f"Y columns clash with generated factor names: {clashes}"
            )

        if self.standardize:
            Xs, x_mean, x_std = standardize_frame(X)
        else:
            Xs = X.copy()
            x_mean = np.zeros(nseries)
            x_std = np.ones(nseries)

        slow_columns = validate_slow_columns(
            X.columns, self.slow_columns, self.k_factors
        )

        # Step 1: principal components of the full panel and of the
        # slow-moving block.
        f0, pc_loadings, pc_var_ratio = principal_components(
            Xs.to_numpy(), self.k_factors
        )
        fslow, slow_pc_loadings, slow_pc_var_ratio = principal_components(
            Xs.loc[:, slow_columns].to_numpy(), self.k_factors
        )

        # Step 2: regress the full-panel components on a constant, the
        # policy variable and the slow components, then subtract only the
        # part explained by the policy variable.
        policy = Y[self.policy_var].to_numpy(dtype=float)
        cleaning_design = np.column_stack([np.ones(nobs), policy, fslow])
        cleaning_coef, *_ = np.linalg.lstsq(cleaning_design, f0, rcond=None)
        policy_cleaning_coef = cleaning_coef[1, :]
        factors = f0 - np.outer(policy, policy_cleaning_coef)

        order = factor_names + y_order
        y_ordered = Y.loc[:, y_order]
        var_data = pd.DataFrame(
            np.column_stack([factors, y_ordered.to_numpy(dtype=float)]),
            columns=order,
            index=X.index,
        )

        # Measurement equation: least-squares fit of the (standardized)
        # panel on a constant, the cleaned factors and the observed Y.
        measurement_design = np.column_stack(
            [np.ones(nobs), factors, y_ordered.to_numpy(dtype=float)]
        )
        measurement_coef, *_ = np.linalg.lstsq(
            measurement_design, Xs.to_numpy(dtype=float), rcond=None
        )

        return {
            "var_data": var_data,
            "order": order,
            "factor_names": factor_names,
            "y_names": y_order,
            "x_names": list(X.columns),
            "slow_columns": slow_columns,
            "factors": factors,
            "principal_components": f0,
            "slow_principal_components": fslow,
            "pc_loadings": pc_loadings,
            "slow_pc_loadings": slow_pc_loadings,
            "explained_variance_ratio": pc_var_ratio,
            "slow_explained_variance_ratio": slow_pc_var_ratio,
            "cleaning_coefficients": cleaning_coef,
            "policy_cleaning_coefficients": policy_cleaning_coef,
            "measurement_intercept": measurement_coef[0, :],
            "measurement_loadings": measurement_coef[1:, :],
            "x_mean": x_mean,
            "x_std": x_std,
        }

    def select_order(self, maxlags: int = 12, trend: str = "c"):
        """Select the lag order of the augmented FAVAR system.

        Parameters
        ----------
        maxlags : int, default 12
            Largest lag order passed to the VAR order selection.
        trend : str, default "c"
            Deterministic terms passed to the VAR order selection.

        Returns
        -------
        FAVAROrderSelection
            Object with selected orders, a DataFrame representation, and a
            printable summary table.
        """
        prepared = self._prepare_system()
        var_model = VAR(prepared["var_data"])
        order_results = var_model.select_order(maxlags=maxlags, trend=trend)
        return FAVAROrderSelection(
            order_results,
            k_factors=self.k_factors,
            n_x=len(prepared["x_names"]),
            n_slow=len(prepared["slow_columns"]),
            policy_var=self.policy_var,
            maxlags=maxlags,
        )

    def fit(
        self,
        lags: int | None = 13,
        select_order: str | None = None,
        maxlags: int | None = None,
        trend: str = "c",
        verbose: bool = False,
    ):
        """Estimate the FAVAR.

        Parameters
        ----------
        lags : int, default 13
            Fixed VAR lag order. When ``select_order`` is provided it is
            only used as the search cap, and only if ``maxlags`` is omitted.
        select_order : {"aic", "bic", "hqic", "fpe"}, optional
            Information criterion used to select the VAR lag order.
        maxlags : int, optional
            Maximum lag considered when ``select_order`` is used. If omitted,
            ``lags`` is used.
        trend : {"c", "ct", "ctt", "n"}, default "c"
            Deterministic terms in the VAR step.
        verbose : bool, default False
            Print lag-selection details when available.

        Returns
        -------
        FAVARResults
            The fitted results, also cached on the model for the
            convenience wrappers.
        """
        prepared = self._prepare_system()
        var_model = VAR(prepared["var_data"])
        if select_order is None:
            var_results = var_model.fit(
                maxlags=lags, ic=None, trend=trend, verbose=verbose
            )
        else:
            lag_cap = maxlags if maxlags is not None else lags
            var_results = var_model.fit(
                maxlags=lag_cap, ic=select_order, trend=trend, verbose=verbose
            )

        self._results = FAVARResults(
            model=self,
            var_results=var_results,
            var_data=prepared["var_data"],
            order=prepared["order"],
            factor_names=prepared["factor_names"],
            y_names=prepared["y_names"],
            x_names=prepared["x_names"],
            slow_columns=prepared["slow_columns"],
            policy_var=self.policy_var,
            factors=prepared["factors"],
            principal_components=prepared["principal_components"],
            slow_principal_components=prepared["slow_principal_components"],
            pc_loadings=prepared["pc_loadings"],
            slow_pc_loadings=prepared["slow_pc_loadings"],
            explained_variance_ratio=prepared["explained_variance_ratio"],
            slow_explained_variance_ratio=prepared["slow_explained_variance_ratio"],
            cleaning_coefficients=prepared["cleaning_coefficients"],
            policy_cleaning_coefficients=prepared["policy_cleaning_coefficients"],
            measurement_intercept=prepared["measurement_intercept"],
            measurement_loadings=prepared["measurement_loadings"],
            x_mean=prepared["x_mean"],
            x_std=prepared["x_std"],
            index=self.X.index,
            standardize=self.standardize,
        )
        return self._results

    def _check_results(self) -> FAVARResults:
        """Return the cached results, raising ``RuntimeError`` before fit()."""
        if self._results is None:
            raise RuntimeError("Call fit() before accessing results.")
        return self._results

    def summary(self):
        """Delegate to ``summary()`` on the fitted results."""
        return self._check_results().summary()

    def forecast(self, steps: int, alpha: float = 0.05, confidence_level=None):
        """Delegate to ``forecast()`` on the fitted results."""
        return self._check_results().forecast(
            steps, alpha=alpha, confidence_level=confidence_level
        )

    def impulse_response(self, periods: int = 10, shock: str | None = None, **kwargs):
        """Delegate to ``impulse_response()`` on the fitted results."""
        return self._check_results().impulse_response(periods, shock=shock, **kwargs)

    def panel_impulse_response(
        self, periods: int = 10, shock: str | None = None, **kwargs
    ):
        """Delegate to ``panel_impulse_response()`` on the fitted results."""
        return self._check_results().panel_impulse_response(
            periods, shock=shock, **kwargs
        )
favar/order_selection.py ADDED
@@ -0,0 +1,71 @@
1
+ """Lag-order selection results for FAVAR models."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import pandas as pd
6
+ from statsmodels.iolib.table import SimpleTable
7
+
8
+
9
class FAVAROrderSelection:
    """Container for the lag-order search of the augmented FAVAR system.

    It keeps the wrapped order-selection result, exposes the lag chosen by
    each criterion as an attribute, and renders the criteria as a
    DataFrame or a printable table.
    """

    def __init__(
        self,
        order_results,
        k_factors: int,
        n_x: int,
        n_slow: int,
        policy_var: str,
        maxlags: int,
    ):
        """Store the search result together with the model dimensions.

        ``order_results`` must provide ``ics`` (criterion name -> values)
        and ``selected_orders`` (criterion name -> chosen lag).
        """
        self.order_results = order_results
        self.ics = order_results.ics
        self.selected_orders = dict(order_results.selected_orders)
        # Expose each chosen lag as an attribute (aic, bic, fpe, hqic).
        for criterion in ("aic", "bic", "fpe", "hqic"):
            setattr(self, criterion, self.selected_orders[criterion])
        self.k_factors = k_factors
        self.n_x = n_x
        self.n_slow = n_slow
        self.policy_var = policy_var
        self.maxlags = maxlags

    def __repr__(self):
        """Render the full selection table as text."""
        table = self.summary()
        return str(table)

    def __str__(self):
        """Return a one-line digest of the selected lag orders."""
        picks = f"AIC={self.aic}, BIC={self.bic}, FPE={self.fpe}, HQIC={self.hqic}>"
        return "<FAVAROrderSelection: " + picks

    def to_frame(self, mark_min: bool = False) -> pd.DataFrame:
        """Return the information criteria as a DataFrame.

        Rows are lag orders ending at ``maxlags``; columns are the
        upper-cased criterion names. With ``mark_min=True`` the values are
        formatted as strings and each criterion's selected lag carries a
        trailing ``*``.
        """
        criteria = ["aic", "bic", "fpe", "hqic"]
        n_rows = len(self.ics[criteria[0]])
        # The table ends at maxlags, so its length fixes the first lag.
        first_lag = self.maxlags - n_rows + 1
        frame = pd.DataFrame(
            {name.upper(): self.ics[name] for name in criteria},
            index=range(first_lag, self.maxlags + 1),
        )
        if not mark_min:
            return frame

        marked = pd.DataFrame(
            {
                label: [f"{value:#.4g}" for value in frame[label]]
                for label in frame.columns
            },
            index=frame.index,
            dtype=object,
        )
        for criterion, lag in self.selected_orders.items():
            label = criterion.upper()
            marked.loc[lag, label] = f"{frame.loc[lag, label]:#.4g}*"
        return marked

    def summary(self):
        """Return a printable order-selection table."""
        marked = self.to_frame(mark_min=True)
        row_labels = [str(lag) for lag in marked.index]
        return SimpleTable(
            marked.to_numpy(dtype=object),
            headers=list(marked.columns),
            stubs=row_labels,
            title="FAVAR Lag Order Selection (* highlights the minimums)",
        )
+ return table