pymc-extras 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pymc_extras/distributions/__init__.py +5 -5
- pymc_extras/distributions/histogram_utils.py +1 -1
- pymc_extras/inference/__init__.py +1 -1
- pymc_extras/printing.py +1 -1
- pymc_extras/statespace/__init__.py +4 -4
- pymc_extras/statespace/core/__init__.py +1 -1
- pymc_extras/statespace/core/representation.py +8 -8
- pymc_extras/statespace/core/statespace.py +94 -23
- pymc_extras/statespace/filters/__init__.py +3 -3
- pymc_extras/statespace/filters/kalman_filter.py +16 -11
- pymc_extras/statespace/models/SARIMAX.py +138 -74
- pymc_extras/statespace/models/VARMAX.py +248 -57
- pymc_extras/statespace/models/__init__.py +2 -2
- pymc_extras/statespace/models/structural/__init__.py +4 -4
- pymc_extras/statespace/models/structural/components/autoregressive.py +49 -24
- pymc_extras/statespace/models/structural/components/cycle.py +48 -28
- pymc_extras/statespace/models/structural/components/level_trend.py +61 -29
- pymc_extras/statespace/models/structural/components/measurement_error.py +22 -5
- pymc_extras/statespace/models/structural/components/regression.py +47 -18
- pymc_extras/statespace/models/structural/components/seasonality.py +278 -95
- pymc_extras/statespace/models/structural/core.py +27 -8
- pymc_extras/statespace/utils/constants.py +17 -14
- pymc_extras/statespace/utils/data_tools.py +1 -1
- {pymc_extras-0.4.0.dist-info → pymc_extras-0.4.1.dist-info}/METADATA +1 -1
- {pymc_extras-0.4.0.dist-info → pymc_extras-0.4.1.dist-info}/RECORD +27 -27
- {pymc_extras-0.4.0.dist-info → pymc_extras-0.4.1.dist-info}/WHEEL +0 -0
- {pymc_extras-0.4.0.dist-info → pymc_extras-0.4.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -14,11 +14,13 @@ from pymc_extras.statespace.utils.constants import (
|
|
|
14
14
|
ALL_STATE_AUX_DIM,
|
|
15
15
|
ALL_STATE_DIM,
|
|
16
16
|
AR_PARAM_DIM,
|
|
17
|
+
EXOGENOUS_DIM,
|
|
17
18
|
MA_PARAM_DIM,
|
|
18
19
|
OBS_STATE_AUX_DIM,
|
|
19
20
|
OBS_STATE_DIM,
|
|
20
21
|
SHOCK_AUX_DIM,
|
|
21
22
|
SHOCK_DIM,
|
|
23
|
+
TIME_DIM,
|
|
22
24
|
)
|
|
23
25
|
|
|
24
26
|
floatX = pytensor.config.floatX
|
|
@@ -28,60 +30,6 @@ class BayesianVARMAX(PyMCStateSpace):
|
|
|
28
30
|
r"""
|
|
29
31
|
Vector AutoRegressive Moving Average with eXogenous Regressors
|
|
30
32
|
|
|
31
|
-
Parameters
|
|
32
|
-
----------
|
|
33
|
-
order: tuple of (int, int)
|
|
34
|
-
Number of autoregressive (AR) and moving average (MA) terms to include in the model. All terms up to the
|
|
35
|
-
specified order are included. For restricted models, set zeros directly on the priors.
|
|
36
|
-
|
|
37
|
-
endog_names: list of str, optional
|
|
38
|
-
Names of the endogenous variables being modeled. Used to generate names for the state and shock coords. If
|
|
39
|
-
None, the state names will simply be numbered.
|
|
40
|
-
|
|
41
|
-
Exactly one of either ``endog_names`` or ``k_endog`` must be specified.
|
|
42
|
-
|
|
43
|
-
k_endog: int, optional
|
|
44
|
-
Number of endogenous states to be modeled.
|
|
45
|
-
|
|
46
|
-
Exactly one of either ``endog_names`` or ``k_endog`` must be specified.
|
|
47
|
-
|
|
48
|
-
stationary_initialization: bool, default False
|
|
49
|
-
If true, the initial state and initial state covariance will not be assigned priors. Instead, their steady
|
|
50
|
-
state values will be used. If False, the user is responsible for setting priors on the initial state and
|
|
51
|
-
initial covariance.
|
|
52
|
-
|
|
53
|
-
..warning :: This option is very sensitive to the priors placed on the AR and MA parameters. If the model dynamics
|
|
54
|
-
for a given sample are not stationary, sampling will fail with a "covariance is not positive semi-definite"
|
|
55
|
-
error.
|
|
56
|
-
|
|
57
|
-
filter_type: str, default "standard"
|
|
58
|
-
The type of Kalman Filter to use. Options are "standard", "single", "univariate", "steady_state",
|
|
59
|
-
and "cholesky". See the docs for kalman filters for more details.
|
|
60
|
-
|
|
61
|
-
state_structure: str, default "fast"
|
|
62
|
-
How to represent the state-space system. When "interpretable", each element of the state vector will have a
|
|
63
|
-
precise meaning as either lagged data, innovations, or lagged innovations. This comes at the cost of a larger
|
|
64
|
-
state vector, which may hurt performance.
|
|
65
|
-
|
|
66
|
-
When "fast", states are combined to minimize the dimension of the state vector, but lags and innovations are
|
|
67
|
-
mixed together as a result. Only the first state (the modeled timeseries) will have an obvious interpretation
|
|
68
|
-
in this case.
|
|
69
|
-
|
|
70
|
-
measurement_error: bool, default True
|
|
71
|
-
If true, a measurement error term is added to the model.
|
|
72
|
-
|
|
73
|
-
verbose: bool, default True
|
|
74
|
-
If true, a message will be logged to the terminal explaining the variable names, dimensions, and supports.
|
|
75
|
-
|
|
76
|
-
mode: str or Mode, optional
|
|
77
|
-
Pytensor compile mode, used in auxiliary sampling methods such as ``sample_conditional_posterior`` and
|
|
78
|
-
``forecast``. The mode does **not** effect calls to ``pm.sample``.
|
|
79
|
-
|
|
80
|
-
Regardless of whether a mode is specified, it can always be overwritten via the ``compile_kwargs`` argument
|
|
81
|
-
to all sampling methods.
|
|
82
|
-
|
|
83
|
-
Notes
|
|
84
|
-
-----
|
|
85
33
|
The VARMA model is a multivariate extension of the SARIMAX model. Given a set of timeseries :math:`\{x_t\}_{t=0}^T`,
|
|
86
34
|
with :math:`x_t = \begin{bmatrix} x_{1,t} & x_{2,t} & \cdots & x_{k,t} \end{bmatrix}^T`, a VARMA models each series
|
|
87
35
|
as a function of the histories of all series. Specifically, denoting the AR-MA order as (p, q), a VARMA can be
|
|
@@ -152,23 +100,143 @@ class BayesianVARMAX(PyMCStateSpace):
|
|
|
152
100
|
order: tuple[int, int],
|
|
153
101
|
endog_names: list[str] | None = None,
|
|
154
102
|
k_endog: int | None = None,
|
|
103
|
+
exog_state_names: list[str] | dict[str, list[str]] | None = None,
|
|
104
|
+
k_exog: int | dict[str, int] | None = None,
|
|
155
105
|
stationary_initialization: bool = False,
|
|
156
106
|
filter_type: str = "standard",
|
|
157
107
|
measurement_error: bool = False,
|
|
158
108
|
verbose: bool = True,
|
|
159
109
|
mode: str | Mode | None = None,
|
|
160
110
|
):
|
|
111
|
+
"""
|
|
112
|
+
Create a Bayesian VARMAX model.
|
|
113
|
+
|
|
114
|
+
Parameters
|
|
115
|
+
----------
|
|
116
|
+
order: tuple of (int, int)
|
|
117
|
+
Number of autoregressive (AR) and moving average (MA) terms to include in the model. All terms up to the
|
|
118
|
+
specified order are included. For restricted models, set zeros directly on the priors.
|
|
119
|
+
|
|
120
|
+
endog_names: list of str, optional
|
|
121
|
+
Names of the endogenous variables being modeled. Used to generate names for the state and shock coords. If
|
|
122
|
+
None, the state names will simply be numbered.
|
|
123
|
+
|
|
124
|
+
Exactly one of either ``endog_names`` or ``k_endog`` must be specified.
|
|
125
|
+
|
|
126
|
+
exog_state_names : list[str] or dict[str, list[str]], optional
|
|
127
|
+
Names of the exogenous state variables. If a list, all endogenous variables will share the same exogenous
|
|
128
|
+
variables. If a dict, keys should be the names of the endogenous variables, and values should be lists of the
|
|
129
|
+
exogenous variable names for that endogenous variable. Endogenous variables not included in the dict will
|
|
130
|
+
be assumed to have no exogenous variables. If None, no exogenous variables will be included.
|
|
131
|
+
|
|
132
|
+
k_exog : int or dict[str, int], optional
|
|
133
|
+
Number of exogenous variables. If an int, all endogenous variables will share the same number of exogenous
|
|
134
|
+
variables. If a dict, keys should be the names of the endogenous variables, and values should be the number of
|
|
135
|
+
exogenous variables for that endogenous variable. Endogenous variables not included in the dict will be
|
|
136
|
+
assumed to have no exogenous variables. If None, no exogenous variables will be included.
|
|
137
|
+
|
|
138
|
+
stationary_initialization: bool, default False
|
|
139
|
+
If true, the initial state and initial state covariance will not be assigned priors. Instead, their steady
|
|
140
|
+
state values will be used. If False, the user is responsible for setting priors on the initial state and
|
|
141
|
+
initial covariance.
|
|
142
|
+
|
|
143
|
+
.. warning:: This option is very sensitive to the priors placed on the AR and MA parameters. If the model dynamics
|
|
144
|
+
for a given sample are not stationary, sampling will fail with a "covariance is not positive semi-definite"
|
|
145
|
+
error.
|
|
146
|
+
|
|
147
|
+
filter_type: str, default "standard"
|
|
148
|
+
The type of Kalman Filter to use. Options are "standard", "single", "univariate", "steady_state",
|
|
149
|
+
and "cholesky". See the docs for kalman filters for more details.
|
|
150
|
+
|
|
151
|
+
state_structure: str, default "fast"
|
|
152
|
+
How to represent the state-space system. When "interpretable", each element of the state vector will have a
|
|
153
|
+
precise meaning as either lagged data, innovations, or lagged innovations. This comes at the cost of a larger
|
|
154
|
+
state vector, which may hurt performance.
|
|
155
|
+
|
|
156
|
+
When "fast", states are combined to minimize the dimension of the state vector, but lags and innovations are
|
|
157
|
+
mixed together as a result. Only the first state (the modeled timeseries) will have an obvious interpretation
|
|
158
|
+
in this case.
|
|
159
|
+
|
|
160
|
+
measurement_error: bool, default False
|
|
161
|
+
If true, a measurement error term is added to the model.
|
|
162
|
+
|
|
163
|
+
verbose: bool, default True
|
|
164
|
+
If true, a message will be logged to the terminal explaining the variable names, dimensions, and supports.
|
|
165
|
+
|
|
166
|
+
mode: str or Mode, optional
|
|
167
|
+
Pytensor compile mode, used in auxiliary sampling methods such as ``sample_conditional_posterior`` and
|
|
168
|
+
``forecast``. The mode does **not** affect calls to ``pm.sample``.
|
|
169
|
+
|
|
170
|
+
Regardless of whether a mode is specified, it can always be overwritten via the ``compile_kwargs`` argument
|
|
171
|
+
to all sampling methods.
|
|
172
|
+
|
|
173
|
+
"""
|
|
161
174
|
if (endog_names is None) and (k_endog is None):
|
|
162
175
|
raise ValueError("Must specify either endog_names or k_endog")
|
|
163
176
|
if (endog_names is not None) and (k_endog is None):
|
|
164
177
|
k_endog = len(endog_names)
|
|
165
178
|
if (endog_names is None) and (k_endog is not None):
|
|
166
|
-
endog_names = [f"
|
|
179
|
+
endog_names = [f"observed_{i}" for i in range(k_endog)]
|
|
167
180
|
if (endog_names is not None) and (k_endog is not None):
|
|
168
181
|
if len(endog_names) != k_endog:
|
|
169
182
|
raise ValueError("Length of provided endog_names does not match provided k_endog")
|
|
170
183
|
|
|
184
|
+
if k_exog is not None and not isinstance(k_exog, int | dict):
|
|
185
|
+
raise ValueError("If not None, k_endog must be either an int or a dict")
|
|
186
|
+
if exog_state_names is not None and not isinstance(exog_state_names, list | dict):
|
|
187
|
+
raise ValueError("If not None, exog_state_names must be either a list or a dict")
|
|
188
|
+
|
|
189
|
+
if k_exog is not None and exog_state_names is not None:
|
|
190
|
+
if isinstance(k_exog, int) and isinstance(exog_state_names, list):
|
|
191
|
+
if len(exog_state_names) != k_exog:
|
|
192
|
+
raise ValueError("Length of exog_state_names does not match provided k_exog")
|
|
193
|
+
elif isinstance(k_exog, int) and isinstance(exog_state_names, dict):
|
|
194
|
+
raise ValueError(
|
|
195
|
+
"If k_exog is an int, exog_state_names must be a list of the same length (or None)"
|
|
196
|
+
)
|
|
197
|
+
elif isinstance(k_exog, dict) and isinstance(exog_state_names, list):
|
|
198
|
+
raise ValueError(
|
|
199
|
+
"If k_exog is a dict, exog_state_names must be a dict as well (or None)"
|
|
200
|
+
)
|
|
201
|
+
elif isinstance(k_exog, dict) and isinstance(exog_state_names, dict):
|
|
202
|
+
if set(k_exog.keys()) != set(exog_state_names.keys()):
|
|
203
|
+
raise ValueError("Keys of k_exog and exog_state_names dicts must match")
|
|
204
|
+
if not all(
|
|
205
|
+
len(names) == k for names, k in zip(exog_state_names.values(), k_exog.values())
|
|
206
|
+
):
|
|
207
|
+
raise ValueError(
|
|
208
|
+
"If both k_endog and exog_state_names are provided, lengths of exog_state_names "
|
|
209
|
+
"lists must match corresponding values in k_exog"
|
|
210
|
+
)
|
|
211
|
+
|
|
212
|
+
if k_exog is not None and exog_state_names is None:
|
|
213
|
+
if isinstance(k_exog, int):
|
|
214
|
+
exog_state_names = [f"exogenous_{i}" for i in range(k_exog)]
|
|
215
|
+
elif isinstance(k_exog, dict):
|
|
216
|
+
exog_state_names = {
|
|
217
|
+
name: [f"{name}_exogenous_{i}" for i in range(k)] for name, k in k_exog.items()
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
if k_exog is None and exog_state_names is not None:
|
|
221
|
+
if isinstance(exog_state_names, list):
|
|
222
|
+
k_exog = len(exog_state_names)
|
|
223
|
+
elif isinstance(exog_state_names, dict):
|
|
224
|
+
k_exog = {name: len(names) for name, names in exog_state_names.items()}
|
|
225
|
+
|
|
226
|
+
# If exog_state_names is a dict but 1) all endog variables are among the keys, and 2) all values are the same
|
|
227
|
+
# then we can drop back to the list case.
|
|
228
|
+
if (
|
|
229
|
+
isinstance(exog_state_names, dict)
|
|
230
|
+
and set(exog_state_names.keys()) == set(endog_names)
|
|
231
|
+
and len({frozenset(val) for val in exog_state_names.values()}) == 1
|
|
232
|
+
):
|
|
233
|
+
exog_state_names = exog_state_names[endog_names[0]]
|
|
234
|
+
k_exog = len(exog_state_names)
|
|
235
|
+
|
|
171
236
|
self.endog_names = list(endog_names)
|
|
237
|
+
self.exog_state_names = exog_state_names
|
|
238
|
+
|
|
239
|
+
self.k_exog = k_exog
|
|
172
240
|
self.p, self.q = order
|
|
173
241
|
self.stationary_initialization = stationary_initialization
|
|
174
242
|
|
|
@@ -208,6 +276,14 @@ class BayesianVARMAX(PyMCStateSpace):
|
|
|
208
276
|
names.remove("ar_params")
|
|
209
277
|
if self.q == 0:
|
|
210
278
|
names.remove("ma_params")
|
|
279
|
+
|
|
280
|
+
# Add exogenous regression coefficients rather than remove, since we might have to handle
|
|
281
|
+
# several (if self.exog_state_names is a dict)
|
|
282
|
+
if isinstance(self.exog_state_names, list):
|
|
283
|
+
names.append("beta_exog")
|
|
284
|
+
elif isinstance(self.exog_state_names, dict):
|
|
285
|
+
names.extend([f"beta_{name}" for name in self.exog_state_names.keys()])
|
|
286
|
+
|
|
211
287
|
return names
|
|
212
288
|
|
|
213
289
|
@property
|
|
@@ -239,19 +315,65 @@ class BayesianVARMAX(PyMCStateSpace):
|
|
|
239
315
|
},
|
|
240
316
|
}
|
|
241
317
|
|
|
318
|
+
if isinstance(self.exog_state_names, list):
|
|
319
|
+
k_exog = len(self.exog_state_names)
|
|
320
|
+
info["beta_exog"] = {
|
|
321
|
+
"shape": (self.k_endog, k_exog),
|
|
322
|
+
"constraints": "None",
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
elif isinstance(self.exog_state_names, dict):
|
|
326
|
+
for name, exog_names in self.exog_state_names.items():
|
|
327
|
+
k_exog = len(exog_names)
|
|
328
|
+
info[f"beta_{name}"] = {
|
|
329
|
+
"shape": (k_exog,),
|
|
330
|
+
"constraints": "None",
|
|
331
|
+
}
|
|
332
|
+
|
|
242
333
|
for name in self.param_names:
|
|
243
334
|
info[name]["dims"] = self.param_dims[name]
|
|
244
335
|
|
|
245
336
|
return {name: info[name] for name in self.param_names}
|
|
246
337
|
|
|
338
|
+
@property
|
|
339
|
+
def data_info(self) -> dict[str, dict[str, Any]]:
|
|
340
|
+
info = None
|
|
341
|
+
|
|
342
|
+
if isinstance(self.exog_state_names, list):
|
|
343
|
+
info = {
|
|
344
|
+
"exogenous_data": {
|
|
345
|
+
"dims": (TIME_DIM, EXOGENOUS_DIM),
|
|
346
|
+
"shape": (None, self.k_exog),
|
|
347
|
+
}
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
elif isinstance(self.exog_state_names, dict):
|
|
351
|
+
info = {
|
|
352
|
+
f"{endog_state}_exogenous_data": {
|
|
353
|
+
"dims": (TIME_DIM, f"{EXOGENOUS_DIM}_{endog_state}"),
|
|
354
|
+
"shape": (None, len(exog_names)),
|
|
355
|
+
}
|
|
356
|
+
for endog_state, exog_names in self.exog_state_names.items()
|
|
357
|
+
}
|
|
358
|
+
|
|
359
|
+
return info
|
|
360
|
+
|
|
361
|
+
@property
|
|
362
|
+
def data_names(self) -> list[str]:
|
|
363
|
+
if isinstance(self.exog_state_names, list):
|
|
364
|
+
return ["exogenous_data"]
|
|
365
|
+
elif isinstance(self.exog_state_names, dict):
|
|
366
|
+
return [f"{endog_state}_exogenous_data" for endog_state in self.exog_state_names.keys()]
|
|
367
|
+
return []
|
|
368
|
+
|
|
247
369
|
@property
|
|
248
370
|
def state_names(self):
|
|
249
371
|
state_names = self.endog_names.copy()
|
|
250
372
|
state_names += [
|
|
251
|
-
f"L{i + 1}
|
|
373
|
+
f"L{i + 1}_{state}" for i in range(self.p - 1) for state in self.endog_names
|
|
252
374
|
]
|
|
253
375
|
state_names += [
|
|
254
|
-
f"L{i + 1}
|
|
376
|
+
f"L{i + 1}_{state}_innov" for i in range(self.q) for state in self.endog_names
|
|
255
377
|
]
|
|
256
378
|
|
|
257
379
|
return state_names
|
|
@@ -276,6 +398,12 @@ class BayesianVARMAX(PyMCStateSpace):
|
|
|
276
398
|
if self.q > 0:
|
|
277
399
|
coords.update({MA_PARAM_DIM: list(range(1, self.q + 1))})
|
|
278
400
|
|
|
401
|
+
if isinstance(self.exog_state_names, list):
|
|
402
|
+
coords[EXOGENOUS_DIM] = self.exog_state_names
|
|
403
|
+
elif isinstance(self.exog_state_names, dict):
|
|
404
|
+
for name, exog_names in self.exog_state_names.items():
|
|
405
|
+
coords[f"{EXOGENOUS_DIM}_{name}"] = exog_names
|
|
406
|
+
|
|
279
407
|
return coords
|
|
280
408
|
|
|
281
409
|
@property
|
|
@@ -299,6 +427,14 @@ class BayesianVARMAX(PyMCStateSpace):
|
|
|
299
427
|
del coord_map["P0"]
|
|
300
428
|
del coord_map["x0"]
|
|
301
429
|
|
|
430
|
+
if isinstance(self.exog_state_names, list):
|
|
431
|
+
coord_map["beta_exog"] = (OBS_STATE_DIM, EXOGENOUS_DIM)
|
|
432
|
+
elif isinstance(self.exog_state_names, dict):
|
|
433
|
+
# If each state has its own exogenous variables, each parameter needs its own dim, since we expect the
|
|
434
|
+
# dim labels to all be different (otherwise we'd be in the list case).
|
|
435
|
+
for name in self.exog_state_names.keys():
|
|
436
|
+
coord_map[f"beta_{name}"] = (f"{EXOGENOUS_DIM}_{name}",)
|
|
437
|
+
|
|
302
438
|
return coord_map
|
|
303
439
|
|
|
304
440
|
def add_default_priors(self):
|
|
@@ -386,6 +522,61 @@ class BayesianVARMAX(PyMCStateSpace):
|
|
|
386
522
|
)
|
|
387
523
|
self.ssm["state_cov", :, :] = state_cov
|
|
388
524
|
|
|
525
|
+
if self.exog_state_names is not None:
|
|
526
|
+
if isinstance(self.exog_state_names, list):
|
|
527
|
+
beta_exog = self.make_and_register_variable(
|
|
528
|
+
"beta_exog", shape=(self.k_posdef, self.k_exog), dtype=floatX
|
|
529
|
+
)
|
|
530
|
+
exog_data = self.make_and_register_data(
|
|
531
|
+
"exogenous_data", shape=(None, self.k_exog), dtype=floatX
|
|
532
|
+
)
|
|
533
|
+
|
|
534
|
+
obs_intercept = exog_data @ beta_exog.T
|
|
535
|
+
|
|
536
|
+
elif isinstance(self.exog_state_names, dict):
|
|
537
|
+
obs_components = []
|
|
538
|
+
for i, name in enumerate(self.endog_names):
|
|
539
|
+
if name in self.exog_state_names:
|
|
540
|
+
k_exog = len(self.exog_state_names[name])
|
|
541
|
+
beta_exog = self.make_and_register_variable(
|
|
542
|
+
f"beta_{name}", shape=(k_exog,), dtype=floatX
|
|
543
|
+
)
|
|
544
|
+
exog_data = self.make_and_register_data(
|
|
545
|
+
f"{name}_exogenous_data", shape=(None, k_exog), dtype=floatX
|
|
546
|
+
)
|
|
547
|
+
obs_components.append(pt.expand_dims(exog_data @ beta_exog, axis=-1))
|
|
548
|
+
else:
|
|
549
|
+
obs_components.append(pt.zeros((1, 1), dtype=floatX))
|
|
550
|
+
|
|
551
|
+
# TODO: Replace all of this with pt.concat_with_broadcast once PyMC works with pytensor >= 2.32
|
|
552
|
+
|
|
553
|
+
# If there were any zeros, they need to be broadcast against the non-zeros.
|
|
554
|
+
# Core shape is the last dim, the time dim is always broadcast
|
|
555
|
+
non_concat_shape = [1, None]
|
|
556
|
+
|
|
557
|
+
# Look for the first non-zero component to get the shape from
|
|
558
|
+
for tensor_inp in obs_components:
|
|
559
|
+
for i, (bcast, sh) in enumerate(
|
|
560
|
+
zip(tensor_inp.type.broadcastable, tensor_inp.shape)
|
|
561
|
+
):
|
|
562
|
+
if bcast or i == 1:
|
|
563
|
+
continue
|
|
564
|
+
non_concat_shape[i] = sh
|
|
565
|
+
|
|
566
|
+
assert non_concat_shape.count(None) == 1
|
|
567
|
+
|
|
568
|
+
bcast_tensor_inputs = []
|
|
569
|
+
for tensor_inp in obs_components:
|
|
570
|
+
non_concat_shape[1] = tensor_inp.shape[1]
|
|
571
|
+
bcast_tensor_inputs.append(pt.broadcast_to(tensor_inp, non_concat_shape))
|
|
572
|
+
|
|
573
|
+
obs_intercept = pt.join(1, *bcast_tensor_inputs)
|
|
574
|
+
|
|
575
|
+
else:
|
|
576
|
+
raise NotImplementedError()
|
|
577
|
+
|
|
578
|
+
self.ssm["obs_intercept"] = obs_intercept
|
|
579
|
+
|
|
389
580
|
if self.stationary_initialization:
|
|
390
581
|
# Solve for matrix quadratic for P0
|
|
391
582
|
T = self.ssm["transition"]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from pymc_extras.statespace.models import structural
|
|
2
2
|
from pymc_extras.statespace.models.ETS import BayesianETS
|
|
3
|
-
from pymc_extras.statespace.models.SARIMAX import
|
|
3
|
+
from pymc_extras.statespace.models.SARIMAX import BayesianSARIMAX
|
|
4
4
|
from pymc_extras.statespace.models.VARMAX import BayesianVARMAX
|
|
5
5
|
|
|
6
|
-
__all__ = ["
|
|
6
|
+
__all__ = ["BayesianSARIMAX", "BayesianVARMAX", "BayesianETS", "structural"]
|
|
@@ -11,11 +11,11 @@ from pymc_extras.statespace.models.structural.components.seasonality import (
|
|
|
11
11
|
)
|
|
12
12
|
|
|
13
13
|
__all__ = [
|
|
14
|
-
"LevelTrendComponent",
|
|
15
|
-
"MeasurementError",
|
|
16
14
|
"AutoregressiveComponent",
|
|
17
|
-
"
|
|
15
|
+
"CycleComponent",
|
|
18
16
|
"FrequencySeasonality",
|
|
17
|
+
"LevelTrendComponent",
|
|
18
|
+
"MeasurementError",
|
|
19
19
|
"RegressionComponent",
|
|
20
|
-
"
|
|
20
|
+
"TimeSeasonality",
|
|
21
21
|
]
|
|
@@ -23,6 +23,11 @@ class AutoregressiveComponent(Component):
|
|
|
23
23
|
observed_state_names: list[str] | None, default None
|
|
24
24
|
List of strings for observed state labels. If None, defaults to ["data"].
|
|
25
25
|
|
|
26
|
+
share_states: bool, default False
|
|
27
|
+
Whether latent states are shared across the observed states. If True, there will be only one set of latent
|
|
28
|
+
states, which are observed by all observed states. If False, each observed state has its own set of
|
|
29
|
+
latent states. This argument has no effect if `k_endog` is 1.
|
|
30
|
+
|
|
26
31
|
Notes
|
|
27
32
|
-----
|
|
28
33
|
An autoregressive component can be thought of as a way of introducing serially correlated errors into the model.
|
|
@@ -41,7 +46,7 @@ class AutoregressiveComponent(Component):
|
|
|
41
46
|
The coefficient :math:`\rho_3` has been constrained to zero.
|
|
42
47
|
|
|
43
48
|
.. warning:: This class is meant to be used as a component in a structural time series model. For modeling of
|
|
44
|
-
stationary processes with ARIMA, use ``statespace.
|
|
49
|
+
stationary processes with ARIMA, use ``statespace.BayesianSARIMAX``.
|
|
45
50
|
|
|
46
51
|
Examples
|
|
47
52
|
--------
|
|
@@ -73,48 +78,62 @@ class AutoregressiveComponent(Component):
|
|
|
73
78
|
order: int = 1,
|
|
74
79
|
name: str = "auto_regressive",
|
|
75
80
|
observed_state_names: list[str] | None = None,
|
|
81
|
+
share_states: bool = False,
|
|
76
82
|
):
|
|
77
83
|
if observed_state_names is None:
|
|
78
84
|
observed_state_names = ["data"]
|
|
79
85
|
|
|
80
|
-
|
|
86
|
+
k_endog = len(observed_state_names)
|
|
87
|
+
k_endog_effective = k_posdef = 1 if share_states else k_endog
|
|
81
88
|
|
|
82
89
|
order = order_to_mask(order)
|
|
83
90
|
ar_lags = np.flatnonzero(order).ravel().astype(int) + 1
|
|
84
91
|
k_states = len(order)
|
|
85
92
|
|
|
93
|
+
self.share_states = share_states
|
|
86
94
|
self.order = order
|
|
87
95
|
self.ar_lags = ar_lags
|
|
88
96
|
|
|
89
97
|
super().__init__(
|
|
90
98
|
name=name,
|
|
91
99
|
k_endog=k_endog,
|
|
92
|
-
k_states=k_states *
|
|
100
|
+
k_states=k_states * k_endog_effective,
|
|
93
101
|
k_posdef=k_posdef,
|
|
94
102
|
measurement_error=True,
|
|
95
103
|
combine_hidden_states=True,
|
|
96
104
|
observed_state_names=observed_state_names,
|
|
97
|
-
obs_state_idxs=np.tile(np.r_[[1.0], np.zeros(k_states - 1)],
|
|
105
|
+
obs_state_idxs=np.tile(np.r_[[1.0], np.zeros(k_states - 1)], k_endog_effective),
|
|
106
|
+
share_states=share_states,
|
|
98
107
|
)
|
|
99
108
|
|
|
100
109
|
def populate_component_properties(self):
|
|
101
|
-
|
|
110
|
+
k_endog = self.k_endog
|
|
111
|
+
k_endog_effective = 1 if self.share_states else k_endog
|
|
102
112
|
|
|
103
|
-
self.
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
113
|
+
k_states = self.k_states // k_endog_effective # this is also the number of AR lags
|
|
114
|
+
base_names = [f"L{i + 1}_{self.name}" for i in range(k_states)]
|
|
115
|
+
|
|
116
|
+
if self.share_states:
|
|
117
|
+
self.state_names = [f"{name}[shared]" for name in base_names]
|
|
118
|
+
self.shock_names = [f"{self.name}[shared]"]
|
|
119
|
+
else:
|
|
120
|
+
self.state_names = [
|
|
121
|
+
f"{name}[{state_name}]"
|
|
122
|
+
for state_name in self.observed_state_names
|
|
123
|
+
for name in base_names
|
|
124
|
+
]
|
|
125
|
+
self.shock_names = [
|
|
126
|
+
f"{self.name}[{obs_name}]" for obs_name in self.observed_state_names
|
|
127
|
+
]
|
|
108
128
|
|
|
109
|
-
self.shock_names = [f"{self.name}[{obs_name}]" for obs_name in self.observed_state_names]
|
|
110
129
|
self.param_names = [f"params_{self.name}", f"sigma_{self.name}"]
|
|
111
130
|
self.param_dims = {f"params_{self.name}": (f"lag_{self.name}",)}
|
|
112
131
|
self.coords = {f"lag_{self.name}": self.ar_lags.tolist()}
|
|
113
132
|
|
|
114
|
-
if
|
|
133
|
+
if k_endog_effective > 1:
|
|
115
134
|
self.param_dims[f"params_{self.name}"] = (
|
|
116
135
|
f"endog_{self.name}",
|
|
117
|
-
|
|
136
|
+
f"lag_{self.name}",
|
|
118
137
|
)
|
|
119
138
|
self.param_dims[f"sigma_{self.name}"] = (f"endog_{self.name}",)
|
|
120
139
|
|
|
@@ -140,18 +159,21 @@ class AutoregressiveComponent(Component):
|
|
|
140
159
|
|
|
141
160
|
def make_symbolic_graph(self) -> None:
|
|
142
161
|
k_endog = self.k_endog
|
|
143
|
-
|
|
162
|
+
k_endog_effective = 1 if self.share_states else k_endog
|
|
163
|
+
|
|
164
|
+
k_states = self.k_states // k_endog_effective
|
|
144
165
|
k_posdef = self.k_posdef
|
|
145
166
|
|
|
146
167
|
k_nonzero = int(sum(self.order))
|
|
147
168
|
ar_params = self.make_and_register_variable(
|
|
148
|
-
f"params_{self.name}",
|
|
169
|
+
f"params_{self.name}",
|
|
170
|
+
shape=(k_nonzero,) if k_endog_effective == 1 else (k_endog_effective, k_nonzero),
|
|
149
171
|
)
|
|
150
172
|
sigma_ar = self.make_and_register_variable(
|
|
151
|
-
f"sigma_{self.name}", shape=() if
|
|
173
|
+
f"sigma_{self.name}", shape=() if k_endog_effective == 1 else (k_endog_effective,)
|
|
152
174
|
)
|
|
153
175
|
|
|
154
|
-
if
|
|
176
|
+
if k_endog_effective == 1:
|
|
155
177
|
T = pt.eye(k_states, k=-1)
|
|
156
178
|
ar_idx = (np.zeros(k_nonzero, dtype="int"), np.nonzero(self.order)[0])
|
|
157
179
|
T = T[ar_idx].set(ar_params)
|
|
@@ -159,7 +181,7 @@ class AutoregressiveComponent(Component):
|
|
|
159
181
|
else:
|
|
160
182
|
transition_matrices = []
|
|
161
183
|
|
|
162
|
-
for i in range(
|
|
184
|
+
for i in range(k_endog_effective):
|
|
163
185
|
T = pt.eye(k_states, k=-1)
|
|
164
186
|
ar_idx = (np.zeros(k_nonzero, dtype="int"), np.nonzero(self.order)[0])
|
|
165
187
|
T = T[ar_idx].set(ar_params[i])
|
|
@@ -171,18 +193,21 @@ class AutoregressiveComponent(Component):
|
|
|
171
193
|
self.ssm["transition", :, :] = T
|
|
172
194
|
|
|
173
195
|
R = np.eye(k_states)
|
|
174
|
-
R_mask = np.full((k_states), False)
|
|
196
|
+
R_mask = np.full((k_states,), False)
|
|
175
197
|
R_mask[0] = True
|
|
176
198
|
R = R[:, R_mask]
|
|
177
199
|
|
|
178
200
|
self.ssm["selection", :, :] = pt.specify_shape(
|
|
179
|
-
pt.linalg.block_diag(*[R for _ in range(
|
|
201
|
+
pt.linalg.block_diag(*[R for _ in range(k_endog_effective)]), (self.k_states, k_posdef)
|
|
180
202
|
)
|
|
181
203
|
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
204
|
+
Zs = [pt.zeros((1, k_states))[0, 0].set(1.0) for _ in range(k_endog)]
|
|
205
|
+
|
|
206
|
+
if self.share_states:
|
|
207
|
+
Z = pt.join(0, *Zs)
|
|
208
|
+
else:
|
|
209
|
+
Z = pt.linalg.block_diag(*Zs)
|
|
210
|
+
self.ssm["design", :, :] = pt.specify_shape(Z, (k_endog, self.k_states))
|
|
186
211
|
|
|
187
212
|
cov_idx = ("state_cov", *np.diag_indices(k_posdef))
|
|
188
213
|
self.ssm[cov_idx] = sigma_ar**2
|