macrotrace 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,990 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING, Union
5
+ import logging
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+ from tabulate import tabulate
10
+ from darts.models import (
11
+ ExponentialSmoothing,
12
+ NaiveDrift,
13
+ LinearRegressionModel,
14
+ ARIMA,
15
+ )
16
+ from darts.utils.statistics import (
17
+ extract_trend_and_seasonality,
18
+ granger_causality_tests,
19
+ )
20
+ from darts.utils.utils import ModelMode, SeasonalityMode
21
+ import statsmodels.api as sm
22
+ from statsmodels.stats.diagnostic import acorr_ljungbox
23
+ from statsmodels.stats.stattools import durbin_watson
24
+ from scipy.stats import t as t_dist
25
+
26
+ if TYPE_CHECKING: # pragma: no cover
27
+ from macrotrace.models.mt.time_series import MTTimeSeries
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+
32
@dataclass
class BiasednessRegressionResult:
    """
    Result bundle for a biasedness regression.

    The ``repr`` of an instance is the pre-rendered ``table`` string, falling
    back to a placeholder when no table is available. ``to_dict`` exposes
    every field under its own name.
    """

    n_total: int
    vintage_indices: Dict[str, Any]
    data_notes: Dict[str, Any]
    model: Dict[str, Any]
    tests: Dict[str, Any]
    assumptions: Dict[str, Any]
    table: str

    def __repr__(self) -> str:
        """Return the rendered table, or a placeholder when it is empty."""
        if self.table:
            return self.table
        return "BiasednessRegressionResult()"

    def to_dict(self) -> Dict[str, Any]:
        """Return a shallow mapping of field name to field value."""
        exported_fields = (
            "n_total",
            "vintage_indices",
            "data_notes",
            "model",
            "tests",
            "assumptions",
            "table",
        )
        # Values are passed through by reference, not copied.
        return {name: getattr(self, name) for name in exported_fields}
59
+
60
+
61
@dataclass
class VintageComparison:
    """
    Pairwise revision statistics between vintages of a single series.

    On construction the inputs are validated, the vintages are ordered by
    their ``release_date``, and every pair selected by ``strategy`` is
    compared. The resulting mapping of pair label to metric dict is stored on
    ``comparison``; each pair label is additionally set as an instance
    attribute so it can be read with ``obj[label]``.

    Attributes:
        vintages: Mapping of vintage key to series object. Each value must
            expose ``release_date`` and ``to_dataframe(mode="default")``
            returning at least ``timestamp`` and ``value`` columns.
        mode: ``"growth"`` (compare within-vintage ``pct_change`` rates) or
            ``"levels"`` (compare raw values).
        strategy: ``"sequential"`` (each vintage vs. the next), ``"final"``
            (each vintage vs. the last) or ``"all"`` (every ordered pair).

    Raises:
        ValueError: On an unknown ``mode`` or ``strategy``, or when fewer than
            two vintages are supplied.
    """

    vintages: Dict[str, "MTTimeSeries"]
    mode: str
    strategy: str

    def __post_init__(self):
        if self.mode not in ("growth", "levels"):
            raise ValueError(
                f"Invalid mode: {self.mode}. Must be 'growth' or 'levels'."
            )
        if self.strategy not in ["sequential", "final", "all"]:
            raise ValueError(
                f"Invalid strategy: {self.strategy}. Must be 'sequential', 'final', or 'all'."
            )

        if len(self.vintages) < 2:
            raise ValueError(
                f"VintageComparison requires at least 2 vintages, got {len(self.vintages)}."
            )

        # Sort by the resolved vintage's release_date so strategies that depend
        # on chronological order ("sequential", "final") behave correctly
        # regardless of the order in which the user supplied vintage dates.
        self.vintages = dict(
            sorted(self.vintages.items(), key=lambda item: item[1].release_date)
        )

        self.comparison = self._calculate_comparison_metrics()
        # Mirror every pair label as an attribute so __getitem__ can serve it.
        for key, value in self.comparison.items():
            setattr(self, key, value)

    def __getitem__(self, key: str):
        """Return the attribute named ``key``; raise ``KeyError`` if absent."""
        if hasattr(self, key):
            return getattr(self, key)
        raise KeyError(f"{key} not found in {self.__class__.__name__}")

    def __repr__(self) -> str:
        return f"VintageComparison(mode={self.mode}, strategy={self.strategy})"

    def _calculate_comparison_metrics(self) -> Dict[str, Any]:
        """Build ``{"vintage_<a>_to_<b>": metrics}`` for the pairs chosen by ``strategy``."""
        processed_vintages = self._process_vintages()
        vintage_items = list(processed_vintages.items())
        comparisons = {}

        # The 'sequential' comparison strategy compares vintage 0 to 1, 1 to 2, etc.
        if self.strategy == "sequential":
            for idx, (current_key, current_df) in enumerate(vintage_items[:-1]):
                next_key, next_df = vintage_items[idx + 1]
                comparison = self._compare_vintages(current_df, next_df)
                comparisons[f"vintage_{current_key}_to_{next_key}"] = comparison

        # The 'final' comparison strategy compares all vintages to the last one
        elif self.strategy == "final":
            final_key, final_df = vintage_items[-1]
            for current_key, current_df in vintage_items[:-1]:
                comparison = self._compare_vintages(current_df, final_df)
                comparisons[f"vintage_{current_key}_to_{final_key}"] = comparison

        # The 'all' comparison strategy compares all vintages to each other
        elif self.strategy == "all":
            for idx, (current_key, current_df) in enumerate(vintage_items):
                for next_key, next_df in vintage_items[idx + 1 :]:
                    comparison = self._compare_vintages(current_df, next_df)
                    comparisons[f"vintage_{current_key}_to_{next_key}"] = comparison

        return comparisons

    def _compare_vintages(
        self, current: pd.DataFrame, next_: pd.DataFrame
    ) -> Dict[str, Any]:
        """
        Compute revision metrics for one (earlier, later) pair of vintages.

        Args:
            current: Frame with ``timestamp`` plus one value column (earlier vintage).
            next_: Frame with ``timestamp`` plus one value column (later vintage).

        Returns:
            Dict of revision metrics computed on timestamps present in both frames.

        Raises:
            ValueError: If the merge does not yield exactly two value columns.
        """
        merged = current.merge(next_, on="timestamp").sort_values("timestamp")
        value_columns = [col for col in merged.columns if col != "timestamp"]
        if len(value_columns) != 2:
            raise ValueError(
                "Vintage comparison requires exactly two value columns after merge."
            )

        col_0, col_1 = value_columns
        revisions = merged[col_0] - merged[col_1]
        mean_revision = revisions.mean()
        mean_abs_revision = revisions.abs().mean()

        # In growth mode col_0/col_1 are within-vintage growth rates and
        # `revisions` is the cross-vintage revision *to the growth rate*.
        # In levels mode col_0/col_1 are level values and `revisions` is the
        # revision *to the level itself*. Bias, dispersion, extremes, std,
        # and counts have parallel interpretations in both modes; the
        # directional-miss measures differ, see below.
        comparison = {
            "bias": mean_revision,
            "relative_bias": mean_revision / merged[col_1].mean(),
            "dispersion": mean_abs_revision,
            "relative_dispersion": mean_abs_revision / merged[col_1].abs().mean(),
            "largest_upward_revision": revisions.min(),
            "largest_downward_revision": revisions.max(),
            "standard_deviation_of_revisions_difference": revisions.std(),
            "counts": {
                # Under the Young (1974) convention
                # `revisions = preliminary - final`
                # a negative revision means that 'final > preliminary'
                # i.e. the series was revised UP between vintages.
                "upward": (revisions < 0).sum(),
                "downward": (revisions > 0).sum(),
                "no_change": (revisions == 0).sum(),
            },
        }

        if self.mode == "growth":
            # Growth rates: a sign flip in the growth rate itself means the
            # vintages disagree on whether the series rose or fell.
            comparison["directional_misses_trend"] = (
                merged[col_0] * merged[col_1] < 0
            ).mean()
        else:
            # Levels: two distinct directional-miss notions.
            # (1) Sign of the level (e.g. trade balance crossing zero).
            comparison["directional_misses_sign"] = (
                merged[col_0] * merged[col_1] < 0
            ).mean()
            # (2) Sign of the period-over-period change (rose vs fell).
            # The first merged row has no previous period, so its diff is NaN.
            # Drop undefined pairs before averaging; otherwise that row is
            # counted as "no miss" and the share is understated (N instead of
            # N - 1 in the denominator). With fewer than two merged rows the
            # result is NaN because no change can be observed.
            change_products = (merged[col_0].diff() * merged[col_1].diff()).dropna()
            comparison["directional_misses_trend"] = (change_products < 0).mean()

        return comparison

    def _process_vintages(self) -> Dict[str, pd.DataFrame]:
        """
        Convert each vintage to a two-column frame ready for merging.

        The value column is renamed to ``vintage_<key>``; in growth mode it is
        replaced by its ``pct_change``. Rows with missing values are dropped.
        """
        processed_vintages = {}
        for as_of_date, vintage in self.vintages.items():
            df = vintage.to_dataframe(mode="default")
            df = df[["timestamp", "value"]].rename(
                columns={"value": f"vintage_{as_of_date}"}
            )
            df = df.set_index("timestamp")
            if self.mode == "growth":
                df = df.pct_change()
            df = df.dropna().reset_index()
            processed_vintages[as_of_date] = df

        return processed_vintages
202
+
203
+
204
+ class MTTimeSeriesAnalysis:
205
def __init__(self, ts: "MTTimeSeries") -> None:
    """
    Bind the analysis helpers to one time series.

    Args:
        ts (MTTimeSeries): The series stored on ``self.ts``; the analysis
            methods of this class read their data from it.
    """
    self.ts = ts
207
+
208
def assess_revision_success(self) -> Tuple[pd.DataFrame, float]:
    """
    "A successful revision is defined as one which reduced the absolute value of the error between the estimate and the final figure."
    Stekler (1967)

    Assess the success of revisions of time series data.
    Each intermediate vintage of an observation is compared with the vintage
    immediately before it and with the final (last-column) value of that
    observation; ``self.ts._is_successful_revision`` decides whether the
    revision was successful.

    A cell is left unassessed (``<NA>``) when:
        - it belongs to the first vintage (no prior value),
        - it belongs to the last vintage (it *is* the final value),
        - the current or prior value is missing,
        - the value did not change relative to the prior vintage,
        - the final value of the observation is missing (there is no target
          to measure the revision against).

    Returns:
        Tuple[pd.DataFrame, float]: A tuple containing:
            - A DataFrame with success flags for each revision.
            - The overall success rate of revisions as a float (NaN when no
              revision could be assessed).
    """
    df = self.ts.generate_vintage_matrix()

    flags_df = pd.DataFrame(index=df.index, columns=df.columns, dtype="boolean")
    # set all flags to None to start
    flags_df[:] = None

    # With the vintage matrix, each row is an observation timestamp,
    # and each column is a vintage date.
    n_vintages = len(df.columns)
    final_pos = n_vintages - 1

    # Only columns strictly between the first and the last vintage can be
    # assessed, so fewer than three vintages leaves every flag unset.
    if n_vintages >= 3:
        for row_pos in range(len(df.index)):
            final_value = df.iat[row_pos, final_pos]
            if pd.isna(final_value):
                # Without a final figure "closer to final" is undefined.
                continue

            for col_pos in range(1, final_pos):
                current_vintage_value = df.iat[row_pos, col_pos]
                prior_vintage_value = df.iat[row_pos, col_pos - 1]

                if pd.isna(current_vintage_value) or pd.isna(prior_vintage_value):
                    # If either value is NaN, we cannot assess revision success
                    continue
                if current_vintage_value == prior_vintage_value:
                    # Nothing changed, so there is no revision to assess.
                    continue

                flags_df.iloc[row_pos, col_pos] = self.ts._is_successful_revision(
                    current_vintage_value, prior_vintage_value, final_value
                )

    revision_success_rate = flags_df.stack().mean()
    return flags_df, revision_success_rate
263
+
264
def granger_causality_test(
    self,
    ts_effect: "MTTimeSeries",
    add_const: bool = True,
    ts_effect_to_darts_ts_kwargs: Optional[Dict[Any, Any]] = None,
    ts_cause_to_darts_ts_kwargs: Optional[Dict[Any, Any]] = None,
    max_lags: Optional[int] = None,
) -> Dict[str, Any]:
    """
    Provides four tests for granger non causality of 2 time series
    using statsmodels.tsa.stattools.grangercausalitytests().

    The current series (i.e. self.ts) is the causal series and the
    provided ts_effect is the effect series. The null hypothesis is
    that the causal series does not granger cause the effect series.

    Args:
        ts_effect (MTTimeSeries): The time series to test as the effect.
        add_const (bool): Whether to add a constant term to the regression. Defaults to True.
        ts_effect_to_darts_ts_kwargs (Optional[Dict[Any, Any]]): Keyword arguments passed to
            ``ts_effect.to_darts_timeseries``. Defaults to None (no extra arguments).
        ts_cause_to_darts_ts_kwargs (Optional[Dict[Any, Any]]): Keyword arguments passed to
            ``self.ts.to_darts_timeseries``. Defaults to None (no extra arguments).
        max_lags (Optional[int]): The maximum number of lags to test for granger causality.
            When None (or 0), the numeric frequency of ``self.ts`` is used.

    Returns:
        Dict[str, Any]: A dictionary containing the results of the
        granger causality tests for each lag, including test statistics
        and p-values.
    """
    # None instead of a shared `{}` default: a mutable default object is
    # created once and reused by every call.
    effect_kwargs = ts_effect_to_darts_ts_kwargs or {}
    cause_kwargs = ts_cause_to_darts_ts_kwargs or {}

    effect_ts = ts_effect.to_darts_timeseries(**effect_kwargs)
    causal_ts = self.ts.to_darts_timeseries(**cause_kwargs)
    max_lags = max_lags or self.ts.metadata.get_frequency_as_numeric()

    return granger_causality_tests(
        ts_cause=causal_ts,
        ts_effect=effect_ts,
        maxlag=max_lags,
        addconst=add_const,
    )
306
+
307
def revision_cross_correlogram(
    self,
    max_vintage_lag: Optional[int] = None,
    max_observation_lag: Optional[int] = None,
) -> Dict[str, Any]:
    """
    Correlate the revision matrix with lagged copies of itself.

    Revisions are the column-to-column differences of the vintage matrix
    (rows: observations, columns: release dates). For observation lag ``b``
    and vintage lag ``a`` the cell holds the Pearson correlation between
    ``R[t, v]`` and ``R[t-b, v-a]`` over all positions where both are finite.

    Args:
        max_vintage_lag (Optional[int]): Largest lag across vintage columns;
            clipped to the feasible maximum, which is also the default.
        max_observation_lag (Optional[int]): Largest lag across observation
            rows; clipped to the feasible maximum, which is also the default.

    Returns:
        Dict[str, Any]: A dictionary containing:
            - correlogram: ndarray of shape (observation_lags, vintage_lags);
              NaN where fewer than two pairs exist or either side is constant
            - observation_lags: Observation lag indices (y-axis)
            - vintage_lags: Vintage lag indices (x-axis)
            - pair_counts: Number of valid revision pairs used per cell

    Raises:
        ValueError: If a lag bound is negative, the vintage matrix is empty,
            or fewer than two release dates are available.
    """
    requested_bounds = (
        ("max_vintage_lag", max_vintage_lag),
        ("max_observation_lag", max_observation_lag),
    )
    for bound_name, bound in requested_bounds:
        if bound is not None and bound < 0:
            raise ValueError(f"{bound_name} must be >= 0")

    vintage_matrix = self.ts.generate_vintage_matrix()
    if vintage_matrix.empty or vintage_matrix.shape[1] == 0:
        raise ValueError("Vintage matrix is empty, cannot compute correlogram")

    # The first diff column is all-NaN (nothing precedes it), so discard it.
    revision_frame = vintage_matrix.diff(axis=1).iloc[:, 1:]
    frame_rows, frame_cols = revision_frame.shape
    if frame_cols == 0 or frame_rows == 0:
        raise ValueError(
            "At least two release dates are required to compute revision series"
        )

    grid = revision_frame.to_numpy(dtype=float)
    n_obs, n_vintage = grid.shape

    if max_vintage_lag is None:
        max_vintage_lag = n_vintage - 1
    else:
        max_vintage_lag = min(max_vintage_lag, n_vintage - 1)

    if max_observation_lag is None:
        max_observation_lag = n_obs - 1
    else:
        max_observation_lag = min(max_observation_lag, n_obs - 1)

    cell_shape = (max_observation_lag + 1, max_vintage_lag + 1)
    correlogram = np.full(cell_shape, np.nan, dtype=float)
    pair_counts = np.zeros(cell_shape, dtype=int)

    # np.ndindex walks the cells row-major: observation lag outer, vintage lag inner.
    for obs_lag, vintage_lag in np.ndindex(*cell_shape):
        leading = grid[obs_lag:, vintage_lag:].reshape(-1)
        trailing = grid[: n_obs - obs_lag, : n_vintage - vintage_lag].reshape(-1)

        usable = np.isfinite(leading) & np.isfinite(trailing)
        n_pairs = int(usable.sum())
        pair_counts[obs_lag, vintage_lag] = n_pairs
        if n_pairs < 2:
            continue

        leading_ok = leading[usable]
        trailing_ok = trailing[usable]
        # A constant side has zero variance, so the correlation is undefined.
        if leading_ok.std(ddof=0) == 0.0 or trailing_ok.std(ddof=0) == 0.0:
            continue
        correlogram[obs_lag, vintage_lag] = np.corrcoef(leading_ok, trailing_ok)[0, 1]

    return {
        "correlogram": correlogram,
        "observation_lags": list(range(max_observation_lag + 1)),
        "vintage_lags": list(range(max_vintage_lag + 1)),
        "pair_counts": pair_counts,
    }
393
+
394
def revision_biasedness_regression(
    self,
    independent_vintage_index: int | str,
    dependent_vintage_index: int | str,
    alpha: float = 0.05,
) -> BiasednessRegressionResult:
    """
    Fit an OLS regression of the dependent vintage (Y) on the independent vintage (X):
        Y = alpha + beta * X + u

    When X is an unbiased predictor of Y, the intercept should be close to
    zero and the slope close to one. If unbiasedness cannot be rejected,
    alpha_hat and beta_hat should give a better estimator of Y than X alone.

    args:
        independent_vintage_index (int | str): Vintage index used as the independent variable (X).
            1-based indexing. Use -1 or "latest" for last non-NaN vintage.
        dependent_vintage_index (int | str): Vintage index used as the dependent variable (Y).
            1-based indexing. Use -1 or "latest" for last non-NaN vintage.
        alpha (float): Significance level for tests.

    returns:
        BiasednessRegressionResult: Results including OLS estimates and tests of unbiasedness.

    raises:
        ValueError: If either vintage index is None.
    """
    required_indices = (independent_vintage_index, dependent_vintage_index)
    if any(index is None for index in required_indices):
        raise ValueError(
            "Both independent_vintage_index and dependent_vintage_index are required."
        )

    paired, notes = self._prepare_indexed_vintage_regression_data(
        independent_vintage_index=independent_vintage_index,
        dependent_vintage_index=dependent_vintage_index,
    )

    response = paired["y"].to_numpy(dtype=float)
    regressor = paired["x"].to_numpy(dtype=float)

    # Always add the intercept column, even if the regressor happens to be constant.
    design = sm.add_constant(regressor, has_constant="add")
    fitted = sm.OLS(response, design).fit()

    return self._prepare_biasedness_regression_output(
        result=fitted,
        alpha=alpha,
        independent_vintage_index=independent_vintage_index,
        dependent_vintage_index=dependent_vintage_index,
        data_notes=notes,
    )
446
+
447
def revision_uncertainty(
    self,
    forecast_method: str = "ARIMA",
    min_train_size: int = 4,
    model_kwargs: Optional[Dict[str, Any]] = None,
) -> Dict[str, float]:
    """
    Revision uncertainty calculation from Runkle (1998).

    Compares the spread of final-minus-initial revisions with the spread of
    rolling one-step-ahead forecast errors on the series.

    Args:
        forecast_method (str): The method to use for forecasting. Supported methods are
            "ExponentialSmoothing", "Naive", "ARIMA", and "LinearRegression".
            Defaults to "ARIMA", configured as AR(2) unless overridden.
        min_train_size (int): Minimum number of observations required to fit the model
            in the rolling forecast loop. Defaults to 4.
        model_kwargs (Optional[Dict[str, Any]]): Optional keyword arguments passed to the
            forecasting model upon initialization. For ARIMA, the keys "p", "d", "q",
            "seasonal_order" and "trend" default to 2, 0, 0, (0, 0, 0, 0) and "n" when
            not supplied. The dict passed in is never modified. Defaults to None.

    Returns:
        Dict[str, float]: A dictionary containing:
            - "std_dev_forecast_errors": Standard deviation of rolling forecast errors.
            - "std_dev_revisions": Standard deviation of final-minus-initial revisions.
            - "ratio": Ratio of std_dev_revisions to std_dev_forecast_errors.

    Raises:
        ValueError: If ``forecast_method`` is not one of the supported names.

    Note:
        ARIMA requires at least 30 observations per training window (darts
        limitation, see https://github.com/unit8co/darts/pull/2353). Rolling
        steps with shorter windows are skipped, which can yield an empty
        forecast-error sample and a NaN ratio on short series.
    """
    forecast_methods = {
        "ExponentialSmoothing": ExponentialSmoothing,
        "Naive": NaiveDrift,
        "LinearRegression": LinearRegressionModel,
        "ARIMA": ARIMA,
    }

    if forecast_method not in forecast_methods:
        raise ValueError(
            f"Invalid forecast method: {forecast_method}. Supported methods are: {forecast_methods}"
        )

    # Work on a private copy so the defaults injected below never leak into
    # a dict owned by the caller.
    model_kwargs = dict(model_kwargs) if model_kwargs is not None else {}

    # Set ARIMA default to AR(2) if not specified.
    # TODO: darts enforces a 30-observation minimum on ARIMA training windows,
    # so rolling steps below that are silently skipped via the ValueError handler
    # in the loop below. Remove this note once unit8co/darts#2353 lands and the
    # minimum is configurable. https://github.com/unit8co/darts/pull/2353
    if forecast_method == "ARIMA":
        arima_defaults = {
            "p": 2,
            "d": 0,
            "q": 0,
            "seasonal_order": (0, 0, 0, 0),
            "trend": "n",
        }
        for option, default in arima_defaults.items():
            model_kwargs.setdefault(option, default)

    model_class = forecast_methods[forecast_method]

    # Rolling one-step-ahead forecast errors across time using the latest vintage.
    forecast_errors = []
    darts_ts = self.ts.to_darts_timeseries()
    n_points = len(darts_ts)

    if n_points >= (min_train_size + 1):
        for i in range(min_train_size, n_points):
            train = darts_ts[:i]
            test = darts_ts[i : i + 1]
            model = model_class(**model_kwargs)
            try:
                model.fit(train)
            except ValueError as exc:
                logger.info(
                    "Skipping rolling forecast step with training window %s: %s",
                    len(train),
                    exc,
                )
                continue
            forecast = model.predict(1)
            forecast_value = float(forecast.values().reshape(-1)[0])
            actual_value = float(test.values().reshape(-1)[0])
            forecast_errors.append(forecast_value - actual_value)
    else:
        logger.info(
            "Not enough observations to perform rolling forecast; "
            "need at least %s, have %s.",
            min_train_size + 1,
            n_points,
        )

    # Explicit float dtype keeps an empty error sample numeric (std -> NaN).
    std_dev_forecast_errors = pd.Series(forecast_errors, dtype=float).std()

    # Final-minus-initial revisions for each timestamp.
    vm = self.ts.generate_vintage_matrix()

    def _final_minus_initial(row: pd.Series) -> Optional[float]:
        # Needs at least two published values to form a revision.
        non_na = row.dropna()
        if len(non_na) < 2:
            return None
        return non_na.iloc[-1] - non_na.iloc[0]

    final_initial_revisions = vm.apply(_final_minus_initial, axis=1).dropna()
    std_revisions = final_initial_revisions.std()

    return {
        "std_dev_forecast_errors": std_dev_forecast_errors,
        "std_dev_revisions": std_revisions,
        "ratio": std_revisions / std_dev_forecast_errors,
    }
556
+
557
def decompose_vintage(
    self,
    model: Union[str, SeasonalityMode, ModelMode] = ModelMode.MULTIPLICATIVE,
    method: str = "naive",
    to_darts_timeseries_kwargs: Optional[Dict[str, Any]] = None,
    **kwargs: Any,
) -> Dict[str, Any]:
    """
    Split the current vintage into a trend and a seasonal component.

    The series is converted with ``self.ts.to_darts_timeseries`` and handed to
    Darts' ``extract_trend_and_seasonality``, using the numeric frequency from
    the series metadata as the seasonal period.

    Args:
        model (Union[str, SeasonalityMode, ModelMode]): Decomposition type. The strings
            "additive" / "multiplicative" (case-insensitive, surrounding whitespace
            ignored) are mapped to the matching ``ModelMode``; enum values are passed
            through unchanged.
        method (str): Decomposition method ("naive", "STL", or "MSTL").
        to_darts_timeseries_kwargs (Optional[Dict[str, Any]]): Keyword arguments passed to ``self.ts.to_darts_timeseries``.
        **kwargs (Any): Additional keyword arguments passed to Darts decomposition.

    Returns:
        Dict[str, Any]: Dictionary with release_date, trend TimeSeries, and seasonal TimeSeries.

    Raises:
        ValueError: If ``model`` is a string other than the two supported names.
    """
    conversion_kwargs = (
        {} if to_darts_timeseries_kwargs is None else to_darts_timeseries_kwargs
    )

    if isinstance(model, str):
        modes_by_name = {
            "additive": ModelMode.ADDITIVE,
            "multiplicative": ModelMode.MULTIPLICATIVE,
        }
        resolved_mode = modes_by_name.get(model.strip().lower())
        if resolved_mode is None:
            raise ValueError(
                "Invalid model string. Supported values are 'additive' and 'multiplicative'."
            )
        model = resolved_mode

    period = self.ts.metadata.get_frequency_as_numeric()
    series = self.ts.to_darts_timeseries(**conversion_kwargs)
    trend_part, seasonal_part = extract_trend_and_seasonality(
        ts=series,
        freq=period,
        model=model,
        method=method,
        **kwargs,
    )

    return {
        "release_date": self.ts.release_date,
        "trend": trend_part,
        "seasonal": seasonal_part,
    }
606
+
607
+ def select_vintage_by_index(
608
+ self,
609
+ vintage_index: int | str,
610
+ include_vintage_date: bool = True,
611
+ dropna: bool = True,
612
+ ) -> pd.DataFrame:
613
+ """
614
+ Select the vintage value by 1-based index (or -1/"latest") for each timestamp.
615
+
616
+ Args:
617
+ vintage_index (int | str): 1-based index, -1, or "latest".
618
+ include_vintage_date (bool): Include the release date of the selected vintage.
619
+ dropna (bool): Drop rows where the indexed vintage is missing.
620
+
621
+ Returns:
622
+ pd.DataFrame: Columns include 'timestamp', 'value', and optionally 'vintage_date'.
623
+ """
624
+ self._warn_if_vintage_filters()
625
+ return self._select_vintage_df(
626
+ vintage_index=vintage_index,
627
+ include_vintage_date=include_vintage_date,
628
+ dropna=dropna,
629
+ )
630
+
631
def vintage_comparison(
    self, vintage_dates: List[str], mode: str = "growth", strategy: str = "all"
) -> "VintageComparison":
    """
    Summarise revisions between vintages, adapted from Young (1974).

    Every requested date is resolved with ``self.ts.as_of``; the resolved
    vintages are keyed by their release date and passed to
    ``VintageComparison``. Only observations present in both vintages of a
    pair are compared (inner join on timestamp).

    For each pair, **I** is the value in the initial (earlier) vintage and
    **L** the value at the same timestamp in the latest (later) vintage:

    - ``mode="growth"``: I and L are within-vintage period-over-period
      growth rates (``pct_change`` applied to each vintage independently
      before comparison), so the metrics describe revisions to the
      **growth rate**.
    - ``mode="levels"``: I and L are the raw values, so the metrics
      describe revisions to the **level**.

    **Sign convention.** Following Young (1974), a revision is ``I - L``
    (preliminary minus final). A NEGATIVE revision means the series was
    revised UP (L > I); a POSITIVE revision means it was revised DOWN. The
    metric *names* ("upward", "downward") refer to the movement of the
    series, so the *sign* of the reported number is the opposite.

    Common measures (both modes):
        - bias: mean(I - L)
        - relative_bias: mean(I - L) / mean(L)
        - dispersion: mean(|I - L|)
        - relative_dispersion: mean(|I - L|) / mean(|L|)
        - largest_upward_revision: min(I - L) (most negative, since I - L < 0 means L > I)
        - largest_downward_revision: max(I - L)
        - standard_deviation_of_revisions_difference: sample std(I - L)
        - counts: number of upward, downward, and no-change revisions.

    Mode-specific directional-miss measures:
        - growth mode:
            - directional_misses_trend: fraction of timestamps where the
              growth rate changes sign between vintages (rose vs fell).
        - levels mode:
            - directional_misses_sign: fraction of timestamps where the
              level itself changes sign between vintages
              (e.g. a trade balance crossing zero).
            - directional_misses_trend: fraction of timestamps where the
              period-over-period change in the level changes sign between vintages.

    Args:
        vintage_dates (List[str]): A list of vintage identifiers to compare.
        mode (str): The mode of comparison ("growth" or "levels").
        strategy (str): The strategy for comparison ("sequential", "final", or "all").

    Returns:
        VintageComparison: Object whose ``comparison`` attribute maps pair labels to per-pair metric dicts.
    """
    # Key on the release date that as_of() actually resolved to, so output
    # labels name the vintages that were compared rather than the requests.
    resolved_by_release: Dict[str, "MTTimeSeries"] = {}
    requests_by_release: Dict[str, List[str]] = {}
    for requested_date in vintage_dates:
        snapshot = self.ts.as_of(requested_date)
        release_key = snapshot.release_date.isoformat()
        logger.info(
            "vintage_comparison: requested %s resolved to release %s",
            requested_date,
            release_key,
        )
        if release_key not in requests_by_release:
            requests_by_release[release_key] = []
        requests_by_release[release_key].append(requested_date)
        resolved_by_release[release_key] = snapshot

    # Several requested dates may resolve to the same release; surface that,
    # because the comparison then runs on fewer vintages than were asked for.
    collapsed = {
        release: asked
        for release, asked in requests_by_release.items()
        if len(asked) > 1
    }
    if collapsed:
        logger.warning(
            "vintage_comparison: %s requested dates collapsed to %s unique "
            "vintages. Collapses (resolved_release -> requested_dates): %s",
            len(vintage_dates),
            len(resolved_by_release),
            collapsed,
        )

    return VintageComparison(
        vintages=resolved_by_release,
        mode=mode,
        strategy=strategy,
    )
719
+
720
+ def _durbin_watson(self, resid: np.ndarray) -> Optional[float]:
721
+ if len(resid) < 2:
722
+ return None
723
+ stat = float(durbin_watson(resid))
724
+ if not np.isfinite(stat):
725
+ return None
726
+ return stat
727
+
728
+ def _ljung_box(self, resid: np.ndarray, lags: int, alpha: float) -> Dict[str, Any]:
729
+ n = len(resid)
730
+ effective_lags = max(1, min(lags, n - 1))
731
+ if n < 3:
732
+ return {
733
+ "stat": None,
734
+ "pvalue": None,
735
+ "lags": effective_lags,
736
+ "alpha": alpha,
737
+ "pass": False,
738
+ "note": "Insufficient observations for Ljung-Box test",
739
+ }
740
+ lb = acorr_ljungbox(resid, lags=[effective_lags], return_df=True)
741
+ stat = float(lb["lb_stat"].iloc[0])
742
+ pvalue = float(lb["lb_pvalue"].iloc[0])
743
+ return {
744
+ "stat": stat,
745
+ "pvalue": pvalue,
746
+ "lags": effective_lags,
747
+ "alpha": alpha,
748
+ "pass": pvalue >= alpha,
749
+ }
750
+
751
+ def _normalize_index(self, value: int | str) -> int | str:
752
+ if isinstance(value, str):
753
+ if value.lower() != "latest":
754
+ raise ValueError(
755
+ "Vintage index string must be 'latest' if provided as text."
756
+ )
757
+ return "latest"
758
+ if isinstance(value, int):
759
+ if value == -1:
760
+ return "latest"
761
+ if value < 1:
762
+ raise ValueError("Vintage index must be >= 1 or use -1/'latest'.")
763
+ return value
764
+ raise ValueError("Vintage index must be an int or 'latest'.")
765
+
766
+ def _prepare_indexed_vintage_regression_data(
767
+ self,
768
+ independent_vintage_index: int | str,
769
+ dependent_vintage_index: int | str,
770
+ ) -> Tuple[pd.DataFrame, Dict[str, Any]]:
771
+ vintage_filters_applied = self._warn_if_vintage_filters()
772
+
773
+ vm = self.ts.generate_vintage_matrix()
774
+ if vm.empty:
775
+ raise ValueError("No vintage data available for regression.")
776
+ vm = vm.sort_index(axis=1)
777
+
778
+ x_df = self._select_vintage_df(
779
+ vintage_index=independent_vintage_index,
780
+ include_vintage_date=False,
781
+ dropna=False,
782
+ vm=vm,
783
+ ).rename(columns={"value": "x"})
784
+ y_df = self._select_vintage_df(
785
+ vintage_index=dependent_vintage_index,
786
+ include_vintage_date=False,
787
+ dropna=False,
788
+ vm=vm,
789
+ ).rename(columns={"value": "y"})
790
+
791
+ df = pd.merge(x_df, y_df, on="timestamp", how="inner")
792
+ df = df.sort_values("timestamp").reset_index(drop=True)
793
+
794
+ missing_x = int(df["x"].isna().sum())
795
+ missing_y = int(df["y"].isna().sum())
796
+ merged = df.dropna().reset_index(drop=True)
797
+ dropped = int(len(df) - len(merged))
798
+
799
+ if dropped > 0:
800
+ logger.debug(
801
+ "Dropped %s rows due to missing indexed vintages (x missing=%s, y missing=%s).",
802
+ dropped,
803
+ missing_x,
804
+ missing_y,
805
+ )
806
+
807
+ if merged.empty:
808
+ raise ValueError(
809
+ "No overlapping observations after applying vintage index selection."
810
+ )
811
+
812
+ data_notes = {
813
+ "missing_x": missing_x,
814
+ "missing_y": missing_y,
815
+ "dropped_rows": dropped,
816
+ "vintage_filters_applied": vintage_filters_applied,
817
+ }
818
+
819
+ return merged, data_notes
820
+
821
def _prepare_biasedness_regression_output(
    self,
    result: sm.regression.linear_model.RegressionResultsWrapper,
    alpha: float,
    independent_vintage_index: int | str,
    dependent_vintage_index: int | str,
    data_notes: Dict[str, Any],
) -> BiasednessRegressionResult:
    """
    Summarise a fitted ``y = alpha + beta * x`` regression as a biasedness report.

    The first parameter of ``result`` is treated as the intercept and the
    second as the slope. The report contains two-sided t-tests of
    ``alpha = 0`` and ``beta = 1`` on the residual degrees of freedom, a
    joint F-test of both restrictions, confidence intervals at level
    ``1 - alpha``, the Durbin-Watson statistic, a lag-1 Ljung-Box check and a
    rendered coefficient table.

    Args:
        result: Fitted statsmodels regression results with two parameters.
        alpha: Significance level used for the confidence intervals, the
            significance stars and the reject decision.
        independent_vintage_index: Vintage index used as regressor; echoed
            into the output.
        dependent_vintage_index: Vintage index used as response; echoed into
            the output.
        data_notes: Data preparation notes; passed through unchanged.

    Returns:
        A ``BiasednessRegressionResult`` whose ``table`` is the rendered
        coefficient table.
    """
    # Coerce to plain arrays so positional access always means
    # "row = parameter", whether statsmodels handed back numpy arrays or
    # pandas objects. On a DataFrame, chained ``conf_int[0][1]`` would read
    # the slope's lower bound instead of the intercept's upper bound.
    params = np.asarray(result.params, dtype=float)
    std_errors = np.asarray(result.bse, dtype=float)
    alpha_hat = float(params[0])
    beta_hat = float(params[1])
    se_alpha = float(std_errors[0])
    se_beta = float(std_errors[1])

    # A zero standard error makes the statistic undefined; report None
    # rather than dividing by zero.
    t_alpha = alpha_hat / se_alpha if se_alpha != 0 else None
    t_beta = (beta_hat - 1.0) / se_beta if se_beta != 0 else None
    df_resid = int(result.df_resid)

    p_alpha = (
        float(2 * t_dist.sf(abs(t_alpha), df_resid))
        if t_alpha is not None
        else None
    )
    p_beta = (
        float(2 * t_dist.sf(abs(t_beta), df_resid)) if t_beta is not None else None
    )

    conf_int = np.asarray(result.conf_int(alpha=alpha), dtype=float)
    alpha_ci = (float(conf_int[0, 0]), float(conf_int[0, 1]))
    beta_ci = (float(conf_int[1, 0]), float(conf_int[1, 1]))

    # NOTE(review): the restriction string relies on the parameter names
    # ``const`` and ``x1`` -- confirm the caller fits the model so that these
    # names exist.
    f_test = result.f_test("const = 0, x1 = 1")
    f_stat = float(np.asarray(f_test.fvalue).item())
    f_pvalue = float(np.asarray(f_test.pvalue).item())
    df_num = int(f_test.df_num)
    df_den = int(f_test.df_denom)

    dw = self._durbin_watson(result.resid)
    lb = self._ljung_box(result.resid, lags=1, alpha=alpha)

    def _stars(pvalue: Optional[float]) -> str:
        # A single star marks significance at the requested level.
        if pvalue is None:
            return ""
        return "*" if pvalue < alpha else ""

    def _fmt(value: Optional[float], digits: int = 4) -> str:
        if value is None or not np.isfinite(value):
            return "NA"
        return f"{value:.{digits}f}"

    # ``int((1 - alpha) * 100)`` truncates (alpha=0.9 -> "9", alpha=0.025 ->
    # "97"); general formatting absorbs float noise and keeps fractional
    # levels while still printing "95" for alpha=0.05.
    ci_level = f"{(1 - alpha) * 100:g}"

    coef_table = tabulate(
        [
            [
                "alpha (const)",
                f"{_fmt(alpha_hat)}{_stars(p_alpha)}",
                _fmt(se_alpha),
                _fmt(t_alpha),
                _fmt(p_alpha),
                _fmt(alpha_ci[0]),
                _fmt(alpha_ci[1]),
            ],
            [
                "beta (x)",
                f"{_fmt(beta_hat)}{_stars(p_beta)}",
                _fmt(se_beta),
                _fmt(t_beta),
                _fmt(p_beta),
                _fmt(beta_ci[0]),
                _fmt(beta_ci[1]),
            ],
        ],
        headers=[
            "Param",
            "Estimate",
            "Std Err",
            "t (H0)",
            "p (H0)",
            f"CI Low ({ci_level}%)",
            f"CI High ({ci_level}%)",
        ],
        tablefmt="pretty",
    )

    return BiasednessRegressionResult(
        n_total=int(result.nobs),
        vintage_indices={
            "independent": independent_vintage_index,
            "dependent": dependent_vintage_index,
            "index_base": 1,
        },
        data_notes=data_notes,
        model={
            "alpha": alpha_hat,
            "beta": beta_hat,
            "alpha_ci": {"low": alpha_ci[0], "high": alpha_ci[1]},
            "beta_ci": {"low": beta_ci[0], "high": beta_ci[1]},
            "rss": float(result.ssr),
            "s2": float(result.mse_resid),
            "r2": float(result.rsquared),
            "n": int(result.nobs),
            "durbin_watson": dw,
        },
        tests={
            "alpha_eq_0": {"t": t_alpha, "pvalue": p_alpha},
            "beta_eq_1": {"t": t_beta, "pvalue": p_beta},
            "unbiasedness": {
                "f_stat": f_stat,
                "pvalue": f_pvalue,
                "df_num": df_num,
                "df_den": df_den,
                "alpha": alpha,
                "reject": f_pvalue < alpha,
            },
        },
        assumptions={
            "random_residuals": {
                "test": "ljung_box",
                **lb,
            }
        },
        table=coef_table,
    )
942
+
943
def _select_vintage_df(
    self,
    vintage_index: int | str,
    include_vintage_date: bool = True,
    dropna: bool = True,
    vm: Optional[pd.DataFrame] = None,
) -> pd.DataFrame:
    """
    Pick, for every row of the vintage matrix, the value of one indexed vintage.

    Within each row the non-missing entries are counted from the left after
    the columns have been sorted; index ``1`` is the first of them and
    ``"latest"`` is the last.

    Args:
        vintage_index: Vintage index, passed through ``_normalize_index``
            before use (1-based position, or its "latest" form).
        include_vintage_date: Keep the ``vintage_date`` column (the column
            label the value was taken from) in the output.
        dropna: Drop rows for which no value could be selected.
        vm: Optional pre-built vintage matrix; when omitted it is generated
            from ``self.ts``.

    Returns:
        A frame with the columns ``timestamp``, ``value`` and, if requested,
        ``vintage_date``. Rows without the requested vintage carry
        ``NaN``/``NaT`` unless ``dropna`` removed them.

    Raises:
        ValueError: If the vintage matrix is empty.
    """
    idx = self._normalize_index(vintage_index)
    if vm is None:
        vm = self.ts.generate_vintage_matrix()
    # Validate and order the matrix no matter where it came from: positional
    # selection below is only meaningful on sorted columns, and re-sorting an
    # already sorted frame changes nothing for callers that pre-sort.
    if vm.empty:
        raise ValueError("No vintage data available for selection.")
    vm = vm.sort_index(axis=1)

    def _select_row(row: pd.Series) -> Tuple[float, pd.Timestamp]:
        available = row.dropna()
        if available.empty:
            return np.nan, pd.NaT
        if idx == "latest":
            return float(available.iloc[-1]), available.index[-1]
        position = idx - 1  # the vintage index is 1-based
        if position >= len(available):
            # This row has fewer vintages than the one requested.
            return np.nan, pd.NaT
        return float(available.iloc[position]), available.index[position]

    selected = vm.apply(_select_row, axis=1, result_type="expand")
    selected.columns = ["value", "vintage_date"]
    selected.index.name = "timestamp"
    df = selected.reset_index()

    if not include_vintage_date:
        df = df.drop(columns=["vintage_date"])

    if dropna:
        df = df.dropna(subset=["value"]).reset_index(drop=True)

    return df
980
+
981
def _warn_if_vintage_filters(self) -> bool:
    """
    Report whether a vintage date filter is set on the series, warning if so.

    Returns:
        ``True`` when ``vintage_start_date`` or ``vintage_end_date`` of
        ``self.ts`` is not ``None`` (a warning is logged in that case),
        otherwise ``False``.
    """
    series = self.ts
    if series.vintage_start_date is None and series.vintage_end_date is None:
        return False

    logger.warning(
        "Vintage date filters are currently applied. This may affect indexed vintage selection."
    )
    return True