macrotrace 0.2.0__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {macrotrace-0.2.0 → macrotrace-0.2.2}/.github/workflows/docs.yml +1 -1
- {macrotrace-0.2.0 → macrotrace-0.2.2}/CHANGELOG.md +24 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/PKG-INFO +2 -6
- {macrotrace-0.2.0 → macrotrace-0.2.2}/README.md +1 -5
- {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/models/mt/time_series.py +392 -28
- {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/sources/base.py +7 -1
- {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/sources/fred.py +2 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/sources/ons.py +2 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/sources/rtdsm.py +2 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/models/mt/series/test_series.py +223 -3
- {macrotrace-0.2.0 → macrotrace-0.2.2}/.github/workflows/ci.yml +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/.github/workflows/release.yml +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/.gitignore +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/.pre-commit-config.yaml +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/.python-version +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/LICENSE +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/__init__.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/_paths.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/cli.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/graphing.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/models/__init__.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/models/db.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/models/mt/__init__.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/models/mt/analysis.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/models/mt/observation.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/models/mt/plotter.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/models/mt/series_metadata.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/ons_cli/__init__.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/ons_cli/cli.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/ons_cli/common.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/ons_cli/tui.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/py.typed +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/sources/__init__.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/sources/example.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/pyproject.toml +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/scripts/backstop_ingest.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/assets/mt/time_series/expected_vm.csv +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/assets/mt/time_series/from_dataframe.csv +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/assets/mt/time_series/from_dataframe_with_tz.csv +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/models/mt/series/test_db_path_forwarding.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/models/mt/series/test_init.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/models/mt/test_analysis.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/models/mt/test_metadata.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/models/mt/test_plotter.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/models/mt/utils.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/models/test_db_models.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/ons_cli/test_cli.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/ons_cli/test_common.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/ons_cli/test_root_cli.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/ons_cli/test_tui.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/ons_cli/utils.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/base/fixtures.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/base/test_base_api_client.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/base/test_base_dataset_manager.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/base/test_base_observation_manager.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/base/test_base_release_manager.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/base/test_base_series_manager.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/base/test_base_update_manager.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/base/test_base_update_state.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/base/test_db_path_resolution.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/fred/fixtures.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/fred/test_fred_api_client.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/fred/test_fred_dataset_manager.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/fred/test_fred_observation_manager.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/fred/test_fred_release_manager.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/fred/test_fred_series_manager.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/fred/test_fred_tz_handling.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/fred/test_fred_update_manager.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/ons/fixtures.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/ons/test_ons_api_client.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/ons/test_ons_dataset_manager.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/ons/test_ons_observation_manager.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/ons/test_ons_release_manager.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/ons/test_ons_series_manager.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/ons/test_ons_update_manager.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/rtdsm/fixtures.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/rtdsm/test_rtdsm_api_client.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/rtdsm/test_rtdsm_dataset_manager.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/rtdsm/test_rtdsm_helpers.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/rtdsm/test_rtdsm_observation_manager.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/rtdsm/test_rtdsm_release_manager.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/rtdsm/test_rtdsm_series_manager.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/rtdsm/test_rtdsm_update_manager.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/test_package_init.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/test_paths.py +0 -0
- {macrotrace-0.2.0 → macrotrace-0.2.2}/uv.lock +0 -0
|
@@ -37,7 +37,7 @@ jobs:
|
|
|
37
37
|
- name: Deploy dev docs (push to main)
|
|
38
38
|
if: github.ref == 'refs/heads/main'
|
|
39
39
|
run: |
|
|
40
|
-
uv run mike deploy --push --update-aliases dev
|
|
40
|
+
uv run mike deploy --push --update-aliases --prop-set hidden=true dev
|
|
41
41
|
if ! uv run mike list 2>/dev/null | grep -qE '^[0-9]'; then
|
|
42
42
|
uv run mike set-default --push dev
|
|
43
43
|
fi
|
|
@@ -3,6 +3,30 @@
|
|
|
3
3
|
Format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/);
|
|
4
4
|
versions follow [SemVer](https://semver.org/).
|
|
5
5
|
|
|
6
|
+
## 0.2.2 — 2026-06-12
|
|
7
|
+
|
|
8
|
+
- **Vintage matching:** `identify_vintage` now interprets a tz-naive index in
|
|
9
|
+
the source's native timezone (e.g. midnight US Central for FRED) instead of
|
|
10
|
+
UTC, so plain dates match FRED vintages.
|
|
11
|
+
- **Vintage matching:** Added a `decimals` argument that rounds both sides
|
|
12
|
+
before comparison, for matching data published at a fixed precision.
|
|
13
|
+
- **Vintage matching:** `VintageMatch.failure_reason` now reports why nothing
|
|
14
|
+
matched: timestamps no vintage contains (`"coverage"`) vs value
|
|
15
|
+
disagreements (`"values"`).
|
|
16
|
+
- **Vintage matching:** Numeric/positional indexes are rejected with a clear
|
|
17
|
+
error, and `pd.PeriodIndex` is supported.
|
|
18
|
+
- **Vintage matching:** When nothing matches, `VintageMatch.alignment_hint`
|
|
19
|
+
flags timestamps that would match under a wrong timezone localization, a
|
|
20
|
+
constant time shift, or a month-end vs month-start convention.
|
|
21
|
+
|
|
22
|
+
## 0.2.1 — 2026-06-11
|
|
23
|
+
|
|
24
|
+
- **Docs:** RTDSM is now listed as an available source on the documentation
|
|
25
|
+
homepage — it had been left under "Coming Soon" when 0.2.0 shipped.
|
|
26
|
+
- **Docs:** The version selector now shows the `latest` label next to the
|
|
27
|
+
release it points at, and the in-development `dev` build is hidden from
|
|
28
|
+
the selector (it is still reachable directly at `/dev/`).
|
|
29
|
+
|
|
6
30
|
## 0.2.0 — 2026-06-10
|
|
7
31
|
|
|
8
32
|
- **Sources:** Added the Federal Reserve Bank of Philadelphia's Real-Time
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: macrotrace
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: A Python library for managing and analyzing macroeconomic time series data with vintage awareness.
|
|
5
5
|
Project-URL: Homepage, https://github.com/john-ramsey/macrotrace
|
|
6
6
|
Project-URL: Repository, https://github.com/john-ramsey/macrotrace
|
|
@@ -170,13 +170,9 @@ if match.is_ambiguous:
|
|
|
170
170
|
elif match.matched:
|
|
171
171
|
print(f"Matches the {match.release_date.date()} vintage")
|
|
172
172
|
else:
|
|
173
|
-
print("No matching vintage found")
|
|
173
|
+
print(f"No matching vintage found (failed on: {match.failure_reason})")
|
|
174
174
|
```
|
|
175
175
|
|
|
176
|
-
A match is ambiguous when the data is unchanged across consecutive vintages, so
|
|
177
|
-
the values alone cannot pin down a single release; `release_dates` lists every
|
|
178
|
-
consistent vintage in that case.
|
|
179
|
-
|
|
180
176
|
## Command-Line Tools
|
|
181
177
|
|
|
182
178
|
MacroTrace includes command-line tools for exploring ONS datasets:
|
|
@@ -129,13 +129,9 @@ if match.is_ambiguous:
|
|
|
129
129
|
elif match.matched:
|
|
130
130
|
print(f"Matches the {match.release_date.date()} vintage")
|
|
131
131
|
else:
|
|
132
|
-
print("No matching vintage found")
|
|
132
|
+
print(f"No matching vintage found (failed on: {match.failure_reason})")
|
|
133
133
|
```
|
|
134
134
|
|
|
135
|
-
A match is ambiguous when the data is unchanged across consecutive vintages, so
|
|
136
|
-
the values alone cannot pin down a single release; `release_dates` lists every
|
|
137
|
-
consistent vintage in that case.
|
|
138
|
-
|
|
139
135
|
## Command-Line Tools
|
|
140
136
|
|
|
141
137
|
MacroTrace includes command-line tools for exploring ONS datasets:
|
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
from typing import TYPE_CHECKING, List, Optional, Dict, Any
|
|
1
|
+
from typing import TYPE_CHECKING, List, Optional, Dict, Any, Tuple
|
|
2
2
|
from dataclasses import dataclass, replace
|
|
3
3
|
from dateutil import parser
|
|
4
|
-
from datetime import datetime, timezone
|
|
4
|
+
from datetime import datetime, timedelta, timezone, tzinfo
|
|
5
5
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
import pandas as pd
|
|
8
|
+
from pandas.tseries.frequencies import get_period_alias
|
|
8
9
|
from tabulate import tabulate
|
|
9
10
|
from darts import TimeSeries
|
|
10
11
|
from peewee import JOIN
|
|
@@ -32,11 +33,15 @@ logger = logging.getLogger(__name__)
|
|
|
32
33
|
VALID_SOURCES = ["FRED", "ONS", "RTDSM", "USER"]
|
|
33
34
|
# USER is for user provided data, not from an API
|
|
34
35
|
|
|
36
|
+
# With fewer observations than this, a constant-shift scan can match a vintage
|
|
37
|
+
# by coincidence, so identify_vintage only reports shift hints above it.
|
|
38
|
+
MIN_OBSERVATIONS_FOR_SHIFT_DETECTION = 5
|
|
39
|
+
|
|
35
40
|
|
|
36
41
|
@dataclass
|
|
37
42
|
class VintageMatch:
|
|
38
43
|
"""
|
|
39
|
-
Result of matching
|
|
44
|
+
Result of matching a data series with an unknown release date against the vintages of an MTTimeSeries (see ``MTTimeSeries.identify_vintage``).
|
|
40
45
|
|
|
41
46
|
A match is ambiguous when the supplied data is consistent with more than one vintage.
|
|
42
47
|
This is common when the data only covers observations that were never revised across a run of consecutive vintages, so the values alone cannot pin down a single release.
|
|
@@ -46,12 +51,22 @@ class VintageMatch:
|
|
|
46
51
|
n_observations: Number of non-null observations from the supplied data that were compared against each vintage.
|
|
47
52
|
rtol: Relative tolerance used for the value comparison.
|
|
48
53
|
atol: Absolute tolerance used for the value comparison.
|
|
54
|
+
decimals: Number of decimals both sides were rounded to before comparison, or None when no rounding was applied.
|
|
55
|
+
n_vintages_compared: Total number of vintages the supplied data was compared against.
|
|
56
|
+
n_vintages_covering: Number of vintages containing every supplied timestamp. When zero, the data failed on coverage rather than on values — see ``failure_reason``.
|
|
57
|
+
alignment_hint: When nothing matched but a diagnostic pass found a reinterpretation of the timestamps under which the values do match (wrong timezone localization, a constant time shift, or a different day-of-period convention), a human-readable description of it. The hinted reinterpretation never counts as a match — fix the index and re-run.
|
|
58
|
+
time_shift: The constant shift that, added to the supplied index, makes the values match at least one vintage. Only set when the hint came from the constant-shift detector.
|
|
49
59
|
"""
|
|
50
60
|
|
|
51
61
|
release_dates: List[datetime]
|
|
52
62
|
n_observations: int
|
|
53
63
|
rtol: float
|
|
54
64
|
atol: float
|
|
65
|
+
decimals: Optional[int] = None
|
|
66
|
+
n_vintages_compared: int = 0
|
|
67
|
+
n_vintages_covering: int = 0
|
|
68
|
+
alignment_hint: Optional[str] = None
|
|
69
|
+
time_shift: Optional[timedelta] = None
|
|
55
70
|
|
|
56
71
|
@property
|
|
57
72
|
def matched(self) -> bool:
|
|
@@ -63,6 +78,20 @@ class VintageMatch:
|
|
|
63
78
|
"""True if the supplied data matched more than one vintage."""
|
|
64
79
|
return len(self.release_dates) > 1
|
|
65
80
|
|
|
81
|
+
@property
|
|
82
|
+
def failure_reason(self) -> Optional[str]:
|
|
83
|
+
"""
|
|
84
|
+
Why the supplied data matched no vintage, or None when it matched.
|
|
85
|
+
|
|
86
|
+
Returns "coverage" when no vintage contains the supplied timestamps — usually a sign the index dates or timezone are wrong rather than the values — and "values" when at least one vintage contains the timestamps but none matched (the values disagreed, or ``require_exact_coverage`` excluded vintages carrying extra observations).
|
|
87
|
+
|
|
88
|
+
Returns:
|
|
89
|
+
Optional[str]: "coverage", "values", or None when the data matched.
|
|
90
|
+
"""
|
|
91
|
+
if self.matched:
|
|
92
|
+
return None
|
|
93
|
+
return "coverage" if self.n_vintages_covering == 0 else "values"
|
|
94
|
+
|
|
66
95
|
@property
|
|
67
96
|
def release_date(self) -> Optional[datetime]:
|
|
68
97
|
"""
|
|
@@ -85,7 +114,20 @@ class VintageMatch:
|
|
|
85
114
|
"""
|
|
86
115
|
compared = f"compared {self.n_observations} observation(s)"
|
|
87
116
|
if not self.matched:
|
|
88
|
-
|
|
117
|
+
if self.failure_reason == "coverage":
|
|
118
|
+
message = (
|
|
119
|
+
"VintageMatch(no matching vintage found; no vintage contains "
|
|
120
|
+
"the supplied timestamps - check the index dates/timezone"
|
|
121
|
+
)
|
|
122
|
+
else:
|
|
123
|
+
message = (
|
|
124
|
+
f"VintageMatch(no matching vintage found; "
|
|
125
|
+
f"{self.n_vintages_covering} vintage(s) contain the supplied "
|
|
126
|
+
f"timestamps but none matched"
|
|
127
|
+
)
|
|
128
|
+
if self.alignment_hint:
|
|
129
|
+
message += f"; hint: {self.alignment_hint}"
|
|
130
|
+
return f"{message}; {compared})"
|
|
89
131
|
if self.is_ambiguous:
|
|
90
132
|
dates = ", ".join(d.strftime("%Y-%m-%d") for d in self.release_dates)
|
|
91
133
|
return (
|
|
@@ -420,42 +462,59 @@ class MTTimeSeries:
|
|
|
420
462
|
rtol: float = 1e-05,
|
|
421
463
|
atol: float = 1e-08,
|
|
422
464
|
require_exact_coverage: bool = False,
|
|
465
|
+
decimals: Optional[int] = None,
|
|
423
466
|
) -> VintageMatch:
|
|
424
467
|
"""
|
|
425
|
-
Identify which vintage(s) a block of
|
|
468
|
+
Identify which vintage(s) a block of data with an unknown release date came from.
|
|
426
469
|
|
|
427
470
|
Replication packages frequently ship a series of observations with no release date attached, only a source.
|
|
428
471
|
This compares the supplied data against every vintage in this MTTimeSeries and reports the release date(s) whose values it is consistent with, so you can recover the vintage you are actually working with.
|
|
472
|
+
Note that only the release date is treated as unknown: the observations themselves must be dated, with the series index supplying the observation dates.
|
|
429
473
|
|
|
430
474
|
The supplied data is treated as a (possibly incomplete) window of a vintage: every timestamp in ``series`` must be present in a vintage and its values must agree (within tolerance) for that vintage to match.
|
|
431
475
|
A vintage may carry extra observations the data does not include.
|
|
432
476
|
When the data does not change across consecutive vintages the match is necessarily ambiguous, and all consistent release dates are returned.
|
|
433
477
|
|
|
478
|
+
When nothing matches, a diagnostic pass checks whether the values would match under a common timestamp misalignment — the index localized to the wrong timezone, shifted by a constant offset, or stamped with a different day-of-period convention (e.g. month-end instead of month-start) — and reports it via ``VintageMatch.alignment_hint``.
|
|
479
|
+
A hinted reinterpretation is never counted as a match.
|
|
480
|
+
|
|
434
481
|
Args:
|
|
435
|
-
series (pd.Series): The
|
|
436
|
-
|
|
437
|
-
A
|
|
482
|
+
series (pd.Series): The data to identify, indexed by observation date.
|
|
483
|
+
A tz-naive index (dates, date strings, or naive timestamps) is interpreted in the source's native observation timezone — e.g. midnight US Central for FRED — falling back to UTC with a warning when the source has no registered manager.
|
|
484
|
+
A ``pd.PeriodIndex`` is compared on each period's start timestamp.
|
|
485
|
+
A numeric index is rejected, because pandas would silently read it as nanosecond offsets from 1970 rather than dates.
|
|
486
|
+
Null values are dropped before matching.
|
|
438
487
|
rtol (float): Relative tolerance for the value comparison, passed through to ``numpy.isclose``. Defaults to 1e-05.
|
|
439
488
|
atol (float): Absolute tolerance for the value comparison, passed through to ``numpy.isclose``. Defaults to 1e-08.
|
|
440
489
|
require_exact_coverage (bool): If True, a vintage only matches when its timestamps are exactly the timestamps in ``series``, rather than allowing the data to be a sub-window of the vintage. Defaults to False.
|
|
490
|
+
decimals (Optional[int]): When set, both the supplied data and each vintage's values are rounded to this many decimals before comparison.
|
|
491
|
+
Use this when the data was published at a fixed precision (e.g. ``decimals=1`` for a series published at one decimal place); it is more faithful than loosening ``atol``, which both accepts values that round apart and rejects values that round together. Defaults to None (no rounding).
|
|
441
492
|
|
|
442
493
|
Returns:
|
|
443
|
-
VintageMatch: The matching release date(s) and comparison details.
|
|
494
|
+
VintageMatch: The matching release date(s) and comparison details.
|
|
495
|
+
Check ``matched`` to see whether at least one vintage matched, ``failure_reason`` to distinguish data whose timestamps no vintage contains ("coverage") from data that no vintage matched despite containing its timestamps ("values"), and ``alignment_hint`` for a detected timestamp misalignment.
|
|
444
496
|
|
|
445
497
|
Raises:
|
|
446
498
|
TypeError: If ``series`` is not a pandas Series.
|
|
447
|
-
ValueError: If ``series`` is empty, has a non-date or duplicated index, or contains no non-null observations.
|
|
499
|
+
ValueError: If ``series`` is empty, has a numeric, non-date, or duplicated index, or contains no non-null observations.
|
|
448
500
|
"""
|
|
449
|
-
candidate = self._prepare_candidate_series(series)
|
|
501
|
+
candidate, original_tz = self._prepare_candidate_series(series)
|
|
502
|
+
candidate_values = candidate.to_numpy(dtype=float)
|
|
503
|
+
if decimals is not None:
|
|
504
|
+
candidate_values = np.round(candidate_values, decimals)
|
|
450
505
|
|
|
451
506
|
matches: List[datetime] = []
|
|
507
|
+
vintage_frames: List[Tuple[datetime, pd.Series]] = []
|
|
508
|
+
n_vintages_covering = 0
|
|
452
509
|
for vintage in self._vintages_including_current_series:
|
|
453
510
|
vintage_df = vintage.to_dataframe(mode="default", tz="utc")
|
|
454
511
|
vintage_series = vintage_df.set_index("timestamp")["value"]
|
|
512
|
+
vintage_frames.append((vintage.release_date, vintage_series))
|
|
455
513
|
|
|
456
514
|
# Every supplied timestamp must exist in the vintage, otherwise the data cannot be a window of it.
|
|
457
515
|
if not candidate.index.isin(vintage_series.index).all():
|
|
458
516
|
continue
|
|
517
|
+
n_vintages_covering += 1
|
|
459
518
|
|
|
460
519
|
# With exact coverage the vintage must hold exactly the supplied timestamps and nothing more.
|
|
461
520
|
if (
|
|
@@ -464,37 +523,64 @@ class MTTimeSeries:
|
|
|
464
523
|
):
|
|
465
524
|
continue
|
|
466
525
|
|
|
467
|
-
|
|
526
|
+
aligned_values = vintage_series.reindex(candidate.index).to_numpy(
|
|
527
|
+
dtype=float
|
|
528
|
+
)
|
|
529
|
+
if decimals is not None:
|
|
530
|
+
aligned_values = np.round(aligned_values, decimals)
|
|
468
531
|
if np.isclose(
|
|
469
|
-
|
|
470
|
-
|
|
532
|
+
candidate_values,
|
|
533
|
+
aligned_values,
|
|
471
534
|
rtol=rtol,
|
|
472
535
|
atol=atol,
|
|
473
536
|
).all():
|
|
474
537
|
matches.append(vintage.release_date)
|
|
475
538
|
|
|
539
|
+
alignment_hint: Optional[str] = None
|
|
540
|
+
time_shift: Optional[timedelta] = None
|
|
541
|
+
if not matches:
|
|
542
|
+
alignment_hint, time_shift = self._diagnose_misalignment(
|
|
543
|
+
candidate,
|
|
544
|
+
candidate_values,
|
|
545
|
+
vintage_frames,
|
|
546
|
+
rtol,
|
|
547
|
+
atol,
|
|
548
|
+
decimals,
|
|
549
|
+
original_tz,
|
|
550
|
+
)
|
|
551
|
+
if alignment_hint is not None:
|
|
552
|
+
logger.warning("No vintage matched, but %s.", alignment_hint)
|
|
553
|
+
|
|
476
554
|
return VintageMatch(
|
|
477
555
|
release_dates=sorted(matches),
|
|
478
556
|
n_observations=len(candidate),
|
|
479
557
|
rtol=rtol,
|
|
480
558
|
atol=atol,
|
|
559
|
+
decimals=decimals,
|
|
560
|
+
n_vintages_compared=len(vintage_frames),
|
|
561
|
+
n_vintages_covering=n_vintages_covering,
|
|
562
|
+
alignment_hint=alignment_hint,
|
|
563
|
+
time_shift=time_shift,
|
|
481
564
|
)
|
|
482
565
|
|
|
483
|
-
def _prepare_candidate_series(self, series: pd.Series) -> pd.Series:
|
|
566
|
+
def _prepare_candidate_series(
|
|
567
|
+
self, series: pd.Series
|
|
568
|
+
) -> Tuple[pd.Series, Optional[tzinfo]]:
|
|
484
569
|
"""
|
|
485
570
|
Validate and normalize a user-supplied data series for vintage matching.
|
|
486
571
|
|
|
487
572
|
Coerces the values to numeric, drops nulls, and renders the index as a sorted, unique, tz-aware UTC DatetimeIndex so it lines up with the timestamps produced by ``to_dataframe(tz="utc")``.
|
|
573
|
+
A tz-naive index is interpreted in the source's native observation timezone (see ``_native_observation_timezone``), a PeriodIndex is taken at each period's start, and a numeric index is rejected.
|
|
488
574
|
|
|
489
575
|
Args:
|
|
490
576
|
series (pd.Series): The user-supplied data indexed by date.
|
|
491
577
|
|
|
492
578
|
Returns:
|
|
493
|
-
pd.Series: The cleaned candidate series indexed by UTC timestamps.
|
|
579
|
+
Tuple[pd.Series, Optional[tzinfo]]: The cleaned candidate series indexed by UTC timestamps, and the timezone the supplied index carried (None when it was tz-naive) so misalignment diagnostics can recover the original wall-clock times.
|
|
494
580
|
|
|
495
581
|
Raises:
|
|
496
582
|
TypeError: If ``series`` is not a pandas Series.
|
|
497
|
-
ValueError: If ``series`` is empty, has a non-date or duplicated index, or contains no non-null observations.
|
|
583
|
+
ValueError: If ``series`` is empty, has a numeric, non-date, or duplicated index, or contains no non-null observations.
|
|
498
584
|
"""
|
|
499
585
|
if not isinstance(series, pd.Series):
|
|
500
586
|
raise TypeError(
|
|
@@ -507,8 +593,20 @@ class MTTimeSeries:
|
|
|
507
593
|
if candidate.empty:
|
|
508
594
|
raise ValueError("The series contains no non-null observations to match.")
|
|
509
595
|
|
|
596
|
+
index_data = candidate.index
|
|
597
|
+
# Periods carry real dates; compare on each period's start timestamp.
|
|
598
|
+
if isinstance(index_data, pd.PeriodIndex):
|
|
599
|
+
index_data = index_data.to_timestamp()
|
|
600
|
+
|
|
601
|
+
# Reject positional/numeric indexes before pd.to_datetime, which would
|
|
602
|
+
# silently read them as nanosecond offsets from 1970-01-01.
|
|
603
|
+
if pd.api.types.is_numeric_dtype(index_data):
|
|
604
|
+
raise ValueError(
|
|
605
|
+
"The series has a numeric index, not dates. Set the observation dates on the index before matching."
|
|
606
|
+
)
|
|
607
|
+
|
|
510
608
|
try:
|
|
511
|
-
index = pd.to_datetime(candidate.index)
|
|
609
|
+
index = pd.to_datetime(index_data)
|
|
512
610
|
except (ValueError, TypeError) as exc:
|
|
513
611
|
raise ValueError(
|
|
514
612
|
"The series must be indexed by dates that pandas can parse."
|
|
@@ -517,11 +615,15 @@ class MTTimeSeries:
|
|
|
517
615
|
if not isinstance(index, pd.DatetimeIndex):
|
|
518
616
|
raise ValueError("The series must be indexed by dates, not scalar values.")
|
|
519
617
|
|
|
618
|
+
original_tz = index.tz
|
|
520
619
|
if index.tz is None:
|
|
620
|
+
native_tz = self._native_observation_timezone()
|
|
521
621
|
logger.warning(
|
|
522
|
-
"The series index has no timezone information.
|
|
622
|
+
"The series index has no timezone information. Interpreting it in "
|
|
623
|
+
"the source's native observation timezone (%s).",
|
|
624
|
+
native_tz,
|
|
523
625
|
)
|
|
524
|
-
index = index.tz_localize("UTC")
|
|
626
|
+
index = index.tz_localize(native_tz).tz_convert("UTC")
|
|
525
627
|
else:
|
|
526
628
|
index = index.tz_convert("UTC")
|
|
527
629
|
|
|
@@ -529,7 +631,249 @@ class MTTimeSeries:
|
|
|
529
631
|
raise ValueError("The series index contains duplicate timestamps.")
|
|
530
632
|
|
|
531
633
|
candidate.index = index
|
|
532
|
-
return candidate.sort_index()
|
|
634
|
+
return candidate.sort_index(), original_tz
|
|
635
|
+
|
|
636
|
+
def _diagnose_misalignment(
|
|
637
|
+
self,
|
|
638
|
+
candidate: pd.Series,
|
|
639
|
+
candidate_values: np.ndarray,
|
|
640
|
+
vintage_frames: List[Tuple[datetime, pd.Series]],
|
|
641
|
+
rtol: float,
|
|
642
|
+
atol: float,
|
|
643
|
+
decimals: Optional[int],
|
|
644
|
+
original_tz: Optional[tzinfo],
|
|
645
|
+
) -> Tuple[Optional[str], Optional[timedelta]]:
|
|
646
|
+
"""
|
|
647
|
+
Look for a timestamp reinterpretation under which the unmatched data would match.
|
|
648
|
+
|
|
649
|
+
Runs the detectors from most to least specific — wrong timezone localization, a constant time shift, then a day-of-period convention mismatch — and stops at the first that fires.
|
|
650
|
+
|
|
651
|
+
Args:
|
|
652
|
+
candidate (pd.Series): The prepared candidate series (UTC index).
|
|
653
|
+
candidate_values (np.ndarray): The candidate values, already rounded when ``decimals`` is set.
|
|
654
|
+
vintage_frames (List[Tuple[datetime, pd.Series]]): Each vintage's release date and UTC-indexed values.
|
|
655
|
+
rtol (float): Relative tolerance for the value comparison.
|
|
656
|
+
atol (float): Absolute tolerance for the value comparison.
|
|
657
|
+
decimals (Optional[int]): Decimals both sides are rounded to, or None.
|
|
658
|
+
original_tz (Optional[tzinfo]): The timezone the supplied index carried, None when it was tz-naive.
|
|
659
|
+
|
|
660
|
+
Returns:
|
|
661
|
+
Tuple[Optional[str], Optional[timedelta]]: A human-readable hint and, for the constant-shift detector only, the shift that aligns the index. Both None when no detector fired.
|
|
662
|
+
"""
|
|
663
|
+
hint = self._diagnose_wrong_timezone(
|
|
664
|
+
candidate,
|
|
665
|
+
candidate_values,
|
|
666
|
+
vintage_frames,
|
|
667
|
+
rtol,
|
|
668
|
+
atol,
|
|
669
|
+
decimals,
|
|
670
|
+
original_tz,
|
|
671
|
+
)
|
|
672
|
+
if hint is not None:
|
|
673
|
+
return hint, None
|
|
674
|
+
|
|
675
|
+
hint, shift = self._diagnose_constant_shift(
|
|
676
|
+
candidate, candidate_values, vintage_frames, rtol, atol, decimals
|
|
677
|
+
)
|
|
678
|
+
if hint is not None:
|
|
679
|
+
return hint, shift
|
|
680
|
+
|
|
681
|
+
hint = self._diagnose_period_alignment(
|
|
682
|
+
candidate, candidate_values, vintage_frames, rtol, atol, decimals
|
|
683
|
+
)
|
|
684
|
+
return hint, None
|
|
685
|
+
|
|
686
|
+
def _diagnose_wrong_timezone(
|
|
687
|
+
self,
|
|
688
|
+
candidate: pd.Series,
|
|
689
|
+
candidate_values: np.ndarray,
|
|
690
|
+
vintage_frames: List[Tuple[datetime, pd.Series]],
|
|
691
|
+
rtol: float,
|
|
692
|
+
atol: float,
|
|
693
|
+
decimals: Optional[int],
|
|
694
|
+
original_tz: Optional[tzinfo],
|
|
695
|
+
) -> Optional[str]:
|
|
696
|
+
"""
|
|
697
|
+
Check whether the data matches when its wall-clock times are read in the source's native timezone.
|
|
698
|
+
|
|
699
|
+
Only applies to a tz-aware index (a naive one already went through the native timezone), and catches indexes localized to the wrong timezone — including across DST changes, where the error is not a constant offset.
|
|
700
|
+
|
|
701
|
+
Returns:
|
|
702
|
+
Optional[str]: The hint, or None when the detector did not fire.
|
|
703
|
+
"""
|
|
704
|
+
if original_tz is None:
|
|
705
|
+
return None
|
|
706
|
+
native_tz = self._native_observation_timezone()
|
|
707
|
+
wall_clock = candidate.index.tz_convert(original_tz).tz_localize(None)
|
|
708
|
+
try:
|
|
709
|
+
reinterpreted = wall_clock.tz_localize(native_tz).tz_convert("UTC")
|
|
710
|
+
except Exception:
|
|
711
|
+
# Wall-clock times that do not exist (or are ambiguous) in the
|
|
712
|
+
# native timezone around a DST change cannot be reinterpreted.
|
|
713
|
+
return None
|
|
714
|
+
if reinterpreted.has_duplicates or reinterpreted.equals(candidate.index):
|
|
715
|
+
return None
|
|
716
|
+
|
|
717
|
+
n_matching = sum(
|
|
718
|
+
self._candidate_matches_vintage(
|
|
719
|
+
reinterpreted, vintage_series, candidate_values, rtol, atol, decimals
|
|
720
|
+
)
|
|
721
|
+
for _, vintage_series in vintage_frames
|
|
722
|
+
)
|
|
723
|
+
if n_matching == 0:
|
|
724
|
+
return None
|
|
725
|
+
return (
|
|
726
|
+
f"the values match {n_matching} vintage(s) when the wall-clock times "
|
|
727
|
+
f"are reinterpreted in the source's native observation timezone "
|
|
728
|
+
f"({native_tz}) — the index appears to be localized to the wrong "
|
|
729
|
+
f"timezone; pass a tz-naive index or localize it to {native_tz}"
|
|
730
|
+
)
|
|
731
|
+
|
|
732
|
+
def _diagnose_constant_shift(
    self,
    candidate: pd.Series,
    candidate_values: np.ndarray,
    vintage_frames: List[Tuple[datetime, pd.Series]],
    rtol: float,
    atol: float,
    decimals: Optional[int],
) -> Tuple[Optional[str], Optional[timedelta]]:
    """
    Look for a single constant index offset under which the candidate matches a vintage.

    For every vintage, each vintage timestamp proposes the offset that would move the first candidate timestamp onto it. An offset survives only if it also moves the middle and the last candidate timestamps onto timestamps of that vintage; only the survivors are value-checked.
    Candidates shorter than ``MIN_OBSERVATIONS_FOR_SHIFT_DETECTION`` are not examined at all.

    Args:
        candidate (pd.Series): The candidate data; only its index is read here.
        candidate_values (np.ndarray): The candidate values handed to the value comparison.
        vintage_frames (List[Tuple[datetime, pd.Series]]): Pairs whose second element is the vintage series; the first element is ignored here.
        rtol (float): Relative tolerance forwarded to the value comparison.
        atol (float): Absolute tolerance forwarded to the value comparison.
        decimals (Optional[int]): Rounding setting forwarded to the value comparison.

    Returns:
        Tuple[Optional[str], Optional[timedelta]]: The hint and the smallest-magnitude matching shift to add to the index, or (None, None) when no shift matched.
    """
    if len(candidate) < MIN_OBSERVATIONS_FOR_SHIFT_DETECTION:
        return None, None

    candidate_index = candidate.index
    anchor = candidate_index[0]
    midpoint = candidate_index[len(candidate) // 2]
    endpoint = candidate_index[-1]
    no_shift = pd.Timedelta(0)

    # Number of vintages each surviving shift matched.
    vintages_per_shift: Dict[timedelta, int] = {}
    for _, vintage_series in vintage_frames:
        vintage_index = vintage_series.index
        deltas = vintage_index - anchor
        # Keep only the offsets that also land the middle and last
        # candidate timestamps inside this vintage.
        midpoint_lands = (midpoint + deltas).isin(vintage_index)
        endpoint_lands = (endpoint + deltas).isin(vintage_index)
        for delta in deltas[midpoint_lands & endpoint_lands]:
            # A zero shift is the comparison that already failed.
            if delta != no_shift and self._candidate_matches_vintage(
                candidate_index + delta,
                vintage_series,
                candidate_values,
                rtol,
                atol,
                decimals,
            ):
                vintages_per_shift[delta] = vintages_per_shift.get(delta, 0) + 1

    if not vintages_per_shift:
        return None, None

    # Report the shift with the smallest magnitude.
    smallest = min(vintages_per_shift, key=abs)
    direction = "forward" if smallest > no_shift else "back"
    n_vintages = vintages_per_shift[smallest]
    magnitude = abs(smallest)
    hint = (
        f"the values match {n_vintages} vintage(s) when the index is "
        f"shifted {direction} by {magnitude} — the timestamps appear to "
        "follow a different convention than the stored observations"
    )
    return hint, smallest
|
|
785
|
+
|
|
786
|
+
def _diagnose_period_alignment(
|
|
787
|
+
self,
|
|
788
|
+
candidate: pd.Series,
|
|
789
|
+
candidate_values: np.ndarray,
|
|
790
|
+
vintage_frames: List[Tuple[datetime, pd.Series]],
|
|
791
|
+
rtol: float,
|
|
792
|
+
atol: float,
|
|
793
|
+
decimals: Optional[int],
|
|
794
|
+
) -> Optional[str]:
|
|
795
|
+
"""
|
|
796
|
+
Check whether the data matches a vintage when both are compared by calendar period.
|
|
797
|
+
|
|
798
|
+
Reduces both indexes to periods at the series frequency (daily or coarser), which washes out time-of-day and day-of-period conventions — catching e.g. month-end dates against month-start storage, a mismatch that is not a constant offset.
|
|
799
|
+
|
|
800
|
+
Returns:
|
|
801
|
+
Optional[str]: The hint, or None when the detector did not fire.
|
|
802
|
+
"""
|
|
803
|
+
try:
|
|
804
|
+
freq = self._infer_pandas_freq()
|
|
805
|
+
except (ValueError, TypeError):
|
|
806
|
+
# Too few observations, or per-row DST offsets that pandas cannot
|
|
807
|
+
# combine into a single tz-aware index.
|
|
808
|
+
return None
|
|
809
|
+
if freq is None:
|
|
810
|
+
return None
|
|
811
|
+
period_freq = get_period_alias(freq)
|
|
812
|
+
if period_freq is None or period_freq[:1].upper() not in {
|
|
813
|
+
"D",
|
|
814
|
+
"W",
|
|
815
|
+
"M",
|
|
816
|
+
"Q",
|
|
817
|
+
"A",
|
|
818
|
+
"Y",
|
|
819
|
+
}:
|
|
820
|
+
return None
|
|
821
|
+
|
|
822
|
+
candidate_periods = candidate.index.tz_localize(None).to_period(period_freq)
|
|
823
|
+
if candidate_periods.has_duplicates:
|
|
824
|
+
return None
|
|
825
|
+
|
|
826
|
+
n_matching = 0
|
|
827
|
+
for _, vintage_series in vintage_frames:
|
|
828
|
+
vintage_periods = vintage_series.index.tz_localize(None).to_period(
|
|
829
|
+
period_freq
|
|
830
|
+
)
|
|
831
|
+
if vintage_periods.has_duplicates:
|
|
832
|
+
continue
|
|
833
|
+
period_series = pd.Series(vintage_series.to_numpy(), index=vintage_periods)
|
|
834
|
+
if self._candidate_matches_vintage(
|
|
835
|
+
candidate_periods, period_series, candidate_values, rtol, atol, decimals
|
|
836
|
+
):
|
|
837
|
+
n_matching += 1
|
|
838
|
+
|
|
839
|
+
if n_matching == 0:
|
|
840
|
+
return None
|
|
841
|
+
return (
|
|
842
|
+
f"the values match {n_matching} vintage(s) when compared by calendar "
|
|
843
|
+
f"period ({period_freq}) — the index appears to use a different "
|
|
844
|
+
f"day-of-period or time convention than the stored observations "
|
|
845
|
+
f"(e.g. month-end instead of month-start dates)"
|
|
846
|
+
)
|
|
847
|
+
|
|
848
|
+
@staticmethod
|
|
849
|
+
def _candidate_matches_vintage(
|
|
850
|
+
index: pd.Index,
|
|
851
|
+
vintage_series: pd.Series,
|
|
852
|
+
candidate_values: np.ndarray,
|
|
853
|
+
rtol: float,
|
|
854
|
+
atol: float,
|
|
855
|
+
decimals: Optional[int],
|
|
856
|
+
) -> bool:
|
|
857
|
+
"""
|
|
858
|
+
Whether every index entry exists in the vintage with values agreeing within tolerance.
|
|
859
|
+
|
|
860
|
+
Args:
|
|
861
|
+
index (pd.Index): The (possibly reinterpreted) candidate index.
|
|
862
|
+
vintage_series (pd.Series): The vintage values, indexed compatibly with ``index``.
|
|
863
|
+
candidate_values (np.ndarray): The candidate values, already rounded when ``decimals`` is set.
|
|
864
|
+
rtol (float): Relative tolerance for the value comparison.
|
|
865
|
+
atol (float): Absolute tolerance for the value comparison.
|
|
866
|
+
decimals (Optional[int]): Decimals to round the vintage values to, or None.
|
|
867
|
+
|
|
868
|
+
Returns:
|
|
869
|
+
bool: True when the index is fully covered and all values agree.
|
|
870
|
+
"""
|
|
871
|
+
if not index.isin(vintage_series.index).all():
|
|
872
|
+
return False
|
|
873
|
+
aligned = vintage_series.reindex(index).to_numpy(dtype=float)
|
|
874
|
+
if decimals is not None:
|
|
875
|
+
aligned = np.round(aligned, decimals)
|
|
876
|
+
return bool(np.isclose(candidate_values, aligned, rtol=rtol, atol=atol).all())
|
|
533
877
|
|
|
534
878
|
### Theoretically if the units change, we should not be able to compare them
|
|
535
879
|
def generate_vintage_matrix(self) -> pd.DataFrame:
|
|
@@ -1081,22 +1425,42 @@ class MTTimeSeries:
|
|
|
1081
1425
|
"""
|
|
1082
1426
|
return self.vintages + [self]
|
|
1083
1427
|
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
Returns:
|
|
1088
|
-
UpdateManager: An instance of the appropriate update manager class.
|
|
1089
|
-
"""
|
|
1428
|
+
@staticmethod
def _source_manager_classes() -> Dict[str, type]:
    """Return the source-name to UpdateManager-class registry; the classes are imported lazily to avoid circular imports."""
    from macrotrace.sources.fred import FredUpdateManager
    from macrotrace.sources.ons import ONSUpdateManager
    from macrotrace.sources.rtdsm import RTDSMUpdateManager

    registry: Dict[str, type] = {}
    registry["FRED"] = FredUpdateManager
    registry["ONS"] = ONSUpdateManager
    registry["RTDSM"] = RTDSMUpdateManager
    return registry
|
|
1099
1440
|
|
|
1441
|
+
def _native_observation_timezone(self) -> tzinfo:
|
|
1442
|
+
"""
|
|
1443
|
+
The timezone this series' source stamps observation timestamps with.
|
|
1444
|
+
|
|
1445
|
+
Looked up from the source's update manager class (``NATIVE_OBSERVATION_TZ``).
|
|
1446
|
+
Sources without a registered manager (e.g. user-provided data) fall back to UTC.
|
|
1447
|
+
|
|
1448
|
+
Returns:
|
|
1449
|
+
tzinfo: The source's declared observation timezone, or UTC.
|
|
1450
|
+
"""
|
|
1451
|
+
manager_class = self._source_manager_classes().get(self.source)
|
|
1452
|
+
if manager_class is None:
|
|
1453
|
+
return timezone.utc
|
|
1454
|
+
return manager_class.NATIVE_OBSERVATION_TZ
|
|
1455
|
+
|
|
1456
|
+
def _get_update_manager(self):
|
|
1457
|
+
"""Get the appropriate update manager for the data source.
|
|
1458
|
+
|
|
1459
|
+
Returns:
|
|
1460
|
+
UpdateManager: An instance of the appropriate update manager class.
|
|
1461
|
+
"""
|
|
1462
|
+
source_managers = self._source_manager_classes()
|
|
1463
|
+
|
|
1100
1464
|
assert (
|
|
1101
1465
|
self.source in source_managers.keys()
|
|
1102
1466
|
), f"Unsupported source: {self.source}. No update manager available."
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from typing import Any, List, Dict, Optional, Tuple
|
|
2
|
-
from datetime import datetime, timezone
|
|
2
|
+
from datetime import datetime, timezone, tzinfo
|
|
3
3
|
from math import floor
|
|
4
4
|
from dataclasses import dataclass
|
|
5
5
|
from importlib.metadata import version, PackageNotFoundError
|
|
@@ -691,6 +691,12 @@ class ObservationManager:
|
|
|
691
691
|
|
|
692
692
|
|
|
693
693
|
class UpdateManager:
|
|
694
|
+
# The timezone this source stamps observation timestamps with. Every
|
|
695
|
+
# subclass must declare its own — MTTimeSeries.identify_vintage uses it to
|
|
696
|
+
# interpret tz-naive candidate data, so a wrong value silently breaks
|
|
697
|
+
# matching for that source.
|
|
698
|
+
NATIVE_OBSERVATION_TZ: tzinfo
|
|
699
|
+
|
|
694
700
|
def __init__(
|
|
695
701
|
self,
|
|
696
702
|
dataset_id: str,
|
|
@@ -3,6 +3,7 @@ from unittest.mock import MagicMock, patch
|
|
|
3
3
|
from datetime import datetime, timedelta, timezone
|
|
4
4
|
import pandas as pd
|
|
5
5
|
import numpy as np
|
|
6
|
+
import pytz
|
|
6
7
|
from darts import TimeSeries
|
|
7
8
|
|
|
8
9
|
from macrotrace.models import (
|
|
@@ -1368,10 +1369,10 @@ def test_identify_vintage_respects_tolerance(sample_time_series_with_revisions):
|
|
|
1368
1369
|
).matched
|
|
1369
1370
|
|
|
1370
1371
|
|
|
1371
|
-
def
|
|
1372
|
+
def test_identify_vintage_naive_index_unknown_source_falls_back_to_utc(
|
|
1372
1373
|
sample_time_series_with_revisions, caplog
|
|
1373
1374
|
):
|
|
1374
|
-
"""A tz-naive index is
|
|
1375
|
+
"""A tz-naive index for a source with no registered manager is interpreted as UTC (with a warning) and still matches."""
|
|
1375
1376
|
target_release = datetime(2024, 12, 10, tzinfo=timezone.utc)
|
|
1376
1377
|
vintage = _vintage_with_release_date(
|
|
1377
1378
|
sample_time_series_with_revisions, target_release
|
|
@@ -1381,10 +1382,229 @@ def test_identify_vintage_naive_index_assumes_utc(
|
|
|
1381
1382
|
|
|
1382
1383
|
result = sample_time_series_with_revisions.identify_vintage(candidate)
|
|
1383
1384
|
|
|
1384
|
-
assert "series index has no timezone information
|
|
1385
|
+
assert "series index has no timezone information" in caplog.text
|
|
1386
|
+
assert "(UTC)" in caplog.text
|
|
1385
1387
|
assert result.release_date == target_release
|
|
1386
1388
|
|
|
1387
1389
|
|
|
1390
|
+
def test_identify_vintage_naive_index_uses_source_native_timezone():
    """
    A FRED series stored at US Central midnight is matched by a tz-naive
    candidate carrying the same wall-clock dates, even though the two
    observations sit either side of a DST change and so have different
    UTC offsets.
    """
    chicago = pytz.timezone("America/Chicago")
    released = datetime(2024, 3, 12, tzinfo=timezone.utc)
    # The 2024-03-10 US DST transition falls between these two days.
    wall_clock_days = [datetime(2024, 3, 9), datetime(2024, 3, 11)]

    stored = []
    for position, day in enumerate(wall_clock_days):
        stored.append(
            MTObservation(
                timestamp=chicago.localize(day),
                value=100.0 + position,
                release_date=released,
            )
        )
    series = MTTimeSeries._from_data(
        dataset_id="TEST",
        release_date=released,
        current_observations=stored,
        vintages=[],
        source="FRED",
        frequency="D",
    )

    naive_candidate = pd.Series(
        [100.0, 101.0], index=pd.to_datetime(wall_clock_days)
    )
    outcome = series.identify_vintage(naive_candidate)

    assert outcome.matched
    assert outcome.release_date == released
|
|
1423
|
+
|
|
1424
|
+
|
|
1425
|
+
def test_identify_vintage_rejects_numeric_index(sample_time_series):
    """Both a default positional index and an integer year index must raise a ValueError mentioning "numeric index"."""
    numeric_candidates = (
        # Default RangeIndex.
        pd.Series([100.0, 101.0, 102.0]),
        # Integer years used as labels.
        pd.Series([100.0, 101.0], index=[2024, 2025]),
    )
    for numeric_candidate in numeric_candidates:
        with pytest.raises(ValueError, match="numeric index"):
            sample_time_series.identify_vintage(numeric_candidate)
|
|
1434
|
+
|
|
1435
|
+
|
|
1436
|
+
def test_identify_vintage_accepts_period_index(sample_time_series):
    """The current series, re-indexed by daily periods, is still identified as the current release."""
    current = sample_time_series.to_series()
    daily_periods = pd.PeriodIndex(current.index.tz_localize(None), freq="D")
    period_candidate = pd.Series(current.to_numpy(), index=daily_periods)

    outcome = sample_time_series.identify_vintage(period_candidate)

    assert outcome.matched
    assert outcome.release_date == sample_time_series.release_date
|
|
1448
|
+
|
|
1449
|
+
|
|
1450
|
+
def test_identify_vintage_decimals_rounds_both_sides(
    sample_time_series_with_revisions,
):
    """With ``decimals=1`` a +0.04 perturbation still identifies the vintage, while +0.06 does not; without ``decimals`` the +0.04 perturbation fails."""
    series = sample_time_series_with_revisions
    expected_release = datetime(2024, 12, 10, tzinfo=timezone.utc)
    stored_vintage = _vintage_with_release_date(series, expected_release)

    # Below the one-decimal rounding boundary.
    slightly_off = stored_vintage.to_series() + 0.04
    raw_outcome = series.identify_vintage(slightly_off)
    assert not raw_outcome.matched

    rounded_outcome = series.identify_vintage(slightly_off, decimals=1)
    assert rounded_outcome.release_date == expected_release
    assert rounded_outcome.decimals == 1

    # Past the one-decimal rounding boundary.
    too_far_off = stored_vintage.to_series() + 0.06
    past_boundary_outcome = series.identify_vintage(too_far_off, decimals=1)
    assert not past_boundary_outcome.matched
|
|
1471
|
+
|
|
1472
|
+
|
|
1473
|
+
def test_identify_vintage_failure_reason(sample_time_series):
    """``failure_reason`` is None on a match, "coverage" when the timestamps are absent, and "values" when only the values disagree."""
    current = sample_time_series.to_series()

    # Exact data: a match, with every vintage compared.
    success = sample_time_series.identify_vintage(current)
    expected_compared = len(sample_time_series._vintages_including_current_series)
    assert success.failure_reason is None
    assert success.n_vintages_compared == expected_compared

    # Identical values moved six hours later: no vintage covers the index.
    moved = current.copy()
    moved.index = current.index + pd.Timedelta(hours=6)
    missing_coverage = sample_time_series.identify_vintage(moved)
    assert not missing_coverage.matched
    assert missing_coverage.failure_reason == "coverage"
    assert missing_coverage.n_vintages_covering == 0
    assert "check the index dates/timezone" in repr(missing_coverage)

    # Original timestamps with a value of 9999.0 everywhere: covered, but the
    # values disagree and no alignment hint or shift is offered.
    bogus = pd.Series(9999.0, index=current.index)
    disagreeing = sample_time_series.identify_vintage(bogus)
    assert not disagreeing.matched
    assert disagreeing.failure_reason == "values"
    assert disagreeing.n_vintages_covering > 0
    assert "no matching vintage found" in repr(disagreeing)
    assert disagreeing.alignment_hint is None
    assert disagreeing.time_shift is None
|
|
1502
|
+
|
|
1503
|
+
|
|
1504
|
+
def test_identify_vintage_hints_constant_shift(sample_time_series):
    """A candidate moved six hours forward is not matched, but is reported with the -6h shift that realigns it."""
    current = sample_time_series.to_series()
    moved = current.copy()
    moved.index = current.index + pd.Timedelta(hours=6)

    outcome = sample_time_series.identify_vintage(moved)

    assert not outcome.matched
    assert outcome.time_shift == pd.Timedelta(hours=-6)
    assert "shifted back by" in outcome.alignment_hint
    assert "hint:" in repr(outcome)
|
|
1516
|
+
|
|
1517
|
+
|
|
1518
|
+
def test_identify_vintage_hints_wrong_timezone(caplog):
    """
    A candidate with the stored wall-clock dates but localized to UTC instead
    of US Central is not matched; it gets a wrong-timezone hint naming
    America/Chicago and no constant shift, since the dates span a DST change.
    """
    chicago = pytz.timezone("America/Chicago")
    released = datetime(2024, 3, 12, tzinfo=timezone.utc)
    wall_clock_days = [
        datetime(2024, 3, 8),
        datetime(2024, 3, 9),
        datetime(2024, 3, 11),
    ]

    stored = []
    for position, day in enumerate(wall_clock_days):
        stored.append(
            MTObservation(
                timestamp=chicago.localize(day),
                value=100.0 + position,
                release_date=released,
            )
        )
    series = MTTimeSeries._from_data(
        dataset_id="TEST",
        release_date=released,
        current_observations=stored,
        vintages=[],
        source="FRED",
        frequency="D",
    )

    # Same wall-clock dates as the stored observations, localized to UTC.
    utc_index = pd.to_datetime(wall_clock_days).tz_localize("UTC")
    mislocalized = pd.Series([100.0, 101.0, 102.0], index=utc_index)

    outcome = series.identify_vintage(mislocalized)

    assert not outcome.matched
    assert outcome.time_shift is None
    assert "localized to the wrong timezone" in outcome.alignment_hint
    assert "America/Chicago" in outcome.alignment_hint
    assert "localized to the wrong timezone" in caplog.text
|
|
1555
|
+
|
|
1556
|
+
|
|
1557
|
+
def test_identify_vintage_hints_period_alignment():
    """A month-end candidate against month-start storage is not matched and gets the calendar-period hint rather than a constant shift."""
    released = datetime(2024, 5, 2, tzinfo=timezone.utc)
    first_of_months = pd.date_range("2024-01-01", periods=4, freq="MS", tz="UTC")

    stored = []
    for position, stamp in enumerate(first_of_months):
        stored.append(
            MTObservation(
                timestamp=stamp.to_pydatetime(),
                value=100.0 + position,
                release_date=released,
            )
        )
    series = MTTimeSeries._from_data(
        dataset_id="TEST",
        release_date=released,
        current_observations=stored,
        vintages=[],
        source="USER",
        frequency="MS",
    )

    last_of_months = pd.to_datetime(
        ["2024-01-31", "2024-02-29", "2024-03-31", "2024-04-30"]
    )
    month_end_candidate = pd.Series(
        [100.0, 101.0, 102.0, 103.0], index=last_of_months
    )

    outcome = series.identify_vintage(month_end_candidate)

    assert not outcome.matched
    assert outcome.time_shift is None
    assert "calendar period" in outcome.alignment_hint
    assert "month-end" in outcome.alignment_hint
|
|
1587
|
+
|
|
1588
|
+
|
|
1589
|
+
def test_identify_vintage_hint_never_counts_as_match(sample_time_series):
    """When only an alignment hint is produced, ``release_dates`` stays empty and ``release_date`` stays None."""
    current = sample_time_series.to_series()
    moved = current.copy()
    moved.index = current.index + pd.Timedelta(hours=6)

    outcome = sample_time_series.identify_vintage(moved)

    assert outcome.alignment_hint is not None
    assert outcome.release_dates == []
    assert outcome.release_date is None
|
|
1600
|
+
|
|
1601
|
+
|
|
1602
|
+
def test_source_managers_declare_native_observation_timezone():
    """Each class in the source-manager registry must expose a non-None ``NATIVE_OBSERVATION_TZ``; the source name is the failure message."""
    registry = MTTimeSeries._source_manager_classes()
    for name in registry:
        declared_tz = getattr(registry[name], "NATIVE_OBSERVATION_TZ", None)
        assert declared_tz is not None, name
|
|
1606
|
+
|
|
1607
|
+
|
|
1388
1608
|
def test_identify_vintage_require_exact_coverage(sample_time_series):
|
|
1389
1609
|
"""
|
|
1390
1610
|
Exact coverage disambiguates a window: only the vintage whose timestamps
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{macrotrace-0.2.0 → macrotrace-0.2.2}/tests/assets/mt/time_series/from_dataframe_with_tz.csv
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|