macrotrace 0.1.0__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- macrotrace-0.2.0/CHANGELOG.md +35 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/PKG-INFO +53 -3
- {macrotrace-0.1.0 → macrotrace-0.2.0}/README.md +51 -2
- {macrotrace-0.1.0 → macrotrace-0.2.0}/macrotrace/__init__.py +2 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/macrotrace/models/__init__.py +2 -1
- {macrotrace-0.1.0 → macrotrace-0.2.0}/macrotrace/models/mt/__init__.py +8 -2
- {macrotrace-0.1.0 → macrotrace-0.2.0}/macrotrace/models/mt/plotter.py +1 -1
- {macrotrace-0.1.0 → macrotrace-0.2.0}/macrotrace/models/mt/time_series.py +228 -19
- macrotrace-0.2.0/macrotrace/sources/rtdsm.py +1053 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/pyproject.toml +1 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/scripts/backstop_ingest.py +96 -1
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/models/mt/series/test_init.py +12 -8
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/models/mt/series/test_series.py +224 -9
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/models/mt/test_plotter.py +1 -1
- macrotrace-0.2.0/tests/sources/rtdsm/fixtures.py +105 -0
- macrotrace-0.2.0/tests/sources/rtdsm/test_rtdsm_api_client.py +161 -0
- macrotrace-0.2.0/tests/sources/rtdsm/test_rtdsm_dataset_manager.py +67 -0
- macrotrace-0.2.0/tests/sources/rtdsm/test_rtdsm_helpers.py +218 -0
- macrotrace-0.2.0/tests/sources/rtdsm/test_rtdsm_observation_manager.py +65 -0
- macrotrace-0.2.0/tests/sources/rtdsm/test_rtdsm_release_manager.py +118 -0
- macrotrace-0.2.0/tests/sources/rtdsm/test_rtdsm_series_manager.py +23 -0
- macrotrace-0.2.0/tests/sources/rtdsm/test_rtdsm_update_manager.py +80 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/uv.lock +23 -0
- macrotrace-0.1.0/CHANGELOG.md +0 -19
- macrotrace-0.1.0/macrotrace/sources/rtdsm.py +0 -15
- {macrotrace-0.1.0 → macrotrace-0.2.0}/.github/workflows/ci.yml +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/.github/workflows/docs.yml +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/.github/workflows/release.yml +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/.gitignore +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/.pre-commit-config.yaml +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/.python-version +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/LICENSE +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/macrotrace/_paths.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/macrotrace/cli.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/macrotrace/graphing.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/macrotrace/models/db.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/macrotrace/models/mt/analysis.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/macrotrace/models/mt/observation.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/macrotrace/models/mt/series_metadata.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/macrotrace/ons_cli/__init__.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/macrotrace/ons_cli/cli.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/macrotrace/ons_cli/common.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/macrotrace/ons_cli/tui.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/macrotrace/py.typed +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/macrotrace/sources/__init__.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/macrotrace/sources/base.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/macrotrace/sources/example.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/macrotrace/sources/fred.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/macrotrace/sources/ons.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/assets/mt/time_series/expected_vm.csv +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/assets/mt/time_series/from_dataframe.csv +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/assets/mt/time_series/from_dataframe_with_tz.csv +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/models/mt/series/test_db_path_forwarding.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/models/mt/test_analysis.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/models/mt/test_metadata.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/models/mt/utils.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/models/test_db_models.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/ons_cli/test_cli.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/ons_cli/test_common.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/ons_cli/test_root_cli.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/ons_cli/test_tui.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/ons_cli/utils.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/sources/base/fixtures.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/sources/base/test_base_api_client.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/sources/base/test_base_dataset_manager.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/sources/base/test_base_observation_manager.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/sources/base/test_base_release_manager.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/sources/base/test_base_series_manager.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/sources/base/test_base_update_manager.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/sources/base/test_base_update_state.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/sources/base/test_db_path_resolution.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/sources/fred/fixtures.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/sources/fred/test_fred_api_client.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/sources/fred/test_fred_dataset_manager.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/sources/fred/test_fred_observation_manager.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/sources/fred/test_fred_release_manager.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/sources/fred/test_fred_series_manager.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/sources/fred/test_fred_tz_handling.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/sources/fred/test_fred_update_manager.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/sources/ons/fixtures.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/sources/ons/test_ons_api_client.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/sources/ons/test_ons_dataset_manager.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/sources/ons/test_ons_observation_manager.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/sources/ons/test_ons_release_manager.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/sources/ons/test_ons_series_manager.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/sources/ons/test_ons_update_manager.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/test_package_init.py +0 -0
- {macrotrace-0.1.0 → macrotrace-0.2.0}/tests/test_paths.py +0 -0
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
Format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/);
|
|
4
|
+
versions follow [SemVer](https://semver.org/).
|
|
5
|
+
|
|
6
|
+
## 0.2.0 — 2026-06-10
|
|
7
|
+
|
|
8
|
+
- **Sources:** Added the Federal Reserve Bank of Philadelphia's Real-Time
|
|
9
|
+
Data Set for Macroeconomists (RTDSM) — vintage-aware ingestion of 115 U.S.
|
|
10
|
+
macroeconomic series. Each series is parsed from the published full-history spreadsheet; an optional `series_key={"frequency": "Q" | "M"}` selects the vintage frequency, and a monthly refresh throttle avoids re-downloading the same series
|
|
11
|
+
within a calendar month, as requested by the Federal Reserve Bank of Philadelphia.
|
|
12
|
+
- **Vintage matching:** Added `MTTimeSeries.identify_vintage(...)`, which
|
|
13
|
+
recovers which release(s) an undated block of observations came from by
|
|
14
|
+
comparing it against every stored vintage. Returns a `VintageMatch`
|
|
15
|
+
(`matched`, `is_ambiguous`, `release_date` / `release_dates`) — useful for
|
|
16
|
+
pinning down the vintage behind replication-package data.
|
|
17
|
+
- **Time series:** Added `MTTimeSeries.to_series(...)`, the values-only,
|
|
18
|
+
date-indexed pandas `Series` counterpart to `to_dataframe` (supports the
|
|
19
|
+
`default`, `first_difference`, and `pct_change` modes). Exposed
|
|
20
|
+
`VintageMatch` at the package root.
|
|
21
|
+
|
|
22
|
+
## 0.1.0 — 2026-04-28
|
|
23
|
+
|
|
24
|
+
First public release.
|
|
25
|
+
|
|
26
|
+
- **Sources:** vintage-aware ingestion from FRED and ONS, with a local
|
|
27
|
+
SQLite store (`MacroTrace.db`) and shared request cache.
|
|
28
|
+
- **Time series:** `MTTimeSeries` with `as_of(...)`, vintage- and
|
|
29
|
+
data-window filtering, `from_dataframe`, and pandas / Darts export.
|
|
30
|
+
- **Analysis:** revision metrics, vintage comparison, decomposition
|
|
31
|
+
across vintages, biasedness regression, and revision autocorrelation.
|
|
32
|
+
- **Plotting:** Plotly-based vintage, revision, and decomposition plots
|
|
33
|
+
via `MTTimeSeriesPlotter`.
|
|
34
|
+
- **CLI / TUI:** `macrotrace ons explorer` and `macrotrace ons tui`
|
|
35
|
+
(the latter via the optional `ons-tui` extra).
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: macrotrace
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: A Python library for managing and analyzing macroeconomic time series data with vintage awareness.
|
|
5
5
|
Project-URL: Homepage, https://github.com/john-ramsey/macrotrace
|
|
6
6
|
Project-URL: Repository, https://github.com/john-ramsey/macrotrace
|
|
@@ -23,6 +23,7 @@ Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
|
23
23
|
Requires-Python: >=3.11
|
|
24
24
|
Requires-Dist: darts>=0.35.0
|
|
25
25
|
Requires-Dist: numpy>=2.2.4
|
|
26
|
+
Requires-Dist: openpyxl>=3.1.0
|
|
26
27
|
Requires-Dist: pandas>=2.2.3
|
|
27
28
|
Requires-Dist: peewee>=4.0
|
|
28
29
|
Requires-Dist: plotly>=6.0.1
|
|
@@ -59,11 +60,12 @@ known at different publication dates.
|
|
|
59
60
|
|
|
60
61
|
## Features
|
|
61
62
|
|
|
62
|
-
- Fetch vintage-aware macroeconomic time series from FRED and ONS
|
|
63
|
+
- Fetch vintage-aware macroeconomic time series from FRED, ONS, and the Philadelphia Fed's Real-Time Data Set (RTDSM)
|
|
63
64
|
- Store releases locally in SQLite for reproducible, offline-friendly workflows
|
|
64
65
|
- Retrieve series as they were known on a specific date with `as_of(...)`
|
|
65
66
|
- Filter both vintage windows and data windows when loading a series
|
|
66
|
-
- Export to pandas DataFrames and Darts `TimeSeries` objects
|
|
67
|
+
- Recover which release an undated block of data came from with `identify_vintage(...)`
|
|
68
|
+
- Export to pandas DataFrames or Series and Darts `TimeSeries` objects
|
|
67
69
|
- Plot vintages and revision comparisons with built-in Plotly tooling
|
|
68
70
|
|
|
69
71
|
## Installation
|
|
@@ -127,6 +129,54 @@ gdp = MTTimeSeries(
|
|
|
127
129
|
)
|
|
128
130
|
```
|
|
129
131
|
|
|
132
|
+
The Philadelphia Fed's Real-Time Data Set (RTDSM) needs no API key. Use the
|
|
133
|
+
series mnemonic as the `dataset_id` and select the vintage frequency with the
|
|
134
|
+
`series_key`:
|
|
135
|
+
|
|
136
|
+
```python
|
|
137
|
+
from macrotrace import MTTimeSeries
|
|
138
|
+
|
|
139
|
+
routput = MTTimeSeries(
|
|
140
|
+
dataset_id="ROUTPUT",
|
|
141
|
+
source="RTDSM",
|
|
142
|
+
series_key={"frequency": "Q"},
|
|
143
|
+
)
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
See the [RTDSM source guide](docs/sources/rtdsm.md) for the full list of series
|
|
147
|
+
and details on vintage frequencies.
|
|
148
|
+
|
|
149
|
+
### Identifying an Unknown Vintage
|
|
150
|
+
|
|
151
|
+
If you have a block of observations with no release date attached — for
|
|
152
|
+
example, a series lifted from a replication package — `identify_vintage`
|
|
153
|
+
compares it against every stored vintage and reports which release(s) it is
|
|
154
|
+
consistent with:
|
|
155
|
+
|
|
156
|
+
```python
|
|
157
|
+
from macrotrace import MTTimeSeries
|
|
158
|
+
|
|
159
|
+
routput = MTTimeSeries(
|
|
160
|
+
dataset_id="ROUTPUT",
|
|
161
|
+
source="RTDSM",
|
|
162
|
+
series_key={"frequency": "Q"},
|
|
163
|
+
)
|
|
164
|
+
|
|
165
|
+
# `unknown` is a date-indexed pandas Series whose vintage you want to recover
|
|
166
|
+
match = routput.identify_vintage(unknown)
|
|
167
|
+
|
|
168
|
+
if match.is_ambiguous:
|
|
169
|
+
print(f"Ambiguous — consistent with {len(match.release_dates)} vintages")
|
|
170
|
+
elif match.matched:
|
|
171
|
+
print(f"Matches the {match.release_date.date()} vintage")
|
|
172
|
+
else:
|
|
173
|
+
print("No matching vintage found")
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
A match is ambiguous when the data is unchanged across consecutive vintages, so
|
|
177
|
+
the values alone cannot pin down a single release; `release_dates` lists every
|
|
178
|
+
consistent vintage in that case.
|
|
179
|
+
|
|
130
180
|
## Command-Line Tools
|
|
131
181
|
|
|
132
182
|
MacroTrace includes command-line tools for exploring ONS datasets:
|
|
@@ -19,11 +19,12 @@ known at different publication dates.
|
|
|
19
19
|
|
|
20
20
|
## Features
|
|
21
21
|
|
|
22
|
-
- Fetch vintage-aware macroeconomic time series from FRED and ONS
|
|
22
|
+
- Fetch vintage-aware macroeconomic time series from FRED, ONS, and the Philadelphia Fed's Real-Time Data Set (RTDSM)
|
|
23
23
|
- Store releases locally in SQLite for reproducible, offline-friendly workflows
|
|
24
24
|
- Retrieve series as they were known on a specific date with `as_of(...)`
|
|
25
25
|
- Filter both vintage windows and data windows when loading a series
|
|
26
|
-
- Export to pandas DataFrames and Darts `TimeSeries` objects
|
|
26
|
+
- Recover which release an undated block of data came from with `identify_vintage(...)`
|
|
27
|
+
- Export to pandas DataFrames or Series and Darts `TimeSeries` objects
|
|
27
28
|
- Plot vintages and revision comparisons with built-in Plotly tooling
|
|
28
29
|
|
|
29
30
|
## Installation
|
|
@@ -87,6 +88,54 @@ gdp = MTTimeSeries(
|
|
|
87
88
|
)
|
|
88
89
|
```
|
|
89
90
|
|
|
91
|
+
The Philadelphia Fed's Real-Time Data Set (RTDSM) needs no API key. Use the
|
|
92
|
+
series mnemonic as the `dataset_id` and select the vintage frequency with the
|
|
93
|
+
`series_key`:
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
from macrotrace import MTTimeSeries
|
|
97
|
+
|
|
98
|
+
routput = MTTimeSeries(
|
|
99
|
+
dataset_id="ROUTPUT",
|
|
100
|
+
source="RTDSM",
|
|
101
|
+
series_key={"frequency": "Q"},
|
|
102
|
+
)
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
See the [RTDSM source guide](docs/sources/rtdsm.md) for the full list of series
|
|
106
|
+
and details on vintage frequencies.
|
|
107
|
+
|
|
108
|
+
### Identifying an Unknown Vintage
|
|
109
|
+
|
|
110
|
+
If you have a block of observations with no release date attached — for
|
|
111
|
+
example, a series lifted from a replication package — `identify_vintage`
|
|
112
|
+
compares it against every stored vintage and reports which release(s) it is
|
|
113
|
+
consistent with:
|
|
114
|
+
|
|
115
|
+
```python
|
|
116
|
+
from macrotrace import MTTimeSeries
|
|
117
|
+
|
|
118
|
+
routput = MTTimeSeries(
|
|
119
|
+
dataset_id="ROUTPUT",
|
|
120
|
+
source="RTDSM",
|
|
121
|
+
series_key={"frequency": "Q"},
|
|
122
|
+
)
|
|
123
|
+
|
|
124
|
+
# `unknown` is a date-indexed pandas Series whose vintage you want to recover
|
|
125
|
+
match = routput.identify_vintage(unknown)
|
|
126
|
+
|
|
127
|
+
if match.is_ambiguous:
|
|
128
|
+
print(f"Ambiguous — consistent with {len(match.release_dates)} vintages")
|
|
129
|
+
elif match.matched:
|
|
130
|
+
print(f"Matches the {match.release_date.date()} vintage")
|
|
131
|
+
else:
|
|
132
|
+
print("No matching vintage found")
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
A match is ambiguous when the data is unchanged across consecutive vintages, so
|
|
136
|
+
the values alone cannot pin down a single release; `release_dates` lists every
|
|
137
|
+
consistent vintage in that case.
|
|
138
|
+
|
|
90
139
|
## Command-Line Tools
|
|
91
140
|
|
|
92
141
|
MacroTrace includes command-line tools for exploring ONS datasets:
|
|
@@ -16,6 +16,7 @@ __all__ = [
|
|
|
16
16
|
"MTSeriesMetadata",
|
|
17
17
|
"MTTimeSeriesPlotter",
|
|
18
18
|
"VintageComparison",
|
|
19
|
+
"VintageMatch",
|
|
19
20
|
"__version__",
|
|
20
21
|
]
|
|
21
22
|
|
|
@@ -25,6 +26,7 @@ _LAZY_IMPORTS = {
|
|
|
25
26
|
"MTSeriesMetadata": ("macrotrace.models.mt.series_metadata", "MTSeriesMetadata"),
|
|
26
27
|
"MTTimeSeriesPlotter": ("macrotrace.models.mt.plotter", "MTTimeSeriesPlotter"),
|
|
27
28
|
"VintageComparison": ("macrotrace.models.mt.analysis", "VintageComparison"),
|
|
29
|
+
"VintageMatch": ("macrotrace.models.mt.time_series", "VintageMatch"),
|
|
28
30
|
}
|
|
29
31
|
|
|
30
32
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from macrotrace.models.mt.observation import MTObservation
|
|
2
2
|
from macrotrace.models.mt.series_metadata import MTSeriesMetadata
|
|
3
|
-
from macrotrace.models.mt.time_series import MTTimeSeries
|
|
3
|
+
from macrotrace.models.mt.time_series import MTTimeSeries, VintageMatch
|
|
4
4
|
from macrotrace.models.mt.analysis import VintageComparison
|
|
5
5
|
from macrotrace.models.mt.plotter import MTTimeSeriesPlotter
|
|
6
6
|
from macrotrace.models.db import (
|
|
@@ -19,6 +19,7 @@ __all__ = [
|
|
|
19
19
|
"MTSeriesMetadata",
|
|
20
20
|
"MTTimeSeries",
|
|
21
21
|
"VintageComparison",
|
|
22
|
+
"VintageMatch",
|
|
22
23
|
"MTTimeSeriesPlotter",
|
|
23
24
|
"LOCAL_DATABASE",
|
|
24
25
|
"Dataset",
|
|
@@ -1,8 +1,14 @@
|
|
|
1
1
|
"""MacroTrace models for time series data."""
|
|
2
2
|
|
|
3
|
-
from macrotrace.models.mt.time_series import MTTimeSeries
|
|
3
|
+
from macrotrace.models.mt.time_series import MTTimeSeries, VintageMatch
|
|
4
4
|
from macrotrace.models.mt.analysis import VintageComparison
|
|
5
5
|
from macrotrace.models.mt.series_metadata import MTSeriesMetadata
|
|
6
6
|
from macrotrace.models.mt.observation import MTObservation
|
|
7
7
|
|
|
8
|
-
__all__ = ["MTTimeSeries", "VintageComparison", "MTSeriesMetadata", "MTObservation"]
|
|
8
|
+
__all__ = [
|
|
9
|
+
"MTTimeSeries",
|
|
10
|
+
"VintageComparison",
|
|
11
|
+
"VintageMatch",
|
|
12
|
+
"MTSeriesMetadata",
|
|
13
|
+
"MTObservation",
|
|
14
|
+
]
|
|
@@ -396,7 +396,7 @@ class MTTimeSeriesPlotter:
|
|
|
396
396
|
)
|
|
397
397
|
|
|
398
398
|
rows = []
|
|
399
|
-
for vintage in self.ts._vintages_including_current_series():
|
|
399
|
+
for vintage in self.ts._vintages_including_current_series:
|
|
400
400
|
seasonal_period = vintage.metadata.get_frequency_as_numeric()
|
|
401
401
|
minimum_observations = 2 * seasonal_period
|
|
402
402
|
if len(vintage.current_observations) < minimum_observations:
|
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
from typing import TYPE_CHECKING, List, Optional, Dict, Any
|
|
2
|
-
from dataclasses import replace
|
|
2
|
+
from dataclasses import dataclass, replace
|
|
3
3
|
from dateutil import parser
|
|
4
4
|
from datetime import datetime, timezone
|
|
5
5
|
|
|
6
|
+
import numpy as np
|
|
6
7
|
import pandas as pd
|
|
7
8
|
from tabulate import tabulate
|
|
8
9
|
from darts import TimeSeries
|
|
@@ -28,10 +29,75 @@ import logging
|
|
|
28
29
|
|
|
29
30
|
logger = logging.getLogger(__name__)
|
|
30
31
|
|
|
31
|
-
VALID_SOURCES = ["FRED", "ONS", "USER"]
|
|
32
|
+
VALID_SOURCES = ["FRED", "ONS", "RTDSM", "USER"]
|
|
32
33
|
# USER is for user provided data, not from an API
|
|
33
34
|
|
|
34
35
|
|
|
36
|
+
@dataclass
|
|
37
|
+
class VintageMatch:
|
|
38
|
+
"""
|
|
39
|
+
Result of matching an undated data series against the vintages of an MTTimeSeries (see ``MTTimeSeries.identify_vintage``).
|
|
40
|
+
|
|
41
|
+
A match is ambiguous when the supplied data is consistent with more than one vintage.
|
|
42
|
+
This is common when the data only covers observations that were never revised across a run of consecutive vintages, so the values alone cannot pin down a single release.
|
|
43
|
+
|
|
44
|
+
Attributes:
|
|
45
|
+
release_dates: Release dates of every vintage whose values matched the supplied data, sorted oldest to newest. Empty when nothing matched.
|
|
46
|
+
n_observations: Number of non-null observations from the supplied data that were compared against each vintage.
|
|
47
|
+
rtol: Relative tolerance used for the value comparison.
|
|
48
|
+
atol: Absolute tolerance used for the value comparison.
|
|
49
|
+
"""
|
|
50
|
+
|
|
51
|
+
release_dates: List[datetime]
|
|
52
|
+
n_observations: int
|
|
53
|
+
rtol: float
|
|
54
|
+
atol: float
|
|
55
|
+
|
|
56
|
+
@property
|
|
57
|
+
def matched(self) -> bool:
|
|
58
|
+
"""True if the supplied data matched at least one vintage."""
|
|
59
|
+
return len(self.release_dates) > 0
|
|
60
|
+
|
|
61
|
+
@property
|
|
62
|
+
def is_ambiguous(self) -> bool:
|
|
63
|
+
"""True if the supplied data matched more than one vintage."""
|
|
64
|
+
return len(self.release_dates) > 1
|
|
65
|
+
|
|
66
|
+
@property
|
|
67
|
+
def release_date(self) -> Optional[datetime]:
|
|
68
|
+
"""
|
|
69
|
+
The single matching vintage's release date.
|
|
70
|
+
|
|
71
|
+
Returns None when there was no match or when the match was ambiguous (more than one vintage matched).
|
|
72
|
+
Inspect ``release_dates`` in the ambiguous case.
|
|
73
|
+
|
|
74
|
+
Returns:
|
|
75
|
+
Optional[datetime]: The unambiguously matched release date, else None.
|
|
76
|
+
"""
|
|
77
|
+
return self.release_dates[0] if len(self.release_dates) == 1 else None
|
|
78
|
+
|
|
79
|
+
def __repr__(self) -> str:
|
|
80
|
+
"""
|
|
81
|
+
Returns a human-readable summary of the match result.
|
|
82
|
+
|
|
83
|
+
Returns:
|
|
84
|
+
str: String representation of the match result.
|
|
85
|
+
"""
|
|
86
|
+
compared = f"compared {self.n_observations} observation(s)"
|
|
87
|
+
if not self.matched:
|
|
88
|
+
return f"VintageMatch(no matching vintage found; {compared})"
|
|
89
|
+
if self.is_ambiguous:
|
|
90
|
+
dates = ", ".join(d.strftime("%Y-%m-%d") for d in self.release_dates)
|
|
91
|
+
return (
|
|
92
|
+
f"VintageMatch(ambiguous - matched {len(self.release_dates)} "
|
|
93
|
+
f"vintages: {dates}; {compared})"
|
|
94
|
+
)
|
|
95
|
+
return (
|
|
96
|
+
f"VintageMatch(matched vintage "
|
|
97
|
+
f"{self.release_dates[0].strftime('%Y-%m-%d')}; {compared})"
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
|
|
35
101
|
class MTTimeSeries:
|
|
36
102
|
|
|
37
103
|
def __init__(
|
|
@@ -254,15 +320,15 @@ class MTTimeSeries:
|
|
|
254
320
|
"""
|
|
255
321
|
|
|
256
322
|
min_release_date = min(
|
|
257
|
-
[v.release_date for v in self._vintages_including_current_series()],
|
|
323
|
+
[v.release_date for v in self._vintages_including_current_series],
|
|
258
324
|
default=None,
|
|
259
325
|
)
|
|
260
326
|
max_release_date = max(
|
|
261
|
-
[v.release_date for v in self._vintages_including_current_series()],
|
|
327
|
+
[v.release_date for v in self._vintages_including_current_series],
|
|
262
328
|
default=None,
|
|
263
329
|
)
|
|
264
330
|
|
|
265
|
-
timestamp_format = self.
|
|
331
|
+
timestamp_format = self._timestamp_format
|
|
266
332
|
|
|
267
333
|
title = f"{self.metadata.title}"
|
|
268
334
|
header = f"\nTime Series: {self.dataset_id} ({title})"
|
|
@@ -348,6 +414,123 @@ class MTTimeSeries:
|
|
|
348
414
|
|
|
349
415
|
return as_of_vintage
|
|
350
416
|
|
|
417
|
+
def identify_vintage(
|
|
418
|
+
self,
|
|
419
|
+
series: pd.Series,
|
|
420
|
+
rtol: float = 1e-05,
|
|
421
|
+
atol: float = 1e-08,
|
|
422
|
+
require_exact_coverage: bool = False,
|
|
423
|
+
) -> VintageMatch:
|
|
424
|
+
"""
|
|
425
|
+
Identify which vintage(s) a block of undated data came from.
|
|
426
|
+
|
|
427
|
+
Replication packages frequently ship a series of observations with no release date attached, only a source.
|
|
428
|
+
This compares the supplied data against every vintage in this MTTimeSeries and reports the release date(s) whose values it is consistent with, so you can recover the vintage you are actually working with.
|
|
429
|
+
|
|
430
|
+
The supplied data is treated as a (possibly incomplete) window of a vintage: every timestamp in ``series`` must be present in a vintage and its values must agree (within tolerance) for that vintage to match.
|
|
431
|
+
A vintage may carry extra observations the data does not include.
|
|
432
|
+
When the data does not change across consecutive vintages the match is necessarily ambiguous, and all consistent release dates are returned.
|
|
433
|
+
|
|
434
|
+
Args:
|
|
435
|
+
series (pd.Series): The undated data to identify, indexed by date.
|
|
436
|
+
The index becomes the observation timestamps and the values are compared against each vintage.
|
|
437
|
+
A tz-naive index is assumed to be UTC, and null values are dropped before matching.
|
|
438
|
+
rtol (float): Relative tolerance for the value comparison, passed through to ``numpy.isclose``. Defaults to 1e-05.
|
|
439
|
+
atol (float): Absolute tolerance for the value comparison, passed through to ``numpy.isclose``. Defaults to 1e-08.
|
|
440
|
+
require_exact_coverage (bool): If True, a vintage only matches when its timestamps are exactly the timestamps in ``series``, rather than allowing the data to be a sub-window of the vintage. Defaults to False.
|
|
441
|
+
|
|
442
|
+
Returns:
|
|
443
|
+
VintageMatch: The matching release date(s) and comparison details. Check ``matched`` to see whether at least one vintage matched.
|
|
444
|
+
|
|
445
|
+
Raises:
|
|
446
|
+
TypeError: If ``series`` is not a pandas Series.
|
|
447
|
+
ValueError: If ``series`` is empty, has a non-date or duplicated index, or contains no non-null observations.
|
|
448
|
+
"""
|
|
449
|
+
candidate = self._prepare_candidate_series(series)
|
|
450
|
+
|
|
451
|
+
matches: List[datetime] = []
|
|
452
|
+
for vintage in self._vintages_including_current_series:
|
|
453
|
+
vintage_df = vintage.to_dataframe(mode="default", tz="utc")
|
|
454
|
+
vintage_series = vintage_df.set_index("timestamp")["value"]
|
|
455
|
+
|
|
456
|
+
# Every supplied timestamp must exist in the vintage, otherwise the data cannot be a window of it.
|
|
457
|
+
if not candidate.index.isin(vintage_series.index).all():
|
|
458
|
+
continue
|
|
459
|
+
|
|
460
|
+
# With exact coverage the vintage must hold exactly the supplied timestamps and nothing more.
|
|
461
|
+
if (
|
|
462
|
+
require_exact_coverage
|
|
463
|
+
and not vintage_series.index.isin(candidate.index).all()
|
|
464
|
+
):
|
|
465
|
+
continue
|
|
466
|
+
|
|
467
|
+
aligned = vintage_series.reindex(candidate.index)
|
|
468
|
+
if np.isclose(
|
|
469
|
+
candidate.to_numpy(dtype=float),
|
|
470
|
+
aligned.to_numpy(dtype=float),
|
|
471
|
+
rtol=rtol,
|
|
472
|
+
atol=atol,
|
|
473
|
+
).all():
|
|
474
|
+
matches.append(vintage.release_date)
|
|
475
|
+
|
|
476
|
+
return VintageMatch(
|
|
477
|
+
release_dates=sorted(matches),
|
|
478
|
+
n_observations=len(candidate),
|
|
479
|
+
rtol=rtol,
|
|
480
|
+
atol=atol,
|
|
481
|
+
)
|
|
482
|
+
|
|
483
|
+
def _prepare_candidate_series(self, series: pd.Series) -> pd.Series:
|
|
484
|
+
"""
|
|
485
|
+
Validate and normalize a user-supplied data series for vintage matching.
|
|
486
|
+
|
|
487
|
+
Coerces the values to numeric, drops nulls, and renders the index as a sorted, unique, tz-aware UTC DatetimeIndex so it lines up with the timestamps produced by ``to_dataframe(tz="utc")``.
|
|
488
|
+
|
|
489
|
+
Args:
|
|
490
|
+
series (pd.Series): The user-supplied data indexed by date.
|
|
491
|
+
|
|
492
|
+
Returns:
|
|
493
|
+
pd.Series: The cleaned candidate series indexed by UTC timestamps.
|
|
494
|
+
|
|
495
|
+
Raises:
|
|
496
|
+
TypeError: If ``series`` is not a pandas Series.
|
|
497
|
+
ValueError: If ``series`` is empty, has a non-date or duplicated index, or contains no non-null observations.
|
|
498
|
+
"""
|
|
499
|
+
if not isinstance(series, pd.Series):
|
|
500
|
+
raise TypeError(
|
|
501
|
+
f"series must be a pandas Series, got {type(series).__name__}."
|
|
502
|
+
)
|
|
503
|
+
if series.empty:
|
|
504
|
+
raise ValueError("The series is empty. There is nothing to match against.")
|
|
505
|
+
|
|
506
|
+
candidate = pd.to_numeric(series, errors="raise").dropna()
|
|
507
|
+
if candidate.empty:
|
|
508
|
+
raise ValueError("The series contains no non-null observations to match.")
|
|
509
|
+
|
|
510
|
+
try:
|
|
511
|
+
index = pd.to_datetime(candidate.index)
|
|
512
|
+
except (ValueError, TypeError) as exc:
|
|
513
|
+
raise ValueError(
|
|
514
|
+
"The series must be indexed by dates that pandas can parse."
|
|
515
|
+
) from exc
|
|
516
|
+
|
|
517
|
+
if not isinstance(index, pd.DatetimeIndex):
|
|
518
|
+
raise ValueError("The series must be indexed by dates, not scalar values.")
|
|
519
|
+
|
|
520
|
+
if index.tz is None:
|
|
521
|
+
logger.warning(
|
|
522
|
+
"The series index has no timezone information. Assuming UTC."
|
|
523
|
+
)
|
|
524
|
+
index = index.tz_localize("UTC")
|
|
525
|
+
else:
|
|
526
|
+
index = index.tz_convert("UTC")
|
|
527
|
+
|
|
528
|
+
if index.has_duplicates:
|
|
529
|
+
raise ValueError("The series index contains duplicate timestamps.")
|
|
530
|
+
|
|
531
|
+
candidate.index = index
|
|
532
|
+
return candidate.sort_index()
|
|
533
|
+
|
|
351
534
|
### Theoretically if the units change, we should not be able to compare them
|
|
352
535
|
def generate_vintage_matrix(self) -> pd.DataFrame:
|
|
353
536
|
"""
|
|
@@ -360,7 +543,7 @@ class MTTimeSeries:
|
|
|
360
543
|
"""
|
|
361
544
|
|
|
362
545
|
vintage_dfs = [
|
|
363
|
-
v.to_dataframe() for v in self._vintages_including_current_series()
|
|
546
|
+
v.to_dataframe() for v in self._vintages_including_current_series
|
|
364
547
|
]
|
|
365
548
|
|
|
366
549
|
merged_df = pd.concat(vintage_dfs, axis=0, ignore_index=True)
|
|
@@ -414,7 +597,7 @@ class MTTimeSeries:
|
|
|
414
597
|
historical_metadata = {}
|
|
415
598
|
|
|
416
599
|
# Iterate forward through vintages to find first appearance of each metadata
|
|
417
|
-
all_vintages = self._vintages_including_current_series()
|
|
600
|
+
all_vintages = self._vintages_including_current_series
|
|
418
601
|
|
|
419
602
|
if not all_vintages:
|
|
420
603
|
return historical_metadata
|
|
@@ -573,7 +756,7 @@ class MTTimeSeries:
|
|
|
573
756
|
]
|
|
574
757
|
)
|
|
575
758
|
|
|
576
|
-
# utc=True is required: source-
|
|
759
|
+
# utc=True is required: source-localized observations carry per-row pytz
|
|
577
760
|
# tzinfo objects (e.g. distinct CST and CDT singletons from
|
|
578
761
|
# America/Chicago), and pandas refuses to build a single datetime64[ns, tz]
|
|
579
762
|
# column from mixed offsets without it. Anchoring on UTC preserves
|
|
@@ -593,6 +776,27 @@ class MTTimeSeries:
|
|
|
593
776
|
|
|
594
777
|
return df
|
|
595
778
|
|
|
779
|
+
def to_series(self, mode: str = "default", tz: str = "utc") -> pd.Series:
|
|
780
|
+
"""
|
|
781
|
+
Converts the current observations of the time series to a date-indexed pandas Series.
|
|
782
|
+
|
|
783
|
+
This is the values-only counterpart to ``to_dataframe``: the observation timestamps become the index and the values become the data.
|
|
784
|
+
The Series is named after ``dataset_id`` so it carries a meaningful label when plotted or concatenated alongside other series.
|
|
785
|
+
|
|
786
|
+
Args:
|
|
787
|
+
mode (str, optional): The mode for which the series is provided.
|
|
788
|
+
Supports "default" (unmodified observations), "first_difference" (first differences of observations), and "pct_change" (percentage change of observations).
|
|
789
|
+
Defaults to "default".
|
|
790
|
+
tz (str, optional): How to render the index. ``"utc"`` (default) returns a tz-aware UTC index; ``"source"`` returns a tz-naive index on the source's wall-clock calendar. See ``to_dataframe`` for the full explanation.
|
|
791
|
+
|
|
792
|
+
Returns:
|
|
793
|
+
pd.Series: The observation values indexed by timestamp, named after the dataset_id.
|
|
794
|
+
"""
|
|
795
|
+
df = self.to_dataframe(mode=mode, tz=tz)
|
|
796
|
+
series = df.set_index("timestamp")["value"]
|
|
797
|
+
series.name = self.dataset_id
|
|
798
|
+
return series
|
|
799
|
+
|
|
596
800
|
def _find_eligible_vintages(self, target_date: datetime) -> List["MTTimeSeries"]:
|
|
597
801
|
"""
|
|
598
802
|
Finds eligible vintages based on (before or equal to) the target date.
|
|
@@ -605,7 +809,7 @@ class MTTimeSeries:
|
|
|
605
809
|
"""
|
|
606
810
|
return [
|
|
607
811
|
v
|
|
608
|
-
for v in self._vintages_including_current_series()
|
|
812
|
+
for v in self._vintages_including_current_series
|
|
609
813
|
if v.release_date <= target_date
|
|
610
814
|
]
|
|
611
815
|
|
|
@@ -625,10 +829,11 @@ class MTTimeSeries:
|
|
|
625
829
|
inferred_freq = pd.infer_freq(pd.DatetimeIndex(timestamps))
|
|
626
830
|
return inferred_freq
|
|
627
831
|
|
|
628
|
-
|
|
832
|
+
@property
|
|
833
|
+
def _timestamp_format(self) -> str:
|
|
629
834
|
"""
|
|
630
|
-
|
|
631
|
-
|
|
835
|
+
The appropriate strftime format string based on the series frequency.
|
|
836
|
+
Sub-daily frequencies include time and timezone, while daily and above show only the date.
|
|
632
837
|
|
|
633
838
|
Returns:
|
|
634
839
|
str: The strftime format string.
|
|
@@ -637,14 +842,14 @@ class MTTimeSeries:
|
|
|
637
842
|
return "%Y-%m-%d"
|
|
638
843
|
|
|
639
844
|
# Create a base date and add one frequency period to it
|
|
640
|
-
# If the difference is less than 1 day, it's a
|
|
845
|
+
# If the difference is less than 1 day, it's a sub-daily frequency
|
|
641
846
|
base_date = pd.Timestamp("2020-01-01")
|
|
642
847
|
next_date = base_date + pd.tseries.frequencies.to_offset(
|
|
643
848
|
self.metadata.frequency
|
|
644
849
|
)
|
|
645
|
-
|
|
850
|
+
is_sub_daily = (next_date - base_date) < pd.Timedelta(days=1)
|
|
646
851
|
|
|
647
|
-
if
|
|
852
|
+
if is_sub_daily:
|
|
648
853
|
return "%Y-%m-%d %H:%M:%S %Z"
|
|
649
854
|
else:
|
|
650
855
|
return "%Y-%m-%d"
|
|
@@ -843,8 +1048,9 @@ class MTTimeSeries:
|
|
|
843
1048
|
)
|
|
844
1049
|
return releases
|
|
845
1050
|
|
|
846
|
-
|
|
847
|
-
|
|
1051
|
+
@property
|
|
1052
|
+
def _vintage_window_description(self) -> str:
|
|
1053
|
+
"""A human-readable description of the requested vintage window."""
|
|
848
1054
|
fmt = "%Y-%m-%d"
|
|
849
1055
|
start = (
|
|
850
1056
|
self.vintage_start_date.astimezone(timezone.utc).strftime(fmt)
|
|
@@ -865,9 +1071,10 @@ class MTTimeSeries:
|
|
|
865
1071
|
return f"on or before {end}"
|
|
866
1072
|
return "for all vintages"
|
|
867
1073
|
|
|
1074
|
+
@property
|
|
868
1075
|
def _vintages_including_current_series(self) -> List["MTTimeSeries"]:
|
|
869
1076
|
"""
|
|
870
|
-
|
|
1077
|
+
A list of all vintages including the current series.
|
|
871
1078
|
|
|
872
1079
|
Returns:
|
|
873
1080
|
List[MTTimeSeries]: A list of all vintages including the current series.
|
|
@@ -882,10 +1089,12 @@ class MTTimeSeries:
|
|
|
882
1089
|
"""
|
|
883
1090
|
from macrotrace.sources.fred import FredUpdateManager
|
|
884
1091
|
from macrotrace.sources.ons import ONSUpdateManager
|
|
1092
|
+
from macrotrace.sources.rtdsm import RTDSMUpdateManager
|
|
885
1093
|
|
|
886
1094
|
source_managers = {
|
|
887
1095
|
"FRED": FredUpdateManager,
|
|
888
1096
|
"ONS": ONSUpdateManager,
|
|
1097
|
+
"RTDSM": RTDSMUpdateManager,
|
|
889
1098
|
}
|
|
890
1099
|
|
|
891
1100
|
assert (
|
|
@@ -1023,7 +1232,7 @@ class MTTimeSeries:
|
|
|
1023
1232
|
f"No vintages available for dataset {state.dataset.dataset_id} "
|
|
1024
1233
|
f"and series key {state.series.series_key} "
|
|
1025
1234
|
f"within the requested vintage window "
|
|
1026
|
-
f"({self.
|
|
1235
|
+
f"({self._vintage_window_description})."
|
|
1027
1236
|
)
|
|
1028
1237
|
raise ValueError(
|
|
1029
1238
|
f"No time series data found for dataset {state.dataset.dataset_id} "
|