macrotrace 0.2.2__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {macrotrace-0.2.2 → macrotrace-0.3.0}/.github/workflows/docs.yml +1 -1
- {macrotrace-0.2.2 → macrotrace-0.3.0}/CHANGELOG.md +16 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/PKG-INFO +1 -1
- macrotrace-0.3.0/macrotrace/_time.py +19 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/macrotrace/models/db.py +10 -15
- {macrotrace-0.2.2 → macrotrace-0.3.0}/macrotrace/models/mt/analysis.py +8 -3
- {macrotrace-0.2.2 → macrotrace-0.3.0}/macrotrace/models/mt/plotter.py +6 -15
- {macrotrace-0.2.2 → macrotrace-0.3.0}/macrotrace/models/mt/time_series.py +74 -55
- {macrotrace-0.2.2 → macrotrace-0.3.0}/macrotrace/sources/fred.py +11 -51
- {macrotrace-0.2.2 → macrotrace-0.3.0}/macrotrace/sources/ons.py +5 -2
- {macrotrace-0.2.2 → macrotrace-0.3.0}/macrotrace/sources/rtdsm.py +3 -16
- {macrotrace-0.2.2 → macrotrace-0.3.0}/scripts/backstop_ingest.py +5 -11
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/models/mt/series/test_init.py +26 -7
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/models/mt/series/test_series.py +131 -10
- macrotrace-0.3.0/tests/models/mt/series/test_window_source_local_bounds.py +124 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/models/mt/test_analysis.py +49 -44
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/models/mt/test_plotter.py +2 -4
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/sources/fred/test_fred_dataset_manager.py +14 -36
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/sources/fred/test_fred_release_manager.py +0 -25
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/sources/fred/test_fred_tz_handling.py +0 -40
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/sources/rtdsm/test_rtdsm_helpers.py +1 -9
- macrotrace-0.3.0/tests/test_time.py +40 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/.github/workflows/ci.yml +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/.github/workflows/release.yml +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/.gitignore +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/.pre-commit-config.yaml +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/.python-version +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/LICENSE +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/README.md +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/macrotrace/__init__.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/macrotrace/_paths.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/macrotrace/cli.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/macrotrace/graphing.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/macrotrace/models/__init__.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/macrotrace/models/mt/__init__.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/macrotrace/models/mt/observation.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/macrotrace/models/mt/series_metadata.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/macrotrace/ons_cli/__init__.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/macrotrace/ons_cli/cli.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/macrotrace/ons_cli/common.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/macrotrace/ons_cli/tui.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/macrotrace/py.typed +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/macrotrace/sources/__init__.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/macrotrace/sources/base.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/macrotrace/sources/example.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/pyproject.toml +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/assets/mt/time_series/expected_vm.csv +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/assets/mt/time_series/from_dataframe.csv +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/assets/mt/time_series/from_dataframe_with_tz.csv +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/models/mt/series/test_db_path_forwarding.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/models/mt/test_metadata.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/models/mt/utils.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/models/test_db_models.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/ons_cli/test_cli.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/ons_cli/test_common.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/ons_cli/test_root_cli.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/ons_cli/test_tui.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/ons_cli/utils.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/sources/base/fixtures.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/sources/base/test_base_api_client.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/sources/base/test_base_dataset_manager.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/sources/base/test_base_observation_manager.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/sources/base/test_base_release_manager.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/sources/base/test_base_series_manager.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/sources/base/test_base_update_manager.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/sources/base/test_base_update_state.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/sources/base/test_db_path_resolution.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/sources/fred/fixtures.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/sources/fred/test_fred_api_client.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/sources/fred/test_fred_observation_manager.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/sources/fred/test_fred_series_manager.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/sources/fred/test_fred_update_manager.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/sources/ons/fixtures.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/sources/ons/test_ons_api_client.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/sources/ons/test_ons_dataset_manager.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/sources/ons/test_ons_observation_manager.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/sources/ons/test_ons_release_manager.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/sources/ons/test_ons_series_manager.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/sources/ons/test_ons_update_manager.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/sources/rtdsm/fixtures.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/sources/rtdsm/test_rtdsm_api_client.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/sources/rtdsm/test_rtdsm_dataset_manager.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/sources/rtdsm/test_rtdsm_observation_manager.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/sources/rtdsm/test_rtdsm_release_manager.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/sources/rtdsm/test_rtdsm_series_manager.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/sources/rtdsm/test_rtdsm_update_manager.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/test_package_init.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/tests/test_paths.py +0 -0
- {macrotrace-0.2.2 → macrotrace-0.3.0}/uv.lock +0 -0
|
@@ -49,7 +49,7 @@ jobs:
|
|
|
49
49
|
if [[ "$VERSION" =~ (rc|a|b|dev|alpha|beta) ]]; then
|
|
50
50
|
uv run mike deploy --push "$VERSION"
|
|
51
51
|
else
|
|
52
|
-
uv run mike deploy --push --update-aliases "$VERSION" latest
|
|
52
|
+
uv run mike deploy --push --update-aliases --alias-type copy "$VERSION" latest
|
|
53
53
|
uv run mike set-default --push latest
|
|
54
54
|
fi
|
|
55
55
|
|
|
@@ -3,6 +3,22 @@
|
|
|
3
3
|
Format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/);
|
|
4
4
|
versions follow [SemVer](https://semver.org/).
|
|
5
5
|
|
|
6
|
+
## 0.3.0 — 2026-06-15
|
|
7
|
+
|
|
8
|
+
- **Breaking:** Naive date inputs (`as_of`, the vintage/data windows,
|
|
9
|
+
`vintage_comparison`) are read on the source's clock instead of UTC.
|
|
10
|
+
This fixes same-day vintages being silently skipped for FRED,
|
|
11
|
+
which stamps releases at midnight US Central:
|
|
12
|
+
`as_of("2018-03-16")` now returns the 2018-03-16 vintage, and a
|
|
13
|
+
`data_end_date` on a period boundary no longer drops the final observation.
|
|
14
|
+
Timezone-aware datetimes still compare as exact instants.
|
|
15
|
+
- **Breaking:** Date strings must be `YYYY-MM-DD`; `datetime.date` objects
|
|
16
|
+
are now accepted. Free-text parsing is gone (along with the `dateutil`
|
|
17
|
+
dependency), so ambiguous formats like `"03/04/2018"` are rejected instead
|
|
18
|
+
of guessed.
|
|
19
|
+
- **Fixed:** `created_at` columns stamped each row with the process start
|
|
20
|
+
time instead of its actual creation time.
|
|
21
|
+
|
|
6
22
|
## 0.2.2 — 2026-06-12
|
|
7
23
|
|
|
8
24
|
- **Vintage matching:** `identify_vintage` now interprets a tz-naive index in
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: macrotrace
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: A Python library for managing and analyzing macroeconomic time series data with vintage awareness.
|
|
5
5
|
Project-URL: Homepage, https://github.com/john-ramsey/macrotrace
|
|
6
6
|
Project-URL: Repository, https://github.com/john-ramsey/macrotrace
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from datetime import datetime, tzinfo
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def ensure_timezone(dt: Optional[datetime], tz: tzinfo) -> Optional[datetime]:
|
|
6
|
+
"""
|
|
7
|
+
Return the datetime made aware in ``tz``, or None.
|
|
8
|
+
|
|
9
|
+
Naive datetimes keep their wall clock while aware ones are converted. Uses pytz's
|
|
10
|
+
``localize()`` when available so the real historical offset is picked
|
|
11
|
+
instead of pytz's first entry for the zone (LMT).
|
|
12
|
+
"""
|
|
13
|
+
if dt is None:
|
|
14
|
+
return None
|
|
15
|
+
if dt.tzinfo is None:
|
|
16
|
+
if hasattr(tz, "localize"):
|
|
17
|
+
return tz.localize(dt)
|
|
18
|
+
return dt.replace(tzinfo=tz)
|
|
19
|
+
return dt.astimezone(tz)
|
|
@@ -40,6 +40,11 @@ def is_valid_dateoffset(value: str) -> bool:
|
|
|
40
40
|
return False
|
|
41
41
|
|
|
42
42
|
|
|
43
|
+
def _utc_now() -> datetime.datetime:
|
|
44
|
+
"""Callable default so each row stamps its own creation time, not the import time."""
|
|
45
|
+
return datetime.datetime.now(tz=datetime.timezone.utc)
|
|
46
|
+
|
|
47
|
+
|
|
43
48
|
class StrictDateTimeField(DateTimeField):
|
|
44
49
|
"""DateTimeField that enforces timezone-aware datetime objects in ISO 8601 format."""
|
|
45
50
|
|
|
@@ -122,9 +127,7 @@ class DatasetDimension(DataBaseModel):
|
|
|
122
127
|
# Validity period for this dimension definition, null valid_to means currently valid
|
|
123
128
|
valid_from = StrictDateTimeField()
|
|
124
129
|
valid_to = StrictDateTimeField(null=True)
|
|
125
|
-
created_at = StrictDateTimeField(
|
|
126
|
-
default=datetime.datetime.now(tz=datetime.timezone.utc)
|
|
127
|
-
)
|
|
130
|
+
created_at = StrictDateTimeField(default=_utc_now)
|
|
128
131
|
|
|
129
132
|
class Meta:
|
|
130
133
|
constraints = [
|
|
@@ -152,9 +155,7 @@ class Release(DataBaseModel):
|
|
|
152
155
|
)
|
|
153
156
|
release_date = StrictDateTimeField()
|
|
154
157
|
additional_metadata = JSONField(null=True)
|
|
155
|
-
created_at = StrictDateTimeField(
|
|
156
|
-
default=datetime.datetime.now(tz=datetime.timezone.utc)
|
|
157
|
-
)
|
|
158
|
+
created_at = StrictDateTimeField(default=_utc_now)
|
|
158
159
|
|
|
159
160
|
class Meta:
|
|
160
161
|
constraints = [SQL("UNIQUE(dataset_id, release_date)")]
|
|
@@ -182,9 +183,7 @@ class ReleaseDimension(DataBaseModel):
|
|
|
182
183
|
backref="release_dimensions",
|
|
183
184
|
on_delete="CASCADE",
|
|
184
185
|
)
|
|
185
|
-
created_at = StrictDateTimeField(
|
|
186
|
-
default=datetime.datetime.now(tz=datetime.timezone.utc)
|
|
187
|
-
)
|
|
186
|
+
created_at = StrictDateTimeField(default=_utc_now)
|
|
188
187
|
|
|
189
188
|
class Meta:
|
|
190
189
|
constraints = [SQL("UNIQUE(release_id, dimension_id)")]
|
|
@@ -199,9 +198,7 @@ class ReleaseDimension(DataBaseModel):
|
|
|
199
198
|
class Series(DataBaseModel):
|
|
200
199
|
dataset = ForeignKeyField(Dataset, backref="series", on_delete="CASCADE")
|
|
201
200
|
series_key = JSONField()
|
|
202
|
-
created_at = StrictDateTimeField(
|
|
203
|
-
default=datetime.datetime.now(tz=datetime.timezone.utc)
|
|
204
|
-
)
|
|
201
|
+
created_at = StrictDateTimeField(default=_utc_now)
|
|
205
202
|
|
|
206
203
|
def __repr__(self):
|
|
207
204
|
return (
|
|
@@ -249,9 +246,7 @@ class Observation(DataBaseModel):
|
|
|
249
246
|
|
|
250
247
|
observation_timestamp = StrictDateTimeField()
|
|
251
248
|
value = FloatField(null=True) # null if the observation is missing
|
|
252
|
-
created_at = StrictDateTimeField(
|
|
253
|
-
default=datetime.datetime.now(tz=datetime.timezone.utc)
|
|
254
|
-
)
|
|
249
|
+
created_at = StrictDateTimeField(default=_utc_now)
|
|
255
250
|
|
|
256
251
|
class Meta:
|
|
257
252
|
constraints = [SQL("UNIQUE(release_id, observation_timestamp)")]
|
|
@@ -2,6 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
from dataclasses import dataclass
|
|
4
4
|
from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING, Union
|
|
5
|
+
import datetime
|
|
5
6
|
import logging
|
|
6
7
|
|
|
7
8
|
import numpy as np
|
|
@@ -510,7 +511,6 @@ class MTTimeSeriesAnalysis:
|
|
|
510
511
|
darts_ts = self.ts.to_darts_timeseries()
|
|
511
512
|
|
|
512
513
|
if len(darts_ts) >= (min_train_size + 1):
|
|
513
|
-
|
|
514
514
|
for i in range(min_train_size, len(darts_ts)):
|
|
515
515
|
train = darts_ts[:i]
|
|
516
516
|
test = darts_ts[i : i + 1]
|
|
@@ -629,7 +629,10 @@ class MTTimeSeriesAnalysis:
|
|
|
629
629
|
)
|
|
630
630
|
|
|
631
631
|
def vintage_comparison(
|
|
632
|
-
self,
|
|
632
|
+
self,
|
|
633
|
+
vintage_dates: List[str | datetime.datetime | datetime.date],
|
|
634
|
+
mode: str = "growth",
|
|
635
|
+
strategy: str = "all",
|
|
633
636
|
) -> "VintageComparison":
|
|
634
637
|
"""
|
|
635
638
|
Compare vintages across summary measures describing revisions of a
|
|
@@ -677,7 +680,9 @@ class MTTimeSeriesAnalysis:
|
|
|
677
680
|
period-over-period change in the level changes sign between vintages.
|
|
678
681
|
|
|
679
682
|
Args:
|
|
680
|
-
vintage_dates (List[str]):
|
|
683
|
+
vintage_dates (List[str | datetime | date]): The vintages to
|
|
684
|
+
compare, each resolved through ``as_of()`` — a ``YYYY-MM-DD``
|
|
685
|
+
string or date for a calendar day, or a datetime for an exact instant.
|
|
681
686
|
mode (str): The mode of comparison ("growth" or "levels").
|
|
682
687
|
strategy (str): The strategy for comparison ("sequential", "final", or "all").
|
|
683
688
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
|
|
2
|
-
from datetime import datetime
|
|
2
|
+
from datetime import date, datetime
|
|
3
3
|
import warnings
|
|
4
4
|
|
|
5
5
|
import numpy as np
|
|
@@ -147,9 +147,8 @@ class MTTimeSeriesPlotter:
|
|
|
147
147
|
go.Figure: Plotly figure showing observation revisions over time.
|
|
148
148
|
"""
|
|
149
149
|
if isinstance(observation_datetime, str):
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
) # Returns UTC timezone
|
|
150
|
+
# Resolves to the source's midnight on that day
|
|
151
|
+
observation_datetime = self.ts._parse_string_date(observation_datetime)
|
|
153
152
|
elif not isinstance(observation_datetime, datetime):
|
|
154
153
|
raise ValueError(
|
|
155
154
|
f"Invalid observation datetime type: {type(observation_datetime)}. Must be a string or a datetime."
|
|
@@ -588,7 +587,7 @@ class MTTimeSeriesPlotter:
|
|
|
588
587
|
|
|
589
588
|
def timeseries_comparison(
|
|
590
589
|
self,
|
|
591
|
-
vintage_dates: List[str | datetime],
|
|
590
|
+
vintage_dates: List[str | datetime | date],
|
|
592
591
|
chart_type: str = "bar",
|
|
593
592
|
mode: str = "default",
|
|
594
593
|
y_axis_zero_indexed: bool = False,
|
|
@@ -597,7 +596,7 @@ class MTTimeSeriesPlotter:
|
|
|
597
596
|
Plots a comparison of time series vintages.
|
|
598
597
|
|
|
599
598
|
Args:
|
|
600
|
-
vintage_dates (List[str | datetime]): List of vintage identifiers
|
|
599
|
+
vintage_dates (List[str | datetime | date]): List of vintage identifiers, resolved via ``as_of``. Ex. '2025-11-01'
|
|
601
600
|
chart_type (str, optional): Type of chart to plot. Either "bar" or "line". Defaults to "bar".
|
|
602
601
|
mode (str, optional): The mode for which the dataframe is provided. Supports "default", "first_difference", and "pct_change". Defaults to "default".
|
|
603
602
|
y_axis_zero_indexed (bool, optional): Sets base of the y-axis to zero.
|
|
@@ -610,14 +609,6 @@ class MTTimeSeriesPlotter:
|
|
|
610
609
|
f"Invalid mode: {mode}. Supported modes are 'default', 'first_difference', and 'pct_change'."
|
|
611
610
|
)
|
|
612
611
|
|
|
613
|
-
for vintage_date in vintage_dates:
|
|
614
|
-
if (not isinstance(vintage_date, str)) and (
|
|
615
|
-
not isinstance(vintage_date, datetime)
|
|
616
|
-
):
|
|
617
|
-
raise TypeError(
|
|
618
|
-
"Vintage dates must be provided as strings or datetime objects."
|
|
619
|
-
)
|
|
620
|
-
|
|
621
612
|
fig = go.Figure()
|
|
622
613
|
all_values = []
|
|
623
614
|
hoverinfo = "x+y+name"
|
|
@@ -626,7 +617,7 @@ class MTTimeSeriesPlotter:
|
|
|
626
617
|
df = self.ts.as_of(vintage_date).to_dataframe(mode=mode)
|
|
627
618
|
vintage_date = (
|
|
628
619
|
vintage_date.strftime("%Y-%m-%d")
|
|
629
|
-
if isinstance(vintage_date, datetime)
|
|
620
|
+
if isinstance(vintage_date, date)
|
|
630
621
|
else vintage_date
|
|
631
622
|
)
|
|
632
623
|
if chart_type == "bar":
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
from typing import TYPE_CHECKING, List, Optional, Dict, Any, Tuple
|
|
2
2
|
from dataclasses import dataclass, replace
|
|
3
|
-
from
|
|
4
|
-
from datetime import datetime, timedelta, timezone, tzinfo
|
|
3
|
+
from datetime import date, datetime, timedelta, timezone, tzinfo
|
|
5
4
|
|
|
6
5
|
import numpy as np
|
|
7
6
|
import pandas as pd
|
|
@@ -10,6 +9,7 @@ from tabulate import tabulate
|
|
|
10
9
|
from darts import TimeSeries
|
|
11
10
|
from peewee import JOIN
|
|
12
11
|
|
|
12
|
+
from macrotrace._time import ensure_timezone
|
|
13
13
|
from macrotrace.models.db import (
|
|
14
14
|
Dataset,
|
|
15
15
|
DatasetDimension,
|
|
@@ -141,7 +141,6 @@ class VintageMatch:
|
|
|
141
141
|
|
|
142
142
|
|
|
143
143
|
class MTTimeSeries:
|
|
144
|
-
|
|
145
144
|
def __init__(
|
|
146
145
|
self,
|
|
147
146
|
dataset_id: str,
|
|
@@ -150,12 +149,12 @@ class MTTimeSeries:
|
|
|
150
149
|
# vintage_start_date and vintage_end_date define the vintage window returned
|
|
151
150
|
# by this MTTimeSeries instance. Update managers may still backfill outside
|
|
152
151
|
# the requested window so future loads can move backward without data loss.
|
|
153
|
-
vintage_start_date: Optional[str | datetime] = None,
|
|
154
|
-
vintage_end_date: Optional[str | datetime] = None,
|
|
152
|
+
vintage_start_date: Optional[str | datetime | date] = None,
|
|
153
|
+
vintage_end_date: Optional[str | datetime | date] = None,
|
|
155
154
|
# Recall we want to only filter the observations returned, not the data fetched.
|
|
156
155
|
# Filtering data before writing to the db may cause incomplete vintage chains.
|
|
157
|
-
data_start_date: Optional[str | datetime] = None,
|
|
158
|
-
data_end_date: Optional[str | datetime] = None,
|
|
156
|
+
data_start_date: Optional[str | datetime | date] = None,
|
|
157
|
+
data_end_date: Optional[str | datetime | date] = None,
|
|
159
158
|
update_prior_to_load: bool = True,
|
|
160
159
|
db_path: Optional[str] = None,
|
|
161
160
|
cache_path: Optional[str] = None,
|
|
@@ -166,10 +165,19 @@ class MTTimeSeries:
|
|
|
166
165
|
dataset_id: Dataset identifier (e.g., "GDP", "UNRATE")
|
|
167
166
|
source: Data source ("FRED", "ONS", etc.)
|
|
168
167
|
series_key: Dictionary of dimension filters for multi-dimensional datasets
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
168
|
+
|
|
169
|
+
All four date windows are inclusive and accept a ``YYYY-MM-DD``
|
|
170
|
+
string, a ``datetime.date``, or a datetime. Naive input is read on
|
|
171
|
+
the source's own clock. A date becomes the source's midnight on that day,
|
|
172
|
+
matching how sources stamp their releases and observations — so e.g.
|
|
173
|
+
``vintage_end_date="2018-03-16"`` includes FRED's 2018-03-16
|
|
174
|
+
release even though it is stored at midnight US Central.
|
|
175
|
+
Pass an aware datetime to bound by an exact instant instead.
|
|
176
|
+
|
|
177
|
+
vintage_start_date: Only load vintages released on or after this date
|
|
178
|
+
vintage_end_date: Only load vintages released on or before this date
|
|
179
|
+
data_start_date: Only keep observations stamped on or after this date
|
|
180
|
+
data_end_date: Only keep observations stamped on or before this date
|
|
173
181
|
update_prior_to_load: Whether to fetch new data from API before loading
|
|
174
182
|
db_path: Path to the SQLite database. Resolution: this argument,
|
|
175
183
|
then the ``MACROTRACE_DB`` env var, then ``MacroTrace.db`` in
|
|
@@ -414,36 +422,39 @@ class MTTimeSeries:
|
|
|
414
422
|
self._analysis = MTTimeSeriesAnalysis(self)
|
|
415
423
|
return self._analysis
|
|
416
424
|
|
|
417
|
-
def as_of(self, target_date: datetime | str) -> Optional["MTTimeSeries"]:
|
|
425
|
+
def as_of(self, target_date: datetime | str | date) -> Optional["MTTimeSeries"]:
|
|
418
426
|
"""
|
|
419
427
|
Returns the most recent vintage as of a specific date.
|
|
420
428
|
|
|
429
|
+
A date string (``"YYYY-MM-DD"``), a ``datetime.date``, or a naive datetime
|
|
430
|
+
is read on the source's own clock, so a calendar date lands at the source's midnight
|
|
431
|
+
and matches how the source stamps its releases. A timezone-aware datetime is compared as the exact instant it denotes.
|
|
432
|
+
|
|
421
433
|
Raises:
|
|
422
|
-
ValueError: If
|
|
434
|
+
ValueError: If the target is a string not in ``YYYY-MM-DD`` form,
|
|
435
|
+
lies on a future calendar day in the source's timezone, or no
|
|
436
|
+
vintage exists on or before it.
|
|
423
437
|
|
|
424
438
|
Args:
|
|
425
|
-
target_date (datetime | str): The target date
|
|
426
|
-
|
|
427
|
-
|
|
439
|
+
target_date (datetime | str | date): The target date. Pass a
|
|
440
|
+
``YYYY-MM-DD`` string or a date for a calendar day, or a
|
|
441
|
+
datetime for a specific moment.
|
|
428
442
|
|
|
429
443
|
Returns:
|
|
430
|
-
MTTimeSeries: The latest available vintage
|
|
444
|
+
MTTimeSeries: The latest available vintage as of the target_date.
|
|
431
445
|
"""
|
|
432
|
-
if isinstance(target_date, str):
|
|
433
|
-
target_date = self._parse_string_date(target_date)
|
|
434
|
-
elif isinstance(target_date, datetime):
|
|
435
|
-
if target_date.tzinfo is None:
|
|
436
|
-
logger.warning(
|
|
437
|
-
"Datetime object provided without timezone info. Assuming UTC."
|
|
438
|
-
)
|
|
439
|
-
target_date = target_date.replace(tzinfo=timezone.utc)
|
|
440
|
-
elif not isinstance(target_date, datetime):
|
|
446
|
+
if not isinstance(target_date, (str, date)):
|
|
441
447
|
raise ValueError(
|
|
442
|
-
f"Invalid target date type: {type(target_date)}. Must be a string or a datetime."
|
|
448
|
+
f"Invalid target date type: {type(target_date)}. Must be a string, a date, or a datetime."
|
|
443
449
|
)
|
|
450
|
+
target_date = self._clean_date(target_date)
|
|
444
451
|
|
|
445
|
-
#
|
|
446
|
-
|
|
452
|
+
# Guard against targets on a future calendar day. Comparing dates on
|
|
453
|
+
# the source's clock (not instants) keeps "as of today" valid even
|
|
454
|
+
# while the source's calendar day still lags UTC's.
|
|
455
|
+
native_tz = self._native_observation_timezone()
|
|
456
|
+
now_local = datetime.now(timezone.utc).astimezone(native_tz)
|
|
457
|
+
if target_date.astimezone(native_tz).date() > now_local.date():
|
|
447
458
|
raise ValueError("The target date cannot be in the future.")
|
|
448
459
|
|
|
449
460
|
eligible_vintages = self._find_eligible_vintages(target_date)
|
|
@@ -1255,28 +1266,30 @@ class MTTimeSeries:
|
|
|
1255
1266
|
|
|
1256
1267
|
def _parse_string_date(self, dt: str) -> datetime:
|
|
1257
1268
|
"""
|
|
1258
|
-
Parses a string
|
|
1269
|
+
Parses a ``YYYY-MM-DD`` string to the source's midnight on that day.
|
|
1270
|
+
|
|
1271
|
+
Date strings denote calendar days, never instants, so only the
|
|
1272
|
+
unambiguous ISO 8601 calendar form is accepted. Please pass a datetime
|
|
1273
|
+
object when a specific time matters. The day is anchored at midnight
|
|
1274
|
+
in the source's native timezone, which is how every source stamps its
|
|
1275
|
+
release dates and observation timestamps.
|
|
1259
1276
|
|
|
1260
1277
|
Args:
|
|
1261
|
-
dt (str): The
|
|
1278
|
+
dt (str): The date string to parse, in ``YYYY-MM-DD`` form.
|
|
1262
1279
|
|
|
1263
1280
|
Returns:
|
|
1264
|
-
datetime: The
|
|
1281
|
+
datetime: The source-local midnight starting that calendar day.
|
|
1282
|
+
|
|
1283
|
+
Raises:
|
|
1284
|
+
ValueError: If the string is not in ``YYYY-MM-DD`` form.
|
|
1265
1285
|
"""
|
|
1266
1286
|
try:
|
|
1267
|
-
parsed_dt =
|
|
1268
|
-
if parsed_dt.tzinfo is None:
|
|
1269
|
-
# If no timezone is provided, assume UTC
|
|
1270
|
-
logger.warning(
|
|
1271
|
-
f"Assuming datetime string {dt} is UTC timezone. Please provide a datetime object with timezone info if this is not the case."
|
|
1272
|
-
)
|
|
1273
|
-
parsed_dt = parsed_dt.replace(tzinfo=timezone.utc)
|
|
1274
|
-
return parsed_dt
|
|
1275
|
-
|
|
1287
|
+
parsed_dt = datetime.strptime(dt, "%Y-%m-%d")
|
|
1276
1288
|
except ValueError:
|
|
1277
1289
|
raise ValueError(
|
|
1278
|
-
f"Invalid date string format {dt}.
|
|
1290
|
+
f"Invalid date string format {dt}. Date strings must be 'YYYY-MM-DD'; pass a datetime object to target a specific time."
|
|
1279
1291
|
)
|
|
1292
|
+
return ensure_timezone(parsed_dt, self._native_observation_timezone())
|
|
1280
1293
|
|
|
1281
1294
|
def _set_source(self, source: str):
|
|
1282
1295
|
"""Validate and set the data source."""
|
|
@@ -1287,23 +1300,29 @@ class MTTimeSeries:
|
|
|
1287
1300
|
)
|
|
1288
1301
|
self.source = source_upper
|
|
1289
1302
|
|
|
1290
|
-
def _clean_date(self, dt: str | datetime) -> datetime:
|
|
1291
|
-
"""
|
|
1303
|
+
def _clean_date(self, dt: str | datetime | date) -> Optional[datetime]:
|
|
1304
|
+
"""
|
|
1305
|
+
Normalize a date input to an aware datetime on the source's clock.
|
|
1306
|
+
"""
|
|
1292
1307
|
if dt is None:
|
|
1293
1308
|
return None
|
|
1294
1309
|
if isinstance(dt, str):
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
1300
|
-
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
|
|
1310
|
+
return self._parse_string_date(dt)
|
|
1311
|
+
if isinstance(dt, datetime):
|
|
1312
|
+
if dt.tzinfo is None:
|
|
1313
|
+
logger.warning(
|
|
1314
|
+
"Datetime object provided without timezone info. "
|
|
1315
|
+
"Interpreting it in the source's native timezone (%s).",
|
|
1316
|
+
self._native_observation_timezone(),
|
|
1317
|
+
)
|
|
1318
|
+
dt = ensure_timezone(dt, self._native_observation_timezone())
|
|
1319
|
+
return dt
|
|
1320
|
+
if isinstance(dt, date):
|
|
1321
|
+
return ensure_timezone(
|
|
1322
|
+
datetime(dt.year, dt.month, dt.day),
|
|
1323
|
+
self._native_observation_timezone(),
|
|
1304
1324
|
)
|
|
1305
|
-
|
|
1306
|
-
return dt
|
|
1325
|
+
raise TypeError(f"Invalid date format: {dt}") # Not a string or datetime
|
|
1307
1326
|
|
|
1308
1327
|
def _get_series_dimension_from_key(self, state) -> List[DatasetDimension]:
|
|
1309
1328
|
"""
|
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
import os
|
|
2
2
|
from datetime import datetime
|
|
3
3
|
from typing import Any, List, Optional, Dict
|
|
4
|
-
from dateutil import parser
|
|
5
4
|
|
|
6
5
|
import numpy as np
|
|
7
6
|
import pytz
|
|
8
7
|
|
|
9
8
|
from tqdm import tqdm
|
|
10
9
|
|
|
11
|
-
|
|
10
|
+
from macrotrace._time import ensure_timezone
|
|
12
11
|
from macrotrace.sources.base import (
|
|
13
12
|
APIClient,
|
|
14
13
|
UpdateManager,
|
|
@@ -115,36 +114,15 @@ class FredDatasetManager(DatasetManager):
|
|
|
115
114
|
|
|
116
115
|
def _parse_date(self, date_str: str) -> Optional[datetime]:
|
|
117
116
|
"""
|
|
118
|
-
Parse
|
|
119
|
-
FRED uses "9999-12-31" to indicate an ongoing dimension
|
|
120
|
-
|
|
121
|
-
Args:
|
|
122
|
-
date_str (str): The date string from FRED API.
|
|
123
|
-
Returns:
|
|
124
|
-
Optional[datetime]: A datetime object representing valid_to, or None if ongoing.
|
|
117
|
+
Parse a FRED API date (always YYYY-MM-DD) to midnight US Central.
|
|
118
|
+
FRED uses "9999-12-31" to indicate an ongoing dimension -> None.
|
|
125
119
|
"""
|
|
126
|
-
|
|
127
|
-
date
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
# ~9 minutes and causes downstream day-rollback bugs after
|
|
133
|
-
# tz_convert + normalize. See tests/sources/fred/test_fred_tz_handling.py.
|
|
134
|
-
date = US_CENTRAL.localize(date)
|
|
135
|
-
if date.year == 9999 and date.month == 12 and date.day == 31:
|
|
136
|
-
logger.debug("Parsed ongoing dimension date (9999-12-31) as None")
|
|
137
|
-
return None
|
|
138
|
-
return date
|
|
139
|
-
except (ValueError, OverflowError) as e:
|
|
140
|
-
# FRED uses 9999-12-31 to indicate "ongoing" check for parsing errors "date value out of range"
|
|
141
|
-
if "out of range" in str(e):
|
|
142
|
-
logger.debug(
|
|
143
|
-
f"Date {date_str} out of range, treating as ongoing (None)"
|
|
144
|
-
)
|
|
145
|
-
return None
|
|
146
|
-
else:
|
|
147
|
-
raise e
|
|
120
|
+
if date_str == "9999-12-31":
|
|
121
|
+
logger.debug("Parsed ongoing dimension date (9999-12-31) as None")
|
|
122
|
+
return None
|
|
123
|
+
return ensure_timezone(
|
|
124
|
+
datetime.strptime(date_str, FRED_DATE_FORMAT), US_CENTRAL
|
|
125
|
+
)
|
|
148
126
|
|
|
149
127
|
def _convert_frequency(self, fred_frequency: str) -> Optional[str]:
|
|
150
128
|
"""
|
|
@@ -287,24 +265,6 @@ class FredReleaseManager(ReleaseManager):
|
|
|
287
265
|
def __init__(self, api_client: FredAPIClient):
|
|
288
266
|
super().__init__(api_client)
|
|
289
267
|
|
|
290
|
-
def _ensure_us_central(self, dt: datetime) -> datetime:
|
|
291
|
-
"""
|
|
292
|
-
Ensure the given datetime is in US Central timezone.
|
|
293
|
-
|
|
294
|
-
Args:
|
|
295
|
-
dt (datetime): The datetime to check.
|
|
296
|
-
Returns:
|
|
297
|
-
datetime: The datetime in US Central timezone.
|
|
298
|
-
"""
|
|
299
|
-
if dt is None:
|
|
300
|
-
return None
|
|
301
|
-
if dt.tzinfo is None:
|
|
302
|
-
# Assume US Central if no timezone info. Use pytz.localize() so the
|
|
303
|
-
# CST/CDT offset is selected per-date instead of LMT (-05:50:36).
|
|
304
|
-
return US_CENTRAL.localize(dt)
|
|
305
|
-
# Otherwise, convert to US Central
|
|
306
|
-
return dt.astimezone(US_CENTRAL)
|
|
307
|
-
|
|
308
268
|
def fetch_new_releases(
|
|
309
269
|
self,
|
|
310
270
|
state: UpdateState,
|
|
@@ -322,8 +282,8 @@ class FredReleaseManager(ReleaseManager):
|
|
|
322
282
|
List[Release]: A list of new Release objects to be created.
|
|
323
283
|
"""
|
|
324
284
|
# Convert release_start_date and release_end_date to US Central timezone if they are not None and have no timezone info
|
|
325
|
-
state.release_start_date = self._ensure_us_central(state.release_start_date)
|
|
326
|
-
state.release_end_date = self._ensure_us_central(state.release_end_date)
|
|
285
|
+
state.release_start_date = ensure_timezone(state.release_start_date, US_CENTRAL)
|
|
286
|
+
state.release_end_date = ensure_timezone(state.release_end_date, US_CENTRAL)
|
|
327
287
|
|
|
328
288
|
# Get the appropriate API start date (handles backfilling)
|
|
329
289
|
api_start_date = self._get_api_start_date(
|
|
@@ -9,6 +9,7 @@ from tenacity import (
|
|
|
9
9
|
retry_if_exception,
|
|
10
10
|
)
|
|
11
11
|
|
|
12
|
+
from macrotrace._time import ensure_timezone
|
|
12
13
|
from macrotrace.models.db import (
|
|
13
14
|
Dataset,
|
|
14
15
|
DatasetDimension,
|
|
@@ -750,8 +751,10 @@ class ONSReleaseManager(ReleaseManager):
|
|
|
750
751
|
skipped_count += 1
|
|
751
752
|
continue
|
|
752
753
|
else:
|
|
753
|
-
|
|
754
|
-
|
|
754
|
+
# ensure_timezone converts (rather than overwrites) the offset
|
|
755
|
+
# should the API ever return a non-UTC time.
|
|
756
|
+
release_date = ensure_timezone(
|
|
757
|
+
datetime.fromisoformat(release_date_str), UTC
|
|
755
758
|
)
|
|
756
759
|
|
|
757
760
|
if self._skip_release(release_date, state, current_release_dates):
|
|
@@ -53,6 +53,7 @@ from tenacity import (
|
|
|
53
53
|
)
|
|
54
54
|
from tqdm import tqdm
|
|
55
55
|
|
|
56
|
+
from macrotrace._time import ensure_timezone
|
|
56
57
|
from macrotrace.models.db import (
|
|
57
58
|
DatasetDimension,
|
|
58
59
|
Observation,
|
|
@@ -611,20 +612,6 @@ def _parse_workbook(
|
|
|
611
612
|
return ParsedVintageFile(vintages=vintages, cells=cells)
|
|
612
613
|
|
|
613
614
|
|
|
614
|
-
def _ensure_utc(dt: Optional[datetime]) -> Optional[datetime]:
|
|
615
|
-
"""
|
|
616
|
-
Return the datetime as a timezone-aware UTC value, or None.
|
|
617
|
-
|
|
618
|
-
RTDSM release dates are stored in UTC, so any caller-supplied window bound
|
|
619
|
-
must also be timezone-aware to compare correctly.
|
|
620
|
-
"""
|
|
621
|
-
if dt is None:
|
|
622
|
-
return None
|
|
623
|
-
if dt.tzinfo is None:
|
|
624
|
-
return dt.replace(tzinfo=UTC)
|
|
625
|
-
return dt.astimezone(UTC)
|
|
626
|
-
|
|
627
|
-
|
|
628
615
|
class RTDSMAPIClient(APIClient):
|
|
629
616
|
"""
|
|
630
617
|
Downloads and parses a single RTDSM spreadsheet.
|
|
@@ -822,8 +809,8 @@ class RTDSMReleaseManager(ReleaseManager):
|
|
|
822
809
|
Returns:
|
|
823
810
|
List[Release]: The new releases.
|
|
824
811
|
"""
|
|
825
|
-
state.release_start_date = _ensure_utc(state.release_start_date)
|
|
826
|
-
state.release_end_date = _ensure_utc(state.release_end_date)
|
|
812
|
+
state.release_start_date = ensure_timezone(state.release_start_date, UTC)
|
|
813
|
+
state.release_end_date = ensure_timezone(state.release_end_date, UTC)
|
|
827
814
|
|
|
828
815
|
parsed = self.api_client.get_parsed_file()
|
|
829
816
|
current_release_dates = self._get_current_releases_in_db(state.dataset.id)
|
|
@@ -21,7 +21,6 @@ from datetime import datetime, timezone
|
|
|
21
21
|
from pathlib import Path
|
|
22
22
|
from typing import Dict, List, Optional
|
|
23
23
|
from dotenv import load_dotenv
|
|
24
|
-
from dateutil import parser as date_parser
|
|
25
24
|
|
|
26
25
|
from macrotrace import MTTimeSeries
|
|
27
26
|
from macrotrace._paths import resolve_db_path
|
|
@@ -181,13 +180,6 @@ def _reset_local_db() -> None:
|
|
|
181
180
|
LOGGER.info("Reset local DB %s (dropped %d tables)", db_path, len(tables))
|
|
182
181
|
|
|
183
182
|
|
|
184
|
-
def _parse_date_to_utc(date_value: str) -> datetime:
|
|
185
|
-
dt = date_parser.isoparse(date_value)
|
|
186
|
-
if dt.tzinfo is None:
|
|
187
|
-
dt = dt.replace(tzinfo=timezone.utc)
|
|
188
|
-
return dt.astimezone(timezone.utc)
|
|
189
|
-
|
|
190
|
-
|
|
191
183
|
def _is_placeholder_value(value: object) -> bool:
|
|
192
184
|
if isinstance(value, str):
|
|
193
185
|
return value.startswith("__REPLACE_")
|
|
@@ -204,7 +196,7 @@ def _ingest_one(
|
|
|
204
196
|
source: str,
|
|
205
197
|
source_name: str,
|
|
206
198
|
dataset_id: str,
|
|
207
|
-
vintage_start_date: datetime,
|
|
199
|
+
vintage_start_date: str,
|
|
208
200
|
series_key: Optional[Dict[str, str]] = None,
|
|
209
201
|
) -> IngestResult:
|
|
210
202
|
start = time.perf_counter()
|
|
@@ -256,7 +248,7 @@ def _build_cli() -> argparse.ArgumentParser:
|
|
|
256
248
|
parser.add_argument(
|
|
257
249
|
"--vintage-start-date",
|
|
258
250
|
default="2019-01-01",
|
|
259
|
-
help="
|
|
251
|
+
help="Release start date (YYYY-MM-DD) to limit ingest volume (default: 2019-01-01).",
|
|
260
252
|
)
|
|
261
253
|
parser.add_argument(
|
|
262
254
|
"--max-failures",
|
|
@@ -293,7 +285,9 @@ def main() -> int:
|
|
|
293
285
|
LOGGER.error("FRED_API_KEY is not set; FRED ingestion cannot run.")
|
|
294
286
|
return 2
|
|
295
287
|
|
|
296
|
-
|
|
288
|
+
# Passed through to MTTimeSeries, which reads the calendar date on each
|
|
289
|
+
# source's own clock.
|
|
290
|
+
vintage_start_date = args.vintage_start_date
|
|
297
291
|
results: List[IngestResult] = []
|
|
298
292
|
|
|
299
293
|
LOGGER.info("Starting FRED ingest (%d sources)", len(FRED_SOURCES))
|