macrotrace 0.2.0__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. {macrotrace-0.2.0 → macrotrace-0.2.2}/.github/workflows/docs.yml +1 -1
  2. {macrotrace-0.2.0 → macrotrace-0.2.2}/CHANGELOG.md +24 -0
  3. {macrotrace-0.2.0 → macrotrace-0.2.2}/PKG-INFO +2 -6
  4. {macrotrace-0.2.0 → macrotrace-0.2.2}/README.md +1 -5
  5. {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/models/mt/time_series.py +392 -28
  6. {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/sources/base.py +7 -1
  7. {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/sources/fred.py +2 -0
  8. {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/sources/ons.py +2 -0
  9. {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/sources/rtdsm.py +2 -0
  10. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/models/mt/series/test_series.py +223 -3
  11. {macrotrace-0.2.0 → macrotrace-0.2.2}/.github/workflows/ci.yml +0 -0
  12. {macrotrace-0.2.0 → macrotrace-0.2.2}/.github/workflows/release.yml +0 -0
  13. {macrotrace-0.2.0 → macrotrace-0.2.2}/.gitignore +0 -0
  14. {macrotrace-0.2.0 → macrotrace-0.2.2}/.pre-commit-config.yaml +0 -0
  15. {macrotrace-0.2.0 → macrotrace-0.2.2}/.python-version +0 -0
  16. {macrotrace-0.2.0 → macrotrace-0.2.2}/LICENSE +0 -0
  17. {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/__init__.py +0 -0
  18. {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/_paths.py +0 -0
  19. {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/cli.py +0 -0
  20. {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/graphing.py +0 -0
  21. {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/models/__init__.py +0 -0
  22. {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/models/db.py +0 -0
  23. {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/models/mt/__init__.py +0 -0
  24. {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/models/mt/analysis.py +0 -0
  25. {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/models/mt/observation.py +0 -0
  26. {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/models/mt/plotter.py +0 -0
  27. {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/models/mt/series_metadata.py +0 -0
  28. {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/ons_cli/__init__.py +0 -0
  29. {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/ons_cli/cli.py +0 -0
  30. {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/ons_cli/common.py +0 -0
  31. {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/ons_cli/tui.py +0 -0
  32. {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/py.typed +0 -0
  33. {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/sources/__init__.py +0 -0
  34. {macrotrace-0.2.0 → macrotrace-0.2.2}/macrotrace/sources/example.py +0 -0
  35. {macrotrace-0.2.0 → macrotrace-0.2.2}/pyproject.toml +0 -0
  36. {macrotrace-0.2.0 → macrotrace-0.2.2}/scripts/backstop_ingest.py +0 -0
  37. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/assets/mt/time_series/expected_vm.csv +0 -0
  38. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/assets/mt/time_series/from_dataframe.csv +0 -0
  39. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/assets/mt/time_series/from_dataframe_with_tz.csv +0 -0
  40. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/models/mt/series/test_db_path_forwarding.py +0 -0
  41. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/models/mt/series/test_init.py +0 -0
  42. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/models/mt/test_analysis.py +0 -0
  43. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/models/mt/test_metadata.py +0 -0
  44. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/models/mt/test_plotter.py +0 -0
  45. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/models/mt/utils.py +0 -0
  46. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/models/test_db_models.py +0 -0
  47. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/ons_cli/test_cli.py +0 -0
  48. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/ons_cli/test_common.py +0 -0
  49. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/ons_cli/test_root_cli.py +0 -0
  50. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/ons_cli/test_tui.py +0 -0
  51. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/ons_cli/utils.py +0 -0
  52. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/base/fixtures.py +0 -0
  53. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/base/test_base_api_client.py +0 -0
  54. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/base/test_base_dataset_manager.py +0 -0
  55. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/base/test_base_observation_manager.py +0 -0
  56. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/base/test_base_release_manager.py +0 -0
  57. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/base/test_base_series_manager.py +0 -0
  58. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/base/test_base_update_manager.py +0 -0
  59. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/base/test_base_update_state.py +0 -0
  60. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/base/test_db_path_resolution.py +0 -0
  61. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/fred/fixtures.py +0 -0
  62. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/fred/test_fred_api_client.py +0 -0
  63. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/fred/test_fred_dataset_manager.py +0 -0
  64. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/fred/test_fred_observation_manager.py +0 -0
  65. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/fred/test_fred_release_manager.py +0 -0
  66. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/fred/test_fred_series_manager.py +0 -0
  67. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/fred/test_fred_tz_handling.py +0 -0
  68. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/fred/test_fred_update_manager.py +0 -0
  69. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/ons/fixtures.py +0 -0
  70. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/ons/test_ons_api_client.py +0 -0
  71. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/ons/test_ons_dataset_manager.py +0 -0
  72. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/ons/test_ons_observation_manager.py +0 -0
  73. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/ons/test_ons_release_manager.py +0 -0
  74. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/ons/test_ons_series_manager.py +0 -0
  75. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/ons/test_ons_update_manager.py +0 -0
  76. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/rtdsm/fixtures.py +0 -0
  77. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/rtdsm/test_rtdsm_api_client.py +0 -0
  78. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/rtdsm/test_rtdsm_dataset_manager.py +0 -0
  79. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/rtdsm/test_rtdsm_helpers.py +0 -0
  80. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/rtdsm/test_rtdsm_observation_manager.py +0 -0
  81. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/rtdsm/test_rtdsm_release_manager.py +0 -0
  82. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/rtdsm/test_rtdsm_series_manager.py +0 -0
  83. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/sources/rtdsm/test_rtdsm_update_manager.py +0 -0
  84. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/test_package_init.py +0 -0
  85. {macrotrace-0.2.0 → macrotrace-0.2.2}/tests/test_paths.py +0 -0
  86. {macrotrace-0.2.0 → macrotrace-0.2.2}/uv.lock +0 -0
@@ -37,7 +37,7 @@ jobs:
37
37
  - name: Deploy dev docs (push to main)
38
38
  if: github.ref == 'refs/heads/main'
39
39
  run: |
40
- uv run mike deploy --push --update-aliases dev
40
+ uv run mike deploy --push --update-aliases --prop-set hidden=true dev
41
41
  if ! uv run mike list 2>/dev/null | grep -qE '^[0-9]'; then
42
42
  uv run mike set-default --push dev
43
43
  fi
@@ -3,6 +3,30 @@
3
3
  Format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/);
4
4
  versions follow [SemVer](https://semver.org/).
5
5
 
6
+ ## 0.2.2 — 2026-06-12
7
+
8
+ - **Vintage matching:** `identify_vintage` now interprets a tz-naive index in
9
+ the source's native timezone (e.g. midnight US Central for FRED) instead of
10
+ UTC, so plain dates match FRED vintages.
11
+ - **Vintage matching:** Added a `decimals` argument that rounds both sides
12
+ before comparison, for matching data published at a fixed precision.
13
+ - **Vintage matching:** `VintageMatch.failure_reason` now reports why nothing
14
+ matched: timestamps no vintage contains (`"coverage"`) vs value
15
+ disagreements (`"values"`).
16
+ - **Vintage matching:** Numeric/positional indexes are rejected with a clear
17
+ error, and `pd.PeriodIndex` is supported.
18
+ - **Vintage matching:** When nothing matches, `VintageMatch.alignment_hint`
19
+ flags timestamps that would match under a wrong timezone localization, a
20
+ constant time shift, or a month-end vs month-start convention.
21
+
22
+ ## 0.2.1 — 2026-06-11
23
+
24
+ - **Docs:** RTDSM is now listed as an available source on the documentation
25
+ homepage — it had been left under "Coming Soon" when 0.2.0 shipped.
26
+ - **Docs:** The version selector now shows the `latest` label next to the
27
+ release it points at, and the in-development `dev` build is hidden from
28
+ the selector (it is still reachable directly at `/dev/`).
29
+
6
30
  ## 0.2.0 — 2026-06-10
7
31
 
8
32
  - **Sources:** Added the Federal Reserve Bank of Philadelphia's Real-Time
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: macrotrace
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: A Python library for managing and analyzing macroeconomic time series data with vintage awareness.
5
5
  Project-URL: Homepage, https://github.com/john-ramsey/macrotrace
6
6
  Project-URL: Repository, https://github.com/john-ramsey/macrotrace
@@ -170,13 +170,9 @@ if match.is_ambiguous:
170
170
  elif match.matched:
171
171
  print(f"Matches the {match.release_date.date()} vintage")
172
172
  else:
173
- print("No matching vintage found")
173
+ print(f"No matching vintage found (failed on: {match.failure_reason})")
174
174
  ```
175
175
 
176
- A match is ambiguous when the data is unchanged across consecutive vintages, so
177
- the values alone cannot pin down a single release; `release_dates` lists every
178
- consistent vintage in that case.
179
-
180
176
  ## Command-Line Tools
181
177
 
182
178
  MacroTrace includes command-line tools for exploring ONS datasets:
@@ -129,13 +129,9 @@ if match.is_ambiguous:
129
129
  elif match.matched:
130
130
  print(f"Matches the {match.release_date.date()} vintage")
131
131
  else:
132
- print("No matching vintage found")
132
+ print(f"No matching vintage found (failed on: {match.failure_reason})")
133
133
  ```
134
134
 
135
- A match is ambiguous when the data is unchanged across consecutive vintages, so
136
- the values alone cannot pin down a single release; `release_dates` lists every
137
- consistent vintage in that case.
138
-
139
135
  ## Command-Line Tools
140
136
 
141
137
  MacroTrace includes command-line tools for exploring ONS datasets:
@@ -1,10 +1,11 @@
1
- from typing import TYPE_CHECKING, List, Optional, Dict, Any
1
+ from typing import TYPE_CHECKING, List, Optional, Dict, Any, Tuple
2
2
  from dataclasses import dataclass, replace
3
3
  from dateutil import parser
4
- from datetime import datetime, timezone
4
+ from datetime import datetime, timedelta, timezone, tzinfo
5
5
 
6
6
  import numpy as np
7
7
  import pandas as pd
8
+ from pandas.tseries.frequencies import get_period_alias
8
9
  from tabulate import tabulate
9
10
  from darts import TimeSeries
10
11
  from peewee import JOIN
@@ -32,11 +33,15 @@ logger = logging.getLogger(__name__)
32
33
  VALID_SOURCES = ["FRED", "ONS", "RTDSM", "USER"]
33
34
  # USER is for user provided data, not from an API
34
35
 
36
+ # With fewer observations than this, a constant-shift scan can match a vintage
37
+ # by coincidence, so identify_vintage only reports shift hints at or above it.
38
+ MIN_OBSERVATIONS_FOR_SHIFT_DETECTION = 5
39
+
35
40
 
36
41
  @dataclass
37
42
  class VintageMatch:
38
43
  """
39
- Result of matching an undated data series against the vintages of an MTTimeSeries (see ``MTTimeSeries.identify_vintage``).
44
+ Result of matching a data series with an unknown release date against the vintages of an MTTimeSeries (see ``MTTimeSeries.identify_vintage``).
40
45
 
41
46
  A match is ambiguous when the supplied data is consistent with more than one vintage.
42
47
  This is common when the data only covers observations that were never revised across a run of consecutive vintages, so the values alone cannot pin down a single release.
@@ -46,12 +51,22 @@ class VintageMatch:
46
51
  n_observations: Number of non-null observations from the supplied data that were compared against each vintage.
47
52
  rtol: Relative tolerance used for the value comparison.
48
53
  atol: Absolute tolerance used for the value comparison.
54
+ decimals: Number of decimals both sides were rounded to before comparison, or None when no rounding was applied.
55
+ n_vintages_compared: Total number of vintages the supplied data was compared against.
56
+ n_vintages_covering: Number of vintages containing every supplied timestamp. When zero, the data failed on coverage rather than on values — see ``failure_reason``.
57
+ alignment_hint: When nothing matched but a diagnostic pass found a reinterpretation of the timestamps under which the values do match (wrong timezone localization, a constant time shift, or a different day-of-period convention), a human-readable description of it. The hinted reinterpretation never counts as a match — fix the index and re-run.
58
+ time_shift: The constant shift that, added to the supplied index, makes the values match at least one vintage. Only set when the hint came from the constant-shift detector.
49
59
  """
50
60
 
51
61
  release_dates: List[datetime]
52
62
  n_observations: int
53
63
  rtol: float
54
64
  atol: float
65
+ decimals: Optional[int] = None
66
+ n_vintages_compared: int = 0
67
+ n_vintages_covering: int = 0
68
+ alignment_hint: Optional[str] = None
69
+ time_shift: Optional[timedelta] = None
55
70
 
56
71
  @property
57
72
  def matched(self) -> bool:
@@ -63,6 +78,20 @@ class VintageMatch:
63
78
  """True if the supplied data matched more than one vintage."""
64
79
  return len(self.release_dates) > 1
65
80
 
81
+ @property
82
+ def failure_reason(self) -> Optional[str]:
83
+ """
84
+ Why the supplied data matched no vintage, or None when it matched.
85
+
86
+ Returns "coverage" when no vintage contains the supplied timestamps — usually a sign the index dates or timezone are wrong rather than the values — and "values" when at least one vintage contains the timestamps but none matched (the values disagreed, or ``require_exact_coverage`` excluded vintages carrying extra observations).
87
+
88
+ Returns:
89
+ Optional[str]: "coverage", "values", or None when the data matched.
90
+ """
91
+ if self.matched:
92
+ return None
93
+ return "coverage" if self.n_vintages_covering == 0 else "values"
94
+
66
95
  @property
67
96
  def release_date(self) -> Optional[datetime]:
68
97
  """
@@ -85,7 +114,20 @@ class VintageMatch:
85
114
  """
86
115
  compared = f"compared {self.n_observations} observation(s)"
87
116
  if not self.matched:
88
- return f"VintageMatch(no matching vintage found; {compared})"
117
+ if self.failure_reason == "coverage":
118
+ message = (
119
+ "VintageMatch(no matching vintage found; no vintage contains "
120
+ "the supplied timestamps - check the index dates/timezone"
121
+ )
122
+ else:
123
+ message = (
124
+ f"VintageMatch(no matching vintage found; "
125
+ f"{self.n_vintages_covering} vintage(s) contain the supplied "
126
+ f"timestamps but none matched"
127
+ )
128
+ if self.alignment_hint:
129
+ message += f"; hint: {self.alignment_hint}"
130
+ return f"{message}; {compared})"
89
131
  if self.is_ambiguous:
90
132
  dates = ", ".join(d.strftime("%Y-%m-%d") for d in self.release_dates)
91
133
  return (
@@ -420,42 +462,59 @@ class MTTimeSeries:
420
462
  rtol: float = 1e-05,
421
463
  atol: float = 1e-08,
422
464
  require_exact_coverage: bool = False,
465
+ decimals: Optional[int] = None,
423
466
  ) -> VintageMatch:
424
467
  """
425
- Identify which vintage(s) a block of undated data came from.
468
+ Identify which vintage(s) a block of data with an unknown release date came from.
426
469
 
427
470
  Replication packages frequently ship a series of observations with no release date attached, only a source.
428
471
  This compares the supplied data against every vintage in this MTTimeSeries and reports the release date(s) whose values it is consistent with, so you can recover the vintage you are actually working with.
472
+ Note that only the release date is treated as unknown: the observations themselves must be dated, with the series index supplying the observation dates.
429
473
 
430
474
  The supplied data is treated as a (possibly incomplete) window of a vintage: every timestamp in ``series`` must be present in a vintage and its values must agree (within tolerance) for that vintage to match.
431
475
  A vintage may carry extra observations the data does not include.
432
476
  When the data does not change across consecutive vintages the match is necessarily ambiguous, and all consistent release dates are returned.
433
477
 
478
+ When nothing matches, a diagnostic pass checks whether the values would match under a common timestamp misalignment — the index localized to the wrong timezone, shifted by a constant offset, or stamped with a different day-of-period convention (e.g. month-end instead of month-start) — and reports it via ``VintageMatch.alignment_hint``.
479
+ A hinted reinterpretation is never counted as a match.
480
+
434
481
  Args:
435
- series (pd.Series): The undated data to identify, indexed by date.
436
- The index becomes the observation timestamps and the values are compared against each vintage.
437
- A tz-naive index is assumed to be UTC, and null values are dropped before matching.
482
+ series (pd.Series): The data to identify, indexed by observation date.
483
+ A tz-naive index (dates, date strings, or naive timestamps) is interpreted in the source's native observation timezone (e.g. midnight US Central for FRED), falling back to UTC with a warning when the source has no registered manager.
484
+ A ``pd.PeriodIndex`` is compared on each period's start timestamp.
485
+ A numeric index is rejected, because pandas would silently read it as nanosecond offsets from 1970 rather than dates.
486
+ Null values are dropped before matching.
438
487
  rtol (float): Relative tolerance for the value comparison, passed through to ``numpy.isclose``. Defaults to 1e-05.
439
488
  atol (float): Absolute tolerance for the value comparison, passed through to ``numpy.isclose``. Defaults to 1e-08.
440
489
  require_exact_coverage (bool): If True, a vintage only matches when its timestamps are exactly the timestamps in ``series``, rather than allowing the data to be a sub-window of the vintage. Defaults to False.
490
+ decimals (Optional[int]): When set, both the supplied data and each vintage's values are rounded to this many decimals before comparison.
491
+ Use this when the data was published at a fixed precision (e.g. ``decimals=1`` for a series published at one decimal place); it is more faithful than loosening ``atol``, which both accepts values that round apart and rejects values that round together. Defaults to None (no rounding).
441
492
 
442
493
  Returns:
443
- VintageMatch: The matching release date(s) and comparison details. Check ``matched`` to see whether at least one vintage matched.
494
+ VintageMatch: The matching release date(s) and comparison details.
495
+ Check ``matched`` to see whether at least one vintage matched, ``failure_reason`` to distinguish data whose timestamps no vintage contains ("coverage") from data that no vintage matched despite containing its timestamps ("values"), and ``alignment_hint`` for a detected timestamp misalignment.
444
496
 
445
497
  Raises:
446
498
  TypeError: If ``series`` is not a pandas Series.
447
- ValueError: If ``series`` is empty, has a non-date or duplicated index, or contains no non-null observations.
499
+ ValueError: If ``series`` is empty, has a numeric, non-date, or duplicated index, or contains no non-null observations.
448
500
  """
449
- candidate = self._prepare_candidate_series(series)
501
+ candidate, original_tz = self._prepare_candidate_series(series)
502
+ candidate_values = candidate.to_numpy(dtype=float)
503
+ if decimals is not None:
504
+ candidate_values = np.round(candidate_values, decimals)
450
505
 
451
506
  matches: List[datetime] = []
507
+ vintage_frames: List[Tuple[datetime, pd.Series]] = []
508
+ n_vintages_covering = 0
452
509
  for vintage in self._vintages_including_current_series:
453
510
  vintage_df = vintage.to_dataframe(mode="default", tz="utc")
454
511
  vintage_series = vintage_df.set_index("timestamp")["value"]
512
+ vintage_frames.append((vintage.release_date, vintage_series))
455
513
 
456
514
  # Every supplied timestamp must exist in the vintage, otherwise the data cannot be a window of it.
457
515
  if not candidate.index.isin(vintage_series.index).all():
458
516
  continue
517
+ n_vintages_covering += 1
459
518
 
460
519
  # With exact coverage the vintage must hold exactly the supplied timestamps and nothing more.
461
520
  if (
@@ -464,37 +523,64 @@ class MTTimeSeries:
464
523
  ):
465
524
  continue
466
525
 
467
- aligned = vintage_series.reindex(candidate.index)
526
+ aligned_values = vintage_series.reindex(candidate.index).to_numpy(
527
+ dtype=float
528
+ )
529
+ if decimals is not None:
530
+ aligned_values = np.round(aligned_values, decimals)
468
531
  if np.isclose(
469
- candidate.to_numpy(dtype=float),
470
- aligned.to_numpy(dtype=float),
532
+ candidate_values,
533
+ aligned_values,
471
534
  rtol=rtol,
472
535
  atol=atol,
473
536
  ).all():
474
537
  matches.append(vintage.release_date)
475
538
 
539
+ alignment_hint: Optional[str] = None
540
+ time_shift: Optional[timedelta] = None
541
+ if not matches:
542
+ alignment_hint, time_shift = self._diagnose_misalignment(
543
+ candidate,
544
+ candidate_values,
545
+ vintage_frames,
546
+ rtol,
547
+ atol,
548
+ decimals,
549
+ original_tz,
550
+ )
551
+ if alignment_hint is not None:
552
+ logger.warning("No vintage matched, but %s.", alignment_hint)
553
+
476
554
  return VintageMatch(
477
555
  release_dates=sorted(matches),
478
556
  n_observations=len(candidate),
479
557
  rtol=rtol,
480
558
  atol=atol,
559
+ decimals=decimals,
560
+ n_vintages_compared=len(vintage_frames),
561
+ n_vintages_covering=n_vintages_covering,
562
+ alignment_hint=alignment_hint,
563
+ time_shift=time_shift,
481
564
  )
482
565
 
483
- def _prepare_candidate_series(self, series: pd.Series) -> pd.Series:
566
+ def _prepare_candidate_series(
567
+ self, series: pd.Series
568
+ ) -> Tuple[pd.Series, Optional[tzinfo]]:
484
569
  """
485
570
  Validate and normalize a user-supplied data series for vintage matching.
486
571
 
487
572
  Coerces the values to numeric, drops nulls, and renders the index as a sorted, unique, tz-aware UTC DatetimeIndex so it lines up with the timestamps produced by ``to_dataframe(tz="utc")``.
573
+ A tz-naive index is interpreted in the source's native observation timezone (see ``_native_observation_timezone``), a PeriodIndex is taken at each period's start, and a numeric index is rejected.
488
574
 
489
575
  Args:
490
576
  series (pd.Series): The user-supplied data indexed by date.
491
577
 
492
578
  Returns:
493
- pd.Series: The cleaned candidate series indexed by UTC timestamps.
579
+ Tuple[pd.Series, Optional[tzinfo]]: The cleaned candidate series indexed by UTC timestamps, and the timezone the supplied index carried (None when it was tz-naive) so misalignment diagnostics can recover the original wall-clock times.
494
580
 
495
581
  Raises:
496
582
  TypeError: If ``series`` is not a pandas Series.
497
- ValueError: If ``series`` is empty, has a non-date or duplicated index, or contains no non-null observations.
583
+ ValueError: If ``series`` is empty, has a numeric, non-date, or duplicated index, or contains no non-null observations.
498
584
  """
499
585
  if not isinstance(series, pd.Series):
500
586
  raise TypeError(
@@ -507,8 +593,20 @@ class MTTimeSeries:
507
593
  if candidate.empty:
508
594
  raise ValueError("The series contains no non-null observations to match.")
509
595
 
596
+ index_data = candidate.index
597
+ # Periods carry real dates; compare on each period's start timestamp.
598
+ if isinstance(index_data, pd.PeriodIndex):
599
+ index_data = index_data.to_timestamp()
600
+
601
+ # Reject positional/numeric indexes before pd.to_datetime, which would
602
+ # silently read them as nanosecond offsets from 1970-01-01.
603
+ if pd.api.types.is_numeric_dtype(index_data):
604
+ raise ValueError(
605
+ "The series has a numeric index, not dates. Set the observation dates on the index before matching."
606
+ )
607
+
510
608
  try:
511
- index = pd.to_datetime(candidate.index)
609
+ index = pd.to_datetime(index_data)
512
610
  except (ValueError, TypeError) as exc:
513
611
  raise ValueError(
514
612
  "The series must be indexed by dates that pandas can parse."
@@ -517,11 +615,15 @@ class MTTimeSeries:
517
615
  if not isinstance(index, pd.DatetimeIndex):
518
616
  raise ValueError("The series must be indexed by dates, not scalar values.")
519
617
 
618
+ original_tz = index.tz
520
619
  if index.tz is None:
620
+ native_tz = self._native_observation_timezone()
521
621
  logger.warning(
522
- "The series index has no timezone information. Assuming UTC."
622
+ "The series index has no timezone information. Interpreting it in "
623
+ "the source's native observation timezone (%s).",
624
+ native_tz,
523
625
  )
524
- index = index.tz_localize("UTC")
626
+ index = index.tz_localize(native_tz).tz_convert("UTC")
525
627
  else:
526
628
  index = index.tz_convert("UTC")
527
629
 
@@ -529,7 +631,249 @@ class MTTimeSeries:
529
631
  raise ValueError("The series index contains duplicate timestamps.")
530
632
 
531
633
  candidate.index = index
532
- return candidate.sort_index()
634
+ return candidate.sort_index(), original_tz
635
+
636
+ def _diagnose_misalignment(
637
+ self,
638
+ candidate: pd.Series,
639
+ candidate_values: np.ndarray,
640
+ vintage_frames: List[Tuple[datetime, pd.Series]],
641
+ rtol: float,
642
+ atol: float,
643
+ decimals: Optional[int],
644
+ original_tz: Optional[tzinfo],
645
+ ) -> Tuple[Optional[str], Optional[timedelta]]:
646
+ """
647
+ Look for a timestamp reinterpretation under which the unmatched data would match.
648
+
649
+ Runs the detectors from most to least specific — wrong timezone localization, a constant time shift, then a day-of-period convention mismatch — and stops at the first that fires.
650
+
651
+ Args:
652
+ candidate (pd.Series): The prepared candidate series (UTC index).
653
+ candidate_values (np.ndarray): The candidate values, already rounded when ``decimals`` is set.
654
+ vintage_frames (List[Tuple[datetime, pd.Series]]): Each vintage's release date and UTC-indexed values.
655
+ rtol (float): Relative tolerance for the value comparison.
656
+ atol (float): Absolute tolerance for the value comparison.
657
+ decimals (Optional[int]): Decimals both sides are rounded to, or None.
658
+ original_tz (Optional[tzinfo]): The timezone the supplied index carried, None when it was tz-naive.
659
+
660
+ Returns:
661
+ Tuple[Optional[str], Optional[timedelta]]: A human-readable hint and, for the constant-shift detector only, the shift that aligns the index. Both None when no detector fired.
662
+ """
663
+ hint = self._diagnose_wrong_timezone(
664
+ candidate,
665
+ candidate_values,
666
+ vintage_frames,
667
+ rtol,
668
+ atol,
669
+ decimals,
670
+ original_tz,
671
+ )
672
+ if hint is not None:
673
+ return hint, None
674
+
675
+ hint, shift = self._diagnose_constant_shift(
676
+ candidate, candidate_values, vintage_frames, rtol, atol, decimals
677
+ )
678
+ if hint is not None:
679
+ return hint, shift
680
+
681
+ hint = self._diagnose_period_alignment(
682
+ candidate, candidate_values, vintage_frames, rtol, atol, decimals
683
+ )
684
+ return hint, None
685
+
686
+ def _diagnose_wrong_timezone(
687
+ self,
688
+ candidate: pd.Series,
689
+ candidate_values: np.ndarray,
690
+ vintage_frames: List[Tuple[datetime, pd.Series]],
691
+ rtol: float,
692
+ atol: float,
693
+ decimals: Optional[int],
694
+ original_tz: Optional[tzinfo],
695
+ ) -> Optional[str]:
696
+ """
697
+ Check whether the data matches when its wall-clock times are read in the source's native timezone.
698
+
699
+ Only applies to a tz-aware index (a naive one already went through the native timezone), and catches indexes localized to the wrong timezone — including across DST changes, where the error is not a constant offset.
700
+
701
+ Returns:
702
+ Optional[str]: The hint, or None when the detector did not fire.
703
+ """
704
+ if original_tz is None:
705
+ return None
706
+ native_tz = self._native_observation_timezone()
707
+ wall_clock = candidate.index.tz_convert(original_tz).tz_localize(None)
708
+ try:
709
+ reinterpreted = wall_clock.tz_localize(native_tz).tz_convert("UTC")
710
+ except Exception:
711
+ # Wall-clock times that do not exist (or are ambiguous) in the
712
+ # native timezone around a DST change cannot be reinterpreted.
713
+ return None
714
+ if reinterpreted.has_duplicates or reinterpreted.equals(candidate.index):
715
+ return None
716
+
717
+ n_matching = sum(
718
+ self._candidate_matches_vintage(
719
+ reinterpreted, vintage_series, candidate_values, rtol, atol, decimals
720
+ )
721
+ for _, vintage_series in vintage_frames
722
+ )
723
+ if n_matching == 0:
724
+ return None
725
+ return (
726
+ f"the values match {n_matching} vintage(s) when the wall-clock times "
727
+ f"are reinterpreted in the source's native observation timezone "
728
+ f"({native_tz}) — the index appears to be localized to the wrong "
729
+ f"timezone; pass a tz-naive index or localize it to {native_tz}"
730
+ )
731
+
732
def _diagnose_constant_shift(
    self,
    candidate: pd.Series,
    candidate_values: np.ndarray,
    vintage_frames: List[Tuple[datetime, pd.Series]],
    rtol: float,
    atol: float,
    decimals: Optional[int],
) -> Tuple[Optional[str], Optional[timedelta]]:
    """
    Look for one constant index offset under which the candidate equals a stored vintage.

    For every vintage, the offsets that would move the first candidate timestamp onto a
    vintage timestamp are enumerated. An offset is only value-checked when it also moves
    the middle and the last candidate timestamps onto vintage timestamps, which discards
    structurally impossible shifts before the full comparison is run.
    Candidates shorter than ``MIN_OBSERVATIONS_FOR_SHIFT_DETECTION`` are not examined.

    Args:
        candidate (pd.Series): The candidate data; only its index is read here.
        candidate_values (np.ndarray): The candidate values handed to the value comparison.
        vintage_frames (List[Tuple[datetime, pd.Series]]): Pairs whose second element is a vintage series.
        rtol (float): Relative tolerance for the value comparison.
        atol (float): Absolute tolerance for the value comparison.
        decimals (Optional[int]): Decimals forwarded to the value comparison, or None.

    Returns:
        Tuple[Optional[str], Optional[timedelta]]: The hint text and the shift to add to the
        index (the matching shift of smallest magnitude), or (None, None) when nothing matched.
    """
    n_observations = len(candidate)
    if n_observations < MIN_OBSERVATIONS_FOR_SHIFT_DETECTION:
        return None, None

    candidate_index = candidate.index
    anchor_first = candidate_index[0]
    anchor_middle = candidate_index[n_observations // 2]
    anchor_last = candidate_index[-1]
    no_shift = pd.Timedelta(0)

    # Maps each value-matching shift to the number of vintages it matched.
    matches_per_shift: Dict[timedelta, int] = {}
    for _release, stored in vintage_frames:
        stored_index = stored.index
        # Every shift that lands the first candidate timestamp on a stored timestamp ...
        proposed = stored_index - anchor_first
        # ... narrowed to those that also land the middle and last timestamps.
        proposed = proposed[(anchor_middle + proposed).isin(stored_index)]
        proposed = proposed[(anchor_last + proposed).isin(stored_index)]
        for delta in proposed:
            if delta == no_shift:
                # A zero shift is the comparison that already failed.
                continue
            shifted_index = candidate_index + delta
            if not self._candidate_matches_vintage(
                shifted_index,
                stored,
                candidate_values,
                rtol,
                atol,
                decimals,
            ):
                continue
            matches_per_shift[delta] = matches_per_shift.get(delta, 0) + 1

    if not matches_per_shift:
        return None, None

    # Report the matching shift with the smallest magnitude.
    smallest = min(matches_per_shift, key=abs)
    if smallest > no_shift:
        direction = "forward"
    else:
        direction = "back"
    hint = (
        f"the values match {matches_per_shift[smallest]} vintage(s) when the index is "
        f"shifted {direction} by {abs(smallest)} — the timestamps appear to "
        f"follow a different convention than the stored observations"
    )
    return hint, smallest
785
+
786
def _diagnose_period_alignment(
    self,
    candidate: pd.Series,
    candidate_values: np.ndarray,
    vintage_frames: List[Tuple[datetime, pd.Series]],
    rtol: float,
    atol: float,
    decimals: Optional[int],
) -> Optional[str]:
    """
    Check whether the data matches a vintage when both are compared by calendar period.

    Reduces both indexes to periods at the series frequency (daily or coarser), which washes
    out time-of-day and day-of-period conventions — catching e.g. month-end dates against
    month-start storage, a mismatch that is not a constant offset.

    Args:
        candidate (pd.Series): The candidate data; only its index is read here.
        candidate_values (np.ndarray): The candidate values handed to the value comparison.
        vintage_frames (List[Tuple[datetime, pd.Series]]): Pairs whose second element is a vintage series.
        rtol (float): Relative tolerance for the value comparison.
        atol (float): Absolute tolerance for the value comparison.
        decimals (Optional[int]): Decimals forwarded to the value comparison, or None.

    Returns:
        Optional[str]: The hint, or None when the detector did not fire.
    """
    try:
        freq = self._infer_pandas_freq()
    except (ValueError, TypeError):
        # Too few observations, or per-row DST offsets that pandas cannot
        # combine into a single tz-aware index.
        return None
    if freq is None:
        return None

    period_freq = get_period_alias(freq)
    if period_freq is None:
        return None
    # pandas spells the minute and millisecond period aliases "min" and "ms".
    # Upper-casing their first letter gives "M", which the coarse-frequency
    # check below would otherwise accept as a monthly alias, so these
    # sub-daily aliases are rejected explicitly.
    # NOTE(review): assumes get_period_alias is the pandas helper — confirm.
    if period_freq.startswith(("min", "ms")):
        return None
    if period_freq[:1].upper() not in {"D", "W", "M", "Q", "A", "Y"}:
        return None

    candidate_periods = candidate.index.tz_localize(None).to_period(period_freq)
    if candidate_periods.has_duplicates:
        # Several candidate rows fall in one period; a period lookup would be ambiguous.
        return None

    n_matching = 0
    for _, vintage_series in vintage_frames:
        vintage_periods = vintage_series.index.tz_localize(None).to_period(
            period_freq
        )
        if vintage_periods.has_duplicates:
            # This vintage cannot be keyed by period unambiguously; skip it.
            continue
        period_series = pd.Series(vintage_series.to_numpy(), index=vintage_periods)
        if self._candidate_matches_vintage(
            candidate_periods, period_series, candidate_values, rtol, atol, decimals
        ):
            n_matching += 1

    if n_matching == 0:
        return None
    return (
        f"the values match {n_matching} vintage(s) when compared by calendar "
        f"period ({period_freq}) — the index appears to use a different "
        f"day-of-period or time convention than the stored observations "
        f"(e.g. month-end instead of month-start dates)"
    )
847
+
848
@staticmethod
def _candidate_matches_vintage(
    index: pd.Index,
    vintage_series: pd.Series,
    candidate_values: np.ndarray,
    rtol: float,
    atol: float,
    decimals: Optional[int],
) -> bool:
    """
    Report whether a vintage contains every index entry with values inside the tolerance.

    Args:
        index (pd.Index): The (possibly reinterpreted) candidate index.
        vintage_series (pd.Series): The vintage values, indexed compatibly with ``index``.
        candidate_values (np.ndarray): The candidate values, already rounded when ``decimals`` is set.
        rtol (float): Relative tolerance for the value comparison.
        atol (float): Absolute tolerance for the value comparison.
        decimals (Optional[int]): Decimals to round the vintage values to, or None.

    Returns:
        bool: True when the index is fully covered and all values agree.
    """
    # Coverage first: one index entry missing from the vintage rules it out.
    covered = index.isin(vintage_series.index)
    if not covered.all():
        return False

    stored_values = vintage_series.reindex(index).to_numpy(dtype=float)
    if decimals is not None:
        # Only the vintage side is rounded here; the candidate values arrive pre-rounded.
        stored_values = np.round(stored_values, decimals)

    within_tolerance = np.isclose(
        candidate_values, stored_values, rtol=rtol, atol=atol
    )
    return bool(within_tolerance.all())
533
877
 
534
878
  ### Theoretically if the units change, we should not be able to compare them
535
879
  def generate_vintage_matrix(self) -> pd.DataFrame:
@@ -1081,22 +1425,42 @@ class MTTimeSeries:
1081
1425
  """
1082
1426
  return self.vintages + [self]
1083
1427
 
1084
- def _get_update_manager(self):
1085
- """Get the appropriate update manager for the data source.
1086
-
1087
- Returns:
1088
- UpdateManager: An instance of the appropriate update manager class.
1089
- """
1428
@staticmethod
def _source_manager_classes() -> Dict[str, type]:
    """Return the source-name to UpdateManager-class registry; the imports are local to avoid circular imports."""
    from macrotrace.sources.fred import FredUpdateManager
    from macrotrace.sources.ons import ONSUpdateManager
    from macrotrace.sources.rtdsm import RTDSMUpdateManager

    registry: Dict[str, type] = dict(
        FRED=FredUpdateManager,
        ONS=ONSUpdateManager,
        RTDSM=RTDSMUpdateManager,
    )
    return registry
1099
1440
 
1441
def _native_observation_timezone(self) -> tzinfo:
    """
    Resolve the timezone in which this series' source stamps its observations.

    The value is read from ``NATIVE_OBSERVATION_TZ`` on the update manager class
    registered for ``self.source``. A source with no registered manager
    (e.g. user-provided data) resolves to UTC.

    Returns:
        tzinfo: The source's declared observation timezone, or UTC.
    """
    registry = self._source_manager_classes()
    manager_cls = registry.get(self.source)
    return (
        timezone.utc if manager_cls is None else manager_cls.NATIVE_OBSERVATION_TZ
    )
1455
+
1456
+ def _get_update_manager(self):
1457
+ """Get the appropriate update manager for the data source.
1458
+
1459
+ Returns:
1460
+ UpdateManager: An instance of the appropriate update manager class.
1461
+ """
1462
+ source_managers = self._source_manager_classes()
1463
+
1100
1464
  assert (
1101
1465
  self.source in source_managers.keys()
1102
1466
  ), f"Unsupported source: {self.source}. No update manager available."
@@ -1,5 +1,5 @@
1
1
  from typing import Any, List, Dict, Optional, Tuple
2
- from datetime import datetime, timezone
2
+ from datetime import datetime, timezone, tzinfo
3
3
  from math import floor
4
4
  from dataclasses import dataclass
5
5
  from importlib.metadata import version, PackageNotFoundError
@@ -691,6 +691,12 @@ class ObservationManager:
691
691
 
692
692
 
693
693
  class UpdateManager:
694
+ # The timezone this source stamps observation timestamps with. Every
695
+ # subclass must declare its own — MTTimeSeries.identify_vintage uses it to
696
+ # interpret tz-naive candidate data, so a wrong value silently breaks
697
+ # matching for that source.
698
+ NATIVE_OBSERVATION_TZ: tzinfo
699
+
694
700
  def __init__(
695
701
  self,
696
702
  dataset_id: str,
@@ -631,6 +631,8 @@ class FredObservationManager(ObservationManager):
631
631
 
632
632
 
633
633
  class FredUpdateManager(UpdateManager):
634
+ NATIVE_OBSERVATION_TZ = US_CENTRAL
635
+
634
636
  def __init__(
635
637
  self,
636
638
  dataset_id: str,
@@ -1091,6 +1091,8 @@ class ONSObservationManager(ObservationManager):
1091
1091
 
1092
1092
 
1093
1093
  class ONSUpdateManager(UpdateManager):
1094
+ NATIVE_OBSERVATION_TZ = UTC
1095
+
1094
1096
  def __init__(
1095
1097
  self,
1096
1098
  dataset_id: str,
@@ -957,6 +957,8 @@ class RTDSMObservationManager(ObservationManager):
957
957
 
958
958
 
959
959
  class RTDSMUpdateManager(UpdateManager):
960
+ NATIVE_OBSERVATION_TZ = UTC
961
+
960
962
  def __init__(
961
963
  self,
962
964
  dataset_id: str,
@@ -3,6 +3,7 @@ from unittest.mock import MagicMock, patch
3
3
  from datetime import datetime, timedelta, timezone
4
4
  import pandas as pd
5
5
  import numpy as np
6
+ import pytz
6
7
  from darts import TimeSeries
7
8
 
8
9
  from macrotrace.models import (
@@ -1368,10 +1369,10 @@ def test_identify_vintage_respects_tolerance(sample_time_series_with_revisions):
1368
1369
  ).matched
1369
1370
 
1370
1371
 
1371
- def test_identify_vintage_naive_index_assumes_utc(
1372
+ def test_identify_vintage_naive_index_unknown_source_falls_back_to_utc(
1372
1373
  sample_time_series_with_revisions, caplog
1373
1374
  ):
1374
- """A tz-naive index is assumed to be UTC (with a warning) and still matches."""
1375
+ """A tz-naive index for a source with no registered manager is interpreted as UTC (with a warning) and still matches."""
1375
1376
  target_release = datetime(2024, 12, 10, tzinfo=timezone.utc)
1376
1377
  vintage = _vintage_with_release_date(
1377
1378
  sample_time_series_with_revisions, target_release
@@ -1381,10 +1382,229 @@ def test_identify_vintage_naive_index_assumes_utc(
1381
1382
 
1382
1383
  result = sample_time_series_with_revisions.identify_vintage(candidate)
1383
1384
 
1384
- assert "series index has no timezone information. Assuming UTC." in caplog.text
1385
+ assert "series index has no timezone information" in caplog.text
1386
+ assert "(UTC)" in caplog.text
1385
1387
  assert result.release_date == target_release
1386
1388
 
1387
1389
 
1390
def test_identify_vintage_naive_index_uses_source_native_timezone():
    """
    A FRED series given a tz-naive candidate reads the dates as US Central
    midnight, the way its observations are stored here. The two dates sit on
    either side of a DST change, so their UTC offsets differ.
    """
    chicago = pytz.timezone("America/Chicago")
    released = datetime(2024, 3, 12, tzinfo=timezone.utc)
    # The 2024-03-10 US DST transition falls between these two dates.
    wall_clock_dates = [datetime(2024, 3, 9), datetime(2024, 3, 11)]

    stored = []
    for position, wall_clock in enumerate(wall_clock_dates):
        stored.append(
            MTObservation(
                timestamp=chicago.localize(wall_clock),
                value=100.0 + position,
                release_date=released,
            )
        )
    series = MTTimeSeries._from_data(
        dataset_id="TEST",
        release_date=released,
        current_observations=stored,
        vintages=[],
        source="FRED",
        frequency="D",
    )

    naive_candidate = pd.Series(
        [100.0, 101.0], index=pd.to_datetime(wall_clock_dates)
    )
    outcome = series.identify_vintage(naive_candidate)

    assert outcome.matched
    assert outcome.release_date == released
1423
+
1424
+
1425
def test_identify_vintage_rejects_numeric_index(sample_time_series):
    """Positional and integer-year indexes are refused rather than read as nanosecond offsets from 1970."""
    rejected_inputs = (
        # Default positional RangeIndex.
        pd.Series([100.0, 101.0, 102.0]),
        # Integer years used as labels.
        pd.Series([100.0, 101.0], index=[2024, 2025]),
    )
    for numeric_indexed in rejected_inputs:
        with pytest.raises(ValueError, match="numeric index"):
            sample_time_series.identify_vintage(numeric_indexed)
1434
+
1435
+
1436
def test_identify_vintage_accepts_period_index(sample_time_series):
    """A candidate carrying a daily PeriodIndex is matched via each period's start timestamp."""
    stored = sample_time_series.to_series()
    daily_periods = pd.PeriodIndex(stored.index.tz_localize(None), freq="D")
    period_candidate = pd.Series(stored.to_numpy(), index=daily_periods)

    outcome = sample_time_series.identify_vintage(period_candidate)

    assert outcome.matched
    assert outcome.release_date == sample_time_series.release_date
1448
+
1449
+
1450
def test_identify_vintage_decimals_rounds_both_sides(
    sample_time_series_with_revisions,
):
    """With ``decimals`` set, data republished at lower precision matches without a looser atol."""
    series = sample_time_series_with_revisions
    target_release = datetime(2024, 12, 10, tzinfo=timezone.utc)
    target_vintage = _vintage_with_release_date(series, target_release)

    # Below the rounding boundary: no raw match, but a match at one decimal.
    nudged = target_vintage.to_series() + 0.04
    raw_outcome = series.identify_vintage(nudged)
    assert not raw_outcome.matched

    rounded_outcome = series.identify_vintage(nudged, decimals=1)
    assert rounded_outcome.release_date == target_release
    assert rounded_outcome.decimals == 1

    # Past the rounding boundary: rounding lands away from the stored values.
    overshot = target_vintage.to_series() + 0.06
    overshot_outcome = series.identify_vintage(overshot, decimals=1)
    assert not overshot_outcome.matched
1471
+
1472
+
1473
def test_identify_vintage_failure_reason(sample_time_series):
    """``failure_reason`` tells a timestamp-coverage failure apart from a value disagreement."""
    stored = sample_time_series.to_series()

    # A clean match carries no failure reason and reports how many vintages were compared.
    clean = sample_time_series.identify_vintage(stored)
    expected_compared = len(sample_time_series._vintages_including_current_series)
    assert clean.failure_reason is None
    assert clean.n_vintages_compared == expected_compared

    # Same values at timestamps no vintage contains: fails on coverage.
    off_grid = stored.copy()
    off_grid.index = off_grid.index + pd.Timedelta(hours=6)
    on_coverage = sample_time_series.identify_vintage(off_grid)
    assert not on_coverage.matched
    assert on_coverage.failure_reason == "coverage"
    assert on_coverage.n_vintages_covering == 0
    assert "check the index dates/timezone" in repr(on_coverage)

    # Right timestamps, wrong values: fails on values, and no reinterpretation
    # of the timestamps can explain values that exist in no vintage.
    bogus = pd.Series(9999.0, index=stored.index)
    on_values = sample_time_series.identify_vintage(bogus)
    assert not on_values.matched
    assert on_values.failure_reason == "values"
    assert on_values.n_vintages_covering > 0
    assert "no matching vintage found" in repr(on_values)
    assert on_values.alignment_hint is None
    assert on_values.time_shift is None
1502
+
1503
+
1504
def test_identify_vintage_hints_constant_shift(sample_time_series):
    """A candidate whose index is offset by a constant amount gets a hint naming the correcting shift."""
    six_hours = pd.Timedelta(hours=6)
    late_candidate = sample_time_series.to_series().copy()
    late_candidate.index = late_candidate.index + six_hours

    outcome = sample_time_series.identify_vintage(late_candidate)

    assert not outcome.matched
    # The reported shift is the one that undoes the six-hour delay.
    assert outcome.time_shift == pd.Timedelta(hours=-6)
    assert "shifted back by" in outcome.alignment_hint
    assert "hint:" in repr(outcome)
1516
+
1517
+
1518
def test_identify_vintage_hints_wrong_timezone(caplog):
    """
    A tz-aware candidate localized to the wrong timezone is flagged by
    reinterpreting its wall-clock times. The dates span a DST change, so no
    single constant shift can align them.
    """
    chicago = pytz.timezone("America/Chicago")
    released = datetime(2024, 3, 12, tzinfo=timezone.utc)
    wall_clock_dates = [
        datetime(2024, 3, 8),
        datetime(2024, 3, 9),
        datetime(2024, 3, 11),
    ]

    stored = []
    for position, wall_clock in enumerate(wall_clock_dates):
        stored.append(
            MTObservation(
                timestamp=chicago.localize(wall_clock),
                value=100.0 + position,
                release_date=released,
            )
        )
    series = MTTimeSeries._from_data(
        dataset_id="TEST",
        release_date=released,
        current_observations=stored,
        vintages=[],
        source="FRED",
        frequency="D",
    )

    # The right wall-clock dates, wrongly localized to UTC.
    utc_index = pd.to_datetime(wall_clock_dates).tz_localize("UTC")
    mislabelled = pd.Series([100.0, 101.0, 102.0], index=utc_index)

    outcome = series.identify_vintage(mislabelled)

    assert not outcome.matched
    assert outcome.time_shift is None
    assert "localized to the wrong timezone" in outcome.alignment_hint
    assert "America/Chicago" in outcome.alignment_hint
    assert "localized to the wrong timezone" in caplog.text
1555
+
1556
+
1557
def test_identify_vintage_hints_period_alignment():
    """Month-end candidate dates against month-start storage (not a constant offset) are flagged by the period comparison."""
    released = datetime(2024, 5, 2, tzinfo=timezone.utc)
    month_starts = pd.date_range("2024-01-01", periods=4, freq="MS", tz="UTC")

    stored = []
    for position, month_start in enumerate(month_starts):
        stored.append(
            MTObservation(
                timestamp=month_start.to_pydatetime(),
                value=100.0 + position,
                release_date=released,
            )
        )
    series = MTTimeSeries._from_data(
        dataset_id="TEST",
        release_date=released,
        current_observations=stored,
        vintages=[],
        source="USER",
        frequency="MS",
    )

    # Same months, stamped on their last day instead of their first.
    month_ends = pd.to_datetime(
        ["2024-01-31", "2024-02-29", "2024-03-31", "2024-04-30"]
    )
    month_end_candidate = pd.Series([100.0, 101.0, 102.0, 103.0], index=month_ends)

    outcome = series.identify_vintage(month_end_candidate)

    assert not outcome.matched
    assert outcome.time_shift is None
    assert "calendar period" in outcome.alignment_hint
    assert "month-end" in outcome.alignment_hint
1587
+
1588
+
1589
def test_identify_vintage_hint_never_counts_as_match(sample_time_series):
    """A result that only carries an alignment hint must leave ``release_dates`` empty."""
    late_candidate = sample_time_series.to_series().copy()
    late_candidate.index = late_candidate.index + pd.Timedelta(hours=6)

    outcome = sample_time_series.identify_vintage(late_candidate)

    # The hint is produced ...
    assert outcome.alignment_hint is not None
    # ... but it is never reported as a match.
    assert outcome.release_dates == []
    assert outcome.release_date is None
1600
+
1601
+
1602
def test_source_managers_declare_native_observation_timezone():
    """Each manager class in the source registry sets a non-None ``NATIVE_OBSERVATION_TZ``."""
    registry = MTTimeSeries._source_manager_classes()
    for source_name, manager_cls in registry.items():
        declared_tz = getattr(manager_cls, "NATIVE_OBSERVATION_TZ", None)
        # The source name is the assertion message, so a failure names the offender.
        assert declared_tz is not None, source_name
1606
+
1607
+
1388
1608
  def test_identify_vintage_require_exact_coverage(sample_time_series):
1389
1609
  """
1390
1610
  Exact coverage disambiguates a window: only the vintage whose timestamps
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes