ev-flow 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,159 @@
1
+ Metadata-Version: 2.4
2
+ Name: ev-flow
3
+ Version: 3.0.0
4
+ Summary: Synthetic plug-in electric vehicle charging dataset pipeline and library API.
5
+ Project-URL: Homepage, https://github.com/bertravacca/ev-flow
6
+ Project-URL: Repository, https://github.com/bertravacca/ev-flow
7
+ Project-URL: Issues, https://github.com/bertravacca/ev-flow/issues
8
+ Author-email: Bertrand Travacca <bertrand.travacca@gmail.com>
9
+ License: MIT License
10
+
11
+ Copyright (c) 2026 Bertrand Travacca
12
+
13
+ Permission is hereby granted, free of charge, to any person obtaining a copy
14
+ of this software and associated documentation files (the "Software"), to deal
15
+ in the Software without restriction, including without limitation the rights
16
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17
+ copies of the Software, and to permit persons to whom the Software is
18
+ furnished to do so, subject to the following conditions:
19
+
20
+ The above copyright notice and this permission notice shall be included in all
21
+ copies or substantial portions of the Software.
22
+
23
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29
+ SOFTWARE.
30
+ License-File: LICENSE
31
+ Keywords: charging,electric-vehicles,energy,ev,grid,nhts,synthetic-data
32
+ Classifier: Development Status :: 4 - Beta
33
+ Classifier: Intended Audience :: Science/Research
34
+ Classifier: License :: OSI Approved :: MIT License
35
+ Classifier: Operating System :: OS Independent
36
+ Classifier: Programming Language :: Python :: 3
37
+ Classifier: Programming Language :: Python :: 3.10
38
+ Classifier: Programming Language :: Python :: 3.11
39
+ Classifier: Programming Language :: Python :: 3.12
40
+ Classifier: Programming Language :: Python :: 3.13
41
+ Classifier: Topic :: Scientific/Engineering
42
+ Requires-Python: >=3.10
43
+ Requires-Dist: numpy>=1.26
44
+ Requires-Dist: pandas>=2.2
45
+ Requires-Dist: pyarrow>=16
46
+ Requires-Dist: pytz>=2024.1
47
+ Requires-Dist: requests>=2.32
48
+ Requires-Dist: scikit-learn>=1.4
49
+ Requires-Dist: scipy>=1.12
50
+ Provides-Extra: dev
51
+ Requires-Dist: build>=1.2; extra == 'dev'
52
+ Requires-Dist: pytest-cov>=5; extra == 'dev'
53
+ Requires-Dist: pytest>=8; extra == 'dev'
54
+ Requires-Dist: ruff>=0.5; extra == 'dev'
55
+ Requires-Dist: twine>=5; extra == 'dev'
56
+ Description-Content-Type: text/markdown
57
+
58
+ # ev-flow
59
+
60
+ Synthetic plug-in electric vehicle (PEV) charging dataset pipeline and library API.
61
+
62
+ `ev-flow` generates realistic, fleet-scale charging behavior for residential and workplace EVs, grounded in the National Household Travel Survey (NHTS) and a regional sales-mix model. It exposes both a low-level pipeline (NHTS loading, donor matching, travel-week building, plug-in modeling, state-of-charge trajectory, hourly rasterisation) and a clean `Fleet` / `Profile` library API for downstream studies.
63
+
64
+ ## Install
65
+
66
+ ```bash
67
+ pip install ev-flow
68
+ ```
69
+
70
+ Then set `PEV_SYNTH_DATA_ROOT` to point at your data tree — see the next section. Without that step, `generate_profiles(...)` will raise `FileNotFoundError`, because the wheel does not include the cached fleet bundles.
71
+
72
+ ## Data directory
73
+
74
+ `ev-flow` ships only the Python package; the cached fleet bundles (NHTS-derived parquets etc.) are not bundled in the wheel. Point the package at your local data directory via the `PEV_SYNTH_DATA_ROOT` environment variable:
75
+
76
+ ```bash
77
+ export PEV_SYNTH_DATA_ROOT=/path/to/your/ev-flow-data
78
+ ```
79
+
80
+ The directory should contain the `pev/processed/<region>/<profile_type>_ev_synth/` layout that `python -m pev_synth.cache_regen one ...` writes. If `PEV_SYNTH_DATA_ROOT` is unset, the package falls back to `<repo_root>/data/` — only useful in a `pip install -e .` dev checkout where the `data/` tree sits next to `src/`.
81
+
82
+ ### First run / bootstrap (dev checkout)
83
+
84
+ The cached fleet bundles are **not** in the repo and **not** in the wheel — you build them from NHTS 2017 microdata, which is also not bundled. For a fresh `pip install -e .` dev checkout the one-time sequence is:
85
+
86
+ ```bash
87
+ # (a) one-time: download (~84 MB ORNL zip) + process NHTS 2017.
88
+ # Writes the California parquets and the national hhpub.csv/vehpub.csv
89
+ # to data/pev/raw/nhts2017/.
90
+ python -m pev_synth.nhts_loader
91
+
92
+ # (b) build a cache for the (region, profile_type) you want.
93
+ # Subcommands are `one`, `batch`, `audit`.
94
+ python -m pev_synth.cache_regen one --region bay_area --profile-type residential
95
+
96
+ # (c) now the library API works:
97
+ python -c "import pev_synth as ps; print(ps.generate_profiles('residential', n=10, region='bay_area'))"
98
+ ```
99
+
100
+ Step (a) runs once: the loader persists the national `hhpub.csv` / `vehpub.csv`, so non-CA regions (`boston`, `chicago`, `dallas_fort_worth`, `new_york_metro`, `seattle`) are then handled automatically by `cache_regen one` without re-downloading.
101
+
102
+ Pip-installed (non-dev) users do not run the bootstrap — instead point `PEV_SYNTH_DATA_ROOT` at a prebuilt data tree as described above.
103
+
104
+ ## Quick start
105
+
106
+ ```python
107
+ import pev_synth as ps
108
+
109
+ ps.list_regions()
110
+ # ['bay_area', 'boston', 'chicago', 'dallas_fort_worth',
111
+ # 'la_basin', 'new_york_metro', 'seattle', 'us_national']
112
+
113
+ ps.list_profile_types()
114
+ # ['residential', 'workplace']
115
+
116
+ fleet = ps.generate_profiles('residential', n=1000, region='bay_area', seed=42)
117
+ prof = fleet[0]
118
+
119
+ pa = prof.generate_presence_absence('2001-01-01', '2001-01-08', freq='15min')
120
+ sess = prof.charging_sessions('2001-06-01', '2001-06-08')
121
+ soc = prof.soc_trajectory('2001-06-01', '2001-06-08', freq='15min')
122
+ ```
123
+
124
+ The PyPI distribution name is **`ev-flow`** but the Python import name is **`pev_synth`** (this mirrors the `scikit-learn` / `sklearn` convention).
125
+
126
+ ## Workplace caveat
127
+
128
+ In v2.0 the `workplace` cluster centres are fit from the 105-vehicle public EVWatts cohort, whose plug-in median is ~12:00 LT — approximately 3 hours later than the literature-canonical workplace median of ~09:00 LT. The W1-W4 validator checks flag this divergence as `EXPLAINED_FAIL` rather than as a bug. `pev_synth` surfaces this caveat as a `RuntimeWarning` at `Fleet.__init__` whenever `profile_type == 'workplace'`. See `src/pev_synth/plug_in_model.py:42-48` for the full discussion.
129
+
130
+ ## Modules
131
+
132
+ | Module | Purpose |
133
+ |---|---|
134
+ | `nhts_loader` | National Household Travel Survey 2017 public-use file loader |
135
+ | `vehicle_archetypes` | N-EV archetype sampler |
136
+ | `donor_matcher` | NHTS donor-vehicle matcher |
137
+ | `travel_week_builder` | One-year travel sequence builder |
138
+ | `plug_in_model` | Session plug-in / dwell sampler |
139
+ | `soc_trajectory` | Continuous-time state-of-charge ledger + session extraction |
140
+ | `hourly_resampler` | 15-minute and hourly plug-status rasteriser |
141
+ | `validation_bounds_curator` | Bound curation |
142
+ | `validator` | Validation runner + report writer (11 §10 + 3 integration + 1 DST + 1 winter + 10 workplace + 1 workplace-optim checks) |
143
+ | `regions` | 8-region registry |
144
+
145
+ Full library API reference and methodology rationale live in the [`documentation/`](documentation/) folder (expanding ahead of the docs-site launch).
146
+
147
+ ## Development
148
+
149
+ ```bash
150
+ git clone https://github.com/bertravacca/ev-flow
151
+ cd ev-flow
152
+ python -m venv .venv && source .venv/bin/activate
153
+ pip install -e ".[dev]"
154
+ pytest
155
+ ```
156
+
157
+ ## License
158
+
159
+ MIT. See `LICENSE`.
@@ -0,0 +1,25 @@
1
+ pev_synth/__init__.py,sha256=LB02h9rXB6TFOTjkf2kBu0NNkUumusR6YKeC6plGUnQ,2553
2
+ pev_synth/_meta.py,sha256=iBifIm6zHE358-UyHqAvrtgDUS9b4Fxl3DvfTukrARA,9800
3
+ pev_synth/_paths.py,sha256=GCO3D1Oad4q8HIHOJMdNU8Qqulh6sygKIFxxyn4qjvE,3559
4
+ pev_synth/_phev_fuel_economy.py,sha256=v5_Os1m1o_ZEHJQZwD_loYdBEKJCkwuv7qQvWTp1nlg,13979
5
+ pev_synth/_seeds.py,sha256=MpkitrzELerWoCGfOmcS3Eg8FiACmuM4HJouJIj94IY,7734
6
+ pev_synth/_utc_migration.py,sha256=UtLQ1Sk0QVv8I1rYy4WCDxZ4IeueTJ6hcY84yQSlYaA,17572
7
+ pev_synth/acs_loader.py,sha256=ffAJR-IF9GBwNxD8M7ab4RqXI5DcZSj_bfYg1aUFKIc,27780
8
+ pev_synth/api.py,sha256=52g4aAGICfDAJarRHSiRHuZFB4NXGfVIDA05ngJ4ceQ,64268
9
+ pev_synth/cache_regen.py,sha256=4fIoGWCZCkTyL1Msypmy3wG-Qcbj9_S_fh67RlWjXrI,44338
10
+ pev_synth/donor_matcher.py,sha256=GRpqTLop43TArxekcZOIRTrqv6k0iPzjAOrH3hqLP7M,58078
11
+ pev_synth/hourly_resampler.py,sha256=cDk2BRIWuLJQEykCno_kO4yQOhj6wQtz338ee6mjz-g,37191
12
+ pev_synth/nhts_loader.py,sha256=chMYuXVU4vsWMM9m_Eua0pf7Husrt2dlLx4rpuatAUw,27877
13
+ pev_synth/nhts_nextgen_loader.py,sha256=7Jn3rnZgBqG4KHuOPwscvYlzXmNtmnnOZf1wZdsAntg,27538
14
+ pev_synth/plug_in_model.py,sha256=NklOJ9djfca33mYBfUgdWKeqyXA2hfnPA95rtS4PYo0,88662
15
+ pev_synth/regions.py,sha256=dQ73M6dC3hd_usQSSZ1vosvdh0AapxLn7OqKSUft1Tk,16911
16
+ pev_synth/sales_mix_data.py,sha256=Jh5kU6OL2WcIqVuKMU0YQ10iz7IAoOB_QsmFUrAtvOA,46701
17
+ pev_synth/soc_trajectory.py,sha256=PRmrp9XES7VaCTs5768SRh4O6i8QyBKZWHi-hbseO3s,66279
18
+ pev_synth/travel_week_builder.py,sha256=0HKTbVXCT3kfp4cCvi6qMpx4G8Zl-5QnKLSnSLKnQ0o,78934
19
+ pev_synth/validation_bounds_curator.py,sha256=FmcEt-eCZ5AMQvcw96FarCtUACvM96BTIwi7Ij_4NBo,112621
20
+ pev_synth/validator.py,sha256=SqtucMG197ZeDK_u9lDPI5DfcYYB9r5W2bJxgVYDw54,121344
21
+ pev_synth/vehicle_archetypes.py,sha256=Ifd_Mfw76eC2nSpMow573GqophDt9WWygZeY-e6W9zo,81972
22
+ ev_flow-3.0.0.dist-info/METADATA,sha256=5t06x945my1ZBjdFk4GObnZExt2xfJosgLpV8XBbsAg,7589
23
+ ev_flow-3.0.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
24
+ ev_flow-3.0.0.dist-info/licenses/LICENSE,sha256=Sz1QYg2p2ekJRUWeWOBUpj7pZm2nPe5jjZQon5vHUXU,1074
25
+ ev_flow-3.0.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Bertrand Travacca
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
pev_synth/__init__.py ADDED
@@ -0,0 +1,68 @@
1
+ """pev_synth — Synthetic EV charging dataset pipeline + library API.
2
+
3
+ Library API (v2.0)
4
+ ------------------
5
+ The public surface for downstream code is:
6
+
7
+ >>> import pev_synth as ps
8
+ >>> ps.list_regions()
9
+ ['bay_area', 'boston', 'chicago', 'dallas_fort_worth', 'la_basin',
10
+ 'new_york_metro', 'seattle', 'us_national']
11
+ >>> ps.list_profile_types()
12
+ ['residential', 'workplace']
13
+ >>> fleet = ps.generate_profiles('residential', n=1000,
14
+ ... region='bay_area')
15
+ >>> prof = fleet[0]
16
+ >>> pa = prof.generate_presence_absence('2001-01-01', '2001-01-08')
17
+
18
+ See ``src/pev_synth/api.py`` and ``docs/pev_synth_api.md`` for details.
19
+
20
+ Pipeline modules (M1..M9, methodology v2.0.0, master seed 20260520)
21
+ -------------------------------------------------------------------
22
+ * ``nhts_loader`` — NHTS 2017 public-use file loader.
23
+ * ``vehicle_archetypes`` — N-EV archetype sampler (M2).
24
+ * ``donor_matcher`` — NHTS donor-vehicle matcher (M3).
25
+ * ``travel_week_builder`` — one-year travel sequence builder (M4).
26
+ * ``plug_in_model`` — session plug-in / dwell sampler (M5).
27
+ * ``soc_trajectory`` — continuous-time SoC ledger + sessions (M6).
28
+ * ``hourly_resampler`` — 15-min + hourly plug-status rasteriser (M7).
29
+ * ``validation_bounds_curator`` — bound curation (M8).
30
+ * ``validator`` — §10 validation runner + report writer (M9).
31
+ * ``regions`` — 8-region registry (Region dataclass).
32
+ * ``_utc_migration`` — package-internal v1.1 → v2.0 UTC cache
33
+ migrator (leading underscore = not part of
34
+ the public API; used only by in-house
35
+ callers with a v1.1 cache to upgrade).
36
+
37
+ The library API wraps the artifacts these modules produce.
38
+ """
39
+
40
+ from __future__ import annotations
41
+
42
+ from .api import (
43
+ Fleet,
44
+ Profile,
45
+ ProfileType,
46
+ generate_profiles,
47
+ regenerate_fleet,
48
+ )
49
+
50
+ # v2.0: ``list_profile_types`` and the Region registry come from
51
+ # ``pev_synth.regions`` so the public listing returns ``["residential",
52
+ # "workplace"]`` (``fleet_depot`` was de-scoped — plan §2.6).
53
+ from .regions import REGIONS, Region, list_profile_types, list_regions
54
+
55
+ __version__ = "3.0.0"
56
+
57
+ __all__ = [
58
+ "Fleet",
59
+ "Profile",
60
+ "ProfileType",
61
+ "REGIONS",
62
+ "Region",
63
+ "generate_profiles",
64
+ "list_profile_types",
65
+ "list_regions",
66
+ "regenerate_fleet",
67
+ "__version__",
68
+ ]
pev_synth/_meta.py ADDED
@@ -0,0 +1,266 @@
1
+ """Single typed writer for the per-cache ``meta.json`` artifact.
2
+
3
+ RFC-021: seven independent modules (M2 – M7 + cache_regen.finalise +
4
+ validator) currently read-modify-write ``meta.json`` with ad-hoc keys.
5
+ Symptoms include the known ``donor_borrow_rate`` nesting bug and lots of
6
+ silently-overlapping top-level keys.
7
+
8
+ This module:
9
+
10
+ * documents the canonical sub-namespaces (one per producer) in
11
+ :class:`MetaSchema`,
12
+ * exposes :class:`MetaWriter` with ``setdefault``-semantics so the
13
+ multi-producer pattern stays append-only.
14
+
15
+ Existing per-module helpers (``update_meta_with_m5``,
16
+ ``update_meta_with_m7``, …) continue to work in v2.0-rev — they just
17
+ hand off the atomic write to :func:`write_meta_atomic`. The CLI surface
18
+ is untouched.
19
+
20
+ Math notation: ASCII.
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import json
26
+ import os
27
+ from dataclasses import dataclass, field
28
+ from pathlib import Path
29
+ from typing import Any
30
+
31
+ __all__ = [
32
+ "MetaSchema",
33
+ "MetaWriter",
34
+ "load_meta",
35
+ "write_meta_atomic",
36
+ ]
37
+
38
+
39
+ #: Canonical RMW (read-modify-write) stage keys for ``MetaWriter.update``.
40
+ #: Each maps a stage label to (sub_namespace_key, module_provenance_key).
41
+ RMW_STAGES: dict[str, tuple[str, str]] = {
42
+ "donor_matcher": ("donor_matcher", "m3_donor_matcher"),
43
+ "soc_trajectory": ("soc_trajectory", "m6_soc_trajectory"),
44
+ "hourly_resampler": ("hourly_resampler", "m7_hourly_resampler"),
45
+ }
46
+
47
+
48
@dataclass
class MetaSchema:
    """Documented canonical schema for ``meta.json``.

    Each top-level key is owned by one producer; secondary writers MUST
    use ``setdefault`` semantics to avoid clobbering. This dataclass is
    informational — it does not enforce key presence at runtime (yet);
    runtime validation lives in :func:`pev_synth.validator.run`.

    Every field has a default, so ``MetaSchema()`` can be built with no
    arguments. Dict-valued fields use ``default_factory`` so each
    instance gets its own fresh dict rather than a shared one.
    """

    # ---- bootstrap keys -------------------------------------------------
    #: Set by M2 / cache_regen at bootstrap.
    methodology_version: str = ""
    master_seed: int = 0
    region: dict[str, Any] = field(default_factory=dict)
    profile_type: str = ""
    cache_provenance: str = ""
    storage_timezone: str = ""
    run_timestamp_utc: str = ""
    git_commit: str = ""

    # ---- per-producer sub-namespaces ------------------------------------
    #: One sub-namespace per producer module. ``module_provenance`` maps
    #: a module key to the methodology version that module stamped.
    module_provenance: dict[str, str] = field(default_factory=dict)
    vehicle_archetypes: dict[str, Any] = field(default_factory=dict)
    donor_matcher: dict[str, Any] = field(default_factory=dict)
    travel_week_builder: dict[str, Any] = field(default_factory=dict)
    plug_in_model: dict[str, Any] = field(default_factory=dict)
    soc_trajectory: dict[str, Any] = field(default_factory=dict)
    hourly_resampler: dict[str, Any] = field(default_factory=dict)
    validator: dict[str, Any] = field(default_factory=dict)

    # ---- aggregates -----------------------------------------------------
    #: Aggregate metrics derived from sub-namespaces.
    donor_borrow_rate: float = 0.0
    consumption_model: dict[str, Any] = field(default_factory=dict)

    # ---- v3.0 OPTIONAL provenance extensions ---------------------------
    # All default to ``None`` (or absent on disk via setdefault) so v2.1
    # meta.json files still validate against this schema unchanged.

    #: Realized NHTS vintage mix actually drawn for this cache (e.g.
    #: ``{"2017": 1.0}`` for v2.1-style default, or ``{"2017": 0.8,
    #: "2022_nextgen": 0.2}`` when NextGen enrichment is opted in).
    nhts_vintage_mix: dict[str, float] | None = None

    #: HOUSEID stitching mode used by M4 — ``"v2_per_travday"`` (legacy
    #: per-TRAVDAY donor-day reuse) or ``"v3_within_hh"`` (intra-HOUSEID
    #: person-day reuse with weekday/weekend partition; v3.0 default).
    houseid_stitch_mode: str | None = None

    #: True when the v3 PHEV gasoline extension (gas_kwh_equivalent
    #: ledger) is active for this cache; False/None for BEV-only or
    #: legacy v2.1 runs.
    phev_modeled: bool | None = None

    #: True when ACS PUMS raking was applied to the household-mix
    #: expected PMF for this region/profile combination.
    acs_calibration_applied: bool | None = None

    #: Free-form descriptor of the SPEECh K=16 axis projection in use
    #: (e.g. ``"start_time_marginal_with_P(G)_prior"``). ``None`` when
    #: the K=16 SPEECh path is not active (legacy K=6 fit).
    speech_k16_axis_projection: str | None = None
107
+
108
+
109
def load_meta(meta_path: str | os.PathLike[str]) -> dict[str, Any]:
    """Decode ``meta.json`` from disk.

    Parameters
    ----------
    meta_path:
        Location of the JSON file.

    Returns
    -------
    dict
        The decoded JSON document, or a fresh empty dict when
        ``meta_path`` does not point at an existing regular file (so a
        missing cache and a directory path are both treated as "no
        metadata yet").
    """
    source = Path(meta_path)
    if source.is_file():
        with source.open("r", encoding="utf-8") as handle:
            return json.load(handle)
    return {}
116
+
117
+
118
def write_meta_atomic(
    meta_path: str | os.PathLike[str],
    meta: dict[str, Any],
    *,
    default: Any = None,
) -> Path:
    """Atomic JSON write (tmp + os.replace) with sorted keys + indent=2.

    The payload is first serialised to a sibling ``<name>.tmp`` file and
    then moved over ``meta_path`` with :func:`os.replace`, so readers
    never observe a half-written ``meta.json``. Missing parent
    directories are created.

    Parameters
    ----------
    meta_path:
        Destination file.
    meta:
        JSON-serialisable mapping to persist.
    default:
        Forwarded to :func:`json.dump` to coerce non-trivial payloads
        (numpy scalars, ``Path``); ``None`` is the standard JSON
        behaviour.

    Returns
    -------
    Path
        ``meta_path`` as a :class:`pathlib.Path`.

    Raises
    ------
    TypeError, ValueError, OSError
        Whatever :func:`json.dump` or the filesystem raises. The
        temporary file is removed before the exception propagates and
        any pre-existing ``meta_path`` is left untouched.
    """
    p = Path(meta_path)
    p.parent.mkdir(parents=True, exist_ok=True)
    tmp = p.with_suffix(p.suffix + ".tmp")
    try:
        with tmp.open("w", encoding="utf-8") as fh:
            json.dump(meta, fh, indent=2, sort_keys=True, default=default)
        os.replace(tmp, p)
    except BaseException:
        # A failed serialisation or rename must not leave a partial
        # ``.tmp`` file next to the cache; clean up, then re-raise.
        tmp.unlink(missing_ok=True)
        raise
    return p
137
+
138
+
139
class MetaWriter:
    """Read-modify-write helper for a cache's ``meta.json``.

    Usage:

        with MetaWriter(meta_path) as m:
            m.set_default("methodology_version", "v3.0")
            m.merge("plug_in_model", {"K_chosen_by_BIC": 4, ...})

    The writer reloads the file on enter and persists atomically on
    exit — but only when the ``with`` body completed without raising.
    Concurrent writers on the same cache are NOT serialised — the
    contract is per-stage, sequential.

    :meth:`set_default` never overwrites an existing key and
    :meth:`merge` overlays keys onto an existing block. :meth:`set` and
    the classmethod :meth:`update` overwrite, and are intended for
    producers that own the key they write.
    """

    def __init__(self, meta_path: str | os.PathLike[str]) -> None:
        """Remember ``meta_path``; nothing is read until ``__enter__``."""
        self.meta_path = Path(meta_path)
        self.meta: dict[str, Any] = {}

    def __enter__(self) -> MetaWriter:
        """Load the current on-disk content (``{}`` if absent)."""
        self.meta = load_meta(self.meta_path)
        return self

    def __exit__(self, exc_type, exc, tb) -> None:  # type: ignore[no-untyped-def]
        """Persist ``self.meta`` unless the ``with`` body raised."""
        if exc_type is None:
            write_meta_atomic(self.meta_path, self.meta)

    # ---- ergonomic helpers -------------------------------------------

    def set_default(self, key: str, value: Any) -> None:
        """``setdefault`` semantics — never overwrite an existing key."""
        self.meta.setdefault(key, value)

    def set(self, key: str, value: Any) -> None:
        """Force-set; use only when the producer is the sole owner."""
        self.meta[key] = value

    def merge(self, key: str, payload: dict[str, Any]) -> None:
        """Update sub-namespace ``key`` with ``payload`` (shallow merge).

        If ``key`` is absent an empty block is created first. If the
        existing value is not a dict it is replaced by a copy of
        ``payload``.
        """
        block = self.meta.setdefault(key, {})
        if isinstance(block, dict):
            block.update(payload)
        else:
            # Store a copy: keeping the caller's object here would let a
            # later ``merge`` on the same key mutate the caller's dict.
            self.meta[key] = dict(payload)

    def record_module(self, module_name: str, methodology_version: str) -> None:
        """Stamp ``module_provenance[module] = methodology_version``.

        The stamp is skipped when an existing ``module_provenance``
        value is not a dict.
        """
        prov = self.meta.setdefault("module_provenance", {})
        if isinstance(prov, dict):
            prov[module_name] = methodology_version

    # ---- RFC-021.r unified update entry point ------------------------

    @classmethod
    def update(
        cls,
        meta_path: str | os.PathLike[str],
        *,
        stage: str,
        payload: dict[str, Any],
        methodology_version: str | None = None,
        top_level: dict[str, Any] | None = None,
        json_default: Any = None,
    ) -> Path:
        """Single-call read-modify-write for one RMW stage.

        Parameters
        ----------
        meta_path:
            Path to the cache's ``meta.json``.
        stage:
            One of the keys in :data:`RMW_STAGES` (``donor_matcher``,
            ``soc_trajectory``, ``hourly_resampler``). Determines which
            sub-namespace block and which ``module_provenance`` key the
            payload writes through.
        payload:
            Block-level dict. A shallow copy of it REPLACES whatever is
            currently stored under ``meta[<sub_namespace>]`` (the stage
            is treated as the sole owner of its block).
        methodology_version:
            If given, also stamped into
            ``meta.module_provenance[<prov_key>]``. When omitted, the
            value is read from ``payload['methodology_version']`` if
            present; when neither is available no stamp is written.
        top_level:
            Optional sibling keys to merge at the root of ``meta`` (e.g.
            ``{"donor_borrow_rate": 0.07, "package_versions": {...}}``).
            A dict value is shallow-merged (``dict.update``) into an
            existing dict; any other combination overwrites the key.
        json_default:
            Forwarded to :func:`json.dump` (e.g. a numpy coercer).

        Returns
        -------
        Path
            The path the meta was written to (same as ``meta_path``).

        Raises
        ------
        KeyError
            If ``stage`` is not a key of :data:`RMW_STAGES`.
        """
        if stage not in RMW_STAGES:
            raise KeyError(
                f"unknown RMW stage {stage!r}; "
                f"valid keys: {sorted(RMW_STAGES)}"
            )
        sub_key, prov_key = RMW_STAGES[stage]

        p = Path(meta_path)
        meta = load_meta(p)

        # Sub-namespace block: force-set (single owner per RMW stage).
        meta[sub_key] = dict(payload)

        # Module provenance stamp.
        version = methodology_version
        if version is None:
            version = payload.get("methodology_version")
        if version is not None:
            prov = meta.setdefault("module_provenance", {})
            if isinstance(prov, dict):
                prov[prov_key] = version

        # Top-level sibling merges (e.g. donor_borrow_rate, run_timestamp_utc,
        # package_versions). Nested dicts are shallow-merged; scalars overwrite.
        if top_level:
            for k, v in top_level.items():
                if (
                    isinstance(v, dict)
                    and isinstance(meta.get(k), dict)
                ):
                    meta[k].update(v)
                else:
                    meta[k] = v

        return write_meta_atomic(p, meta, default=json_default)
pev_synth/_paths.py ADDED
@@ -0,0 +1,116 @@
1
+ """Central path registry for ``pev_synth``.
2
+
3
+ This module is the **single source of truth** for the repo-root path and
4
+ every ``data/pev/...`` subtree the package reads or writes. Replaces six
5
+ repeated ``Path(__file__).resolve().parents[2]`` computations across
6
+ modules (Arch:A2-1, Dev:P2).
7
+
8
+ All helpers return absolute :class:`pathlib.Path` instances.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import os
14
+ from pathlib import Path
15
+
16
+ __all__ = [
17
+ "repo_root",
18
+ "data_root",
19
+ "raw_root",
20
+ "processed_root",
21
+ "intermediate_root",
22
+ "nhts_intermediate_root",
23
+ "validation_bounds_root",
24
+ "cache_dir",
25
+ "replicate_dir",
26
+ ]
27
+
28
+
29
def repo_root() -> Path:
    """Locate the repository root from this module's own location.

    The module lives at ``src/pev_synth/_paths.py`` in a dev checkout,
    so the root is the directory two levels above the one containing
    this file (``parents[2]`` of the resolved file path).
    """
    this_file = Path(__file__).resolve()
    return this_file.parents[2]
36
+
37
+
38
def data_root() -> Path:
    """Resolve the directory that holds the package's data tree.

    Lookup order:

    1. ``PEV_SYNTH_DATA_ROOT`` — when the environment variable is set to
       a non-empty value it wins; ``~`` is expanded and the result is
       made absolute. This is the supported override for users who
       pip-installed ``ev-flow`` and keep their data elsewhere on disk.
    2. ``<repo_root>/data`` — the dev-checkout layout where ``data/``
       sits next to ``src/``.

    The other path helpers in this module (``raw_root``,
    ``processed_root``, ``intermediate_root``,
    ``validation_bounds_root``, ``cache_dir``, ``replicate_dir``) are
    built on top of this function, so the override propagates to all of
    them.
    """
    override = os.environ.get("PEV_SYNTH_DATA_ROOT")
    if not override:
        return repo_root() / "data"
    return Path(override).expanduser().resolve()
57
+
58
+
59
def raw_root() -> Path:
    """``<data_root>/pev/raw`` — raw inputs (CSVs, NHTS, …)."""
    return data_root().joinpath("pev", "raw")
62
+
63
+
64
def processed_root() -> Path:
    """``<data_root>/pev/processed`` — per-region cached outputs."""
    return data_root().joinpath("pev", "processed")
67
+
68
+
69
def intermediate_root() -> Path:
    """``<data_root>/pev/intermediate``."""
    return data_root().joinpath("pev", "intermediate")
72
+
73
+
74
def nhts_intermediate_root(region: str | None = None) -> Path:
    """Return ``<intermediate_root>/nhts``, optionally narrowed to a region.

    The per-region NHTS parquets (``hhpub.parquet`` etc.) live under
    ``intermediate/nhts/<region.name>/`` per the brief (line 103) and
    RFC-014 / RFC-017.

    Parameters
    ----------
    region:
        Region sub-directory name. When ``None`` (the default) the
        shared ``nhts`` directory itself is returned.
    """
    nhts_dir = intermediate_root() / "nhts"
    return nhts_dir if region is None else nhts_dir / region
85
+
86
+
87
def validation_bounds_root() -> Path:
    """``<data_root>/pev/validation_bounds``.

    Per RFC-009 the layout beneath this root is per-region:
    ``<root>/<region>/<profile_type>/<check_id>.parquet``.
    """
    return data_root().joinpath("pev", "validation_bounds")
94
+
95
+
96
def cache_dir(region: str, profile_type: str) -> Path:
    """Return the v2.0-native cache root for one ``(region, profile_type)``.

    The result is ``<processed_root>/<region>/<profile_type>_ev_synth``,
    matching the brief line 11 layout.
    """
    leaf = f"{profile_type}_ev_synth"
    return processed_root().joinpath(region, leaf)
103
+
104
+
105
def replicate_dir(region: str, profile_type: str, r: int, R: int = 1) -> Path:
    """Return the replicate directory for ``(region, profile_type, r)``.

    Per brief line 11 / RFC-022:

    * ``R <= 1``: flat layout — the cache directory itself is returned
      and ``r`` is ignored (r=0 alias).
    * ``R > 1``: nested layout — ``<cache_dir>/replicates/r{r}/``, with
      ``r`` coerced through ``int`` when building the folder name.
    """
    cache_root = cache_dir(region, profile_type)
    return cache_root if R <= 1 else cache_root / "replicates" / f"r{int(r)}"