ev-flow 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ev_flow-3.0.0.dist-info/METADATA +159 -0
- ev_flow-3.0.0.dist-info/RECORD +25 -0
- ev_flow-3.0.0.dist-info/WHEEL +4 -0
- ev_flow-3.0.0.dist-info/licenses/LICENSE +21 -0
- pev_synth/__init__.py +68 -0
- pev_synth/_meta.py +266 -0
- pev_synth/_paths.py +116 -0
- pev_synth/_phev_fuel_economy.py +246 -0
- pev_synth/_seeds.py +183 -0
- pev_synth/_utc_migration.py +497 -0
- pev_synth/acs_loader.py +762 -0
- pev_synth/api.py +1749 -0
- pev_synth/cache_regen.py +1201 -0
- pev_synth/donor_matcher.py +1475 -0
- pev_synth/hourly_resampler.py +961 -0
- pev_synth/nhts_loader.py +713 -0
- pev_synth/nhts_nextgen_loader.py +692 -0
- pev_synth/plug_in_model.py +2308 -0
- pev_synth/regions.py +458 -0
- pev_synth/sales_mix_data.py +1094 -0
- pev_synth/soc_trajectory.py +1548 -0
- pev_synth/travel_week_builder.py +1975 -0
- pev_synth/validation_bounds_curator.py +2763 -0
- pev_synth/validator.py +3103 -0
- pev_synth/vehicle_archetypes.py +1989 -0
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ev-flow
|
|
3
|
+
Version: 3.0.0
|
|
4
|
+
Summary: Synthetic plug-in electric vehicle charging dataset pipeline and library API.
|
|
5
|
+
Project-URL: Homepage, https://github.com/bertravacca/ev-flow
|
|
6
|
+
Project-URL: Repository, https://github.com/bertravacca/ev-flow
|
|
7
|
+
Project-URL: Issues, https://github.com/bertravacca/ev-flow/issues
|
|
8
|
+
Author-email: Bertrand Travacca <bertrand.travacca@gmail.com>
|
|
9
|
+
License: MIT License
|
|
10
|
+
|
|
11
|
+
Copyright (c) 2026 Bertrand Travacca
|
|
12
|
+
|
|
13
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
14
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
15
|
+
in the Software without restriction, including without limitation the rights
|
|
16
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
17
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
18
|
+
furnished to do so, subject to the following conditions:
|
|
19
|
+
|
|
20
|
+
The above copyright notice and this permission notice shall be included in all
|
|
21
|
+
copies or substantial portions of the Software.
|
|
22
|
+
|
|
23
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
24
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
25
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
26
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
27
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
28
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
29
|
+
SOFTWARE.
|
|
30
|
+
License-File: LICENSE
|
|
31
|
+
Keywords: charging,electric-vehicles,energy,ev,grid,nhts,synthetic-data
|
|
32
|
+
Classifier: Development Status :: 4 - Beta
|
|
33
|
+
Classifier: Intended Audience :: Science/Research
|
|
34
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
35
|
+
Classifier: Operating System :: OS Independent
|
|
36
|
+
Classifier: Programming Language :: Python :: 3
|
|
37
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
39
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
40
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
41
|
+
Classifier: Topic :: Scientific/Engineering
|
|
42
|
+
Requires-Python: >=3.10
|
|
43
|
+
Requires-Dist: numpy>=1.26
|
|
44
|
+
Requires-Dist: pandas>=2.2
|
|
45
|
+
Requires-Dist: pyarrow>=16
|
|
46
|
+
Requires-Dist: pytz>=2024.1
|
|
47
|
+
Requires-Dist: requests>=2.32
|
|
48
|
+
Requires-Dist: scikit-learn>=1.4
|
|
49
|
+
Requires-Dist: scipy>=1.12
|
|
50
|
+
Provides-Extra: dev
|
|
51
|
+
Requires-Dist: build>=1.2; extra == 'dev'
|
|
52
|
+
Requires-Dist: pytest-cov>=5; extra == 'dev'
|
|
53
|
+
Requires-Dist: pytest>=8; extra == 'dev'
|
|
54
|
+
Requires-Dist: ruff>=0.5; extra == 'dev'
|
|
55
|
+
Requires-Dist: twine>=5; extra == 'dev'
|
|
56
|
+
Description-Content-Type: text/markdown
|
|
57
|
+
|
|
58
|
+
# ev-flow
|
|
59
|
+
|
|
60
|
+
Synthetic plug-in electric vehicle (PEV) charging dataset pipeline and library API.
|
|
61
|
+
|
|
62
|
+
`ev-flow` generates realistic, fleet-scale charging behavior for residential and workplace EVs, grounded in the National Household Travel Survey (NHTS) and a regional sales-mix model. It exposes both a low-level pipeline (NHTS loading, donor matching, travel-week building, plug-in modeling, state-of-charge trajectory, hourly rasterisation) and a clean `Fleet` / `Profile` library API for downstream studies.
|
|
63
|
+
|
|
64
|
+
## Install
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
pip install ev-flow
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
Then set `PEV_SYNTH_DATA_ROOT` to point at your data tree — see the next section. Without that step, `generate_profiles(...)` will raise `FileNotFoundError` because the wheel does not bundle the cached fleet bundles.
|
|
71
|
+
|
|
72
|
+
## Data directory
|
|
73
|
+
|
|
74
|
+
`ev-flow` ships only the Python package; the cached fleet bundles (NHTS-derived parquets etc.) are not bundled in the wheel. Point the package at your local data directory via the `PEV_SYNTH_DATA_ROOT` environment variable:
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
export PEV_SYNTH_DATA_ROOT=/path/to/your/ev-flow-data
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
The directory should contain the `pev/processed/<region>/<profile_type>_ev_synth/` layout that `python -m pev_synth.cache_regen one ...` writes. If `PEV_SYNTH_DATA_ROOT` is unset, the package falls back to `<repo_root>/data/` — only useful in a `pip install -e .` dev checkout where the `data/` tree sits next to `src/`.
|
|
81
|
+
|
|
82
|
+
### First run / bootstrap (dev checkout)
|
|
83
|
+
|
|
84
|
+
The cached fleet bundles are **not** in the repo and **not** in the wheel — you build them from NHTS 2017 microdata, which is also not bundled. For a fresh `pip install -e .` dev checkout the one-time sequence is:
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
# (a) one-time: download (~84 MB ORNL zip) + process NHTS 2017.
|
|
88
|
+
# Writes the California parquets and the national hhpub.csv/vehpub.csv
|
|
89
|
+
# to data/pev/raw/nhts2017/.
|
|
90
|
+
python -m pev_synth.nhts_loader
|
|
91
|
+
|
|
92
|
+
# (b) build a cache for the (region, profile_type) you want.
|
|
93
|
+
# Subcommands are `one`, `batch`, `audit`.
|
|
94
|
+
python -m pev_synth.cache_regen one --region bay_area --profile-type residential
|
|
95
|
+
|
|
96
|
+
# (c) now the library API works:
|
|
97
|
+
python -c "import pev_synth as ps; print(ps.generate_profiles('residential', n=10, region='bay_area'))"
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Step (a) runs once: the loader persists the national `hhpub.csv` / `vehpub.csv`, so non-CA regions (`boston`, `chicago`, `dallas_fort_worth`, `new_york_metro`, `seattle`) are then handled automatically by `cache_regen one` without re-downloading.
|
|
101
|
+
|
|
102
|
+
Pip-installed (non-dev) users do not run the bootstrap — instead point `PEV_SYNTH_DATA_ROOT` at a prebuilt data tree as described above.
|
|
103
|
+
|
|
104
|
+
## Quick start
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
import pev_synth as ps
|
|
108
|
+
|
|
109
|
+
ps.list_regions()
|
|
110
|
+
# ['bay_area', 'boston', 'chicago', 'dallas_fort_worth',
|
|
111
|
+
# 'la_basin', 'new_york_metro', 'seattle', 'us_national']
|
|
112
|
+
|
|
113
|
+
ps.list_profile_types()
|
|
114
|
+
# ['residential', 'workplace']
|
|
115
|
+
|
|
116
|
+
fleet = ps.generate_profiles('residential', n=1000, region='bay_area', seed=42)
|
|
117
|
+
prof = fleet[0]
|
|
118
|
+
|
|
119
|
+
pa = prof.generate_presence_absence('2001-01-01', '2001-01-08', freq='15min')
|
|
120
|
+
sess = prof.charging_sessions('2001-06-01', '2001-06-08')
|
|
121
|
+
soc = prof.soc_trajectory('2001-06-01', '2001-06-08', freq='15min')
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
The PyPI distribution name is **`ev-flow`** but the Python import name is **`pev_synth`** (this mirrors the `scikit-learn` / `sklearn` convention).
|
|
125
|
+
|
|
126
|
+
## Workplace caveat
|
|
127
|
+
|
|
128
|
+
In v2.0 the `workplace` cluster centres are fit from the 105-vehicle public EVWatts cohort, whose plug-in median is ~12:00 LT — approximately 3 hours later than the literature-canonical workplace median of ~09:00 LT. The W1-W4 validator checks flag this divergence as `EXPLAINED_FAIL` rather than as a bug. `pev_synth` surfaces this caveat as a `RuntimeWarning` at `Fleet.__init__` whenever `profile_type == 'workplace'`. See `src/pev_synth/plug_in_model.py:42-48` for the full discussion.
|
|
129
|
+
|
|
130
|
+
## Modules
|
|
131
|
+
|
|
132
|
+
| Module | Purpose |
|
|
133
|
+
|---|---|
|
|
134
|
+
| `nhts_loader` | National Household Travel Survey 2017 public-use file loader |
|
|
135
|
+
| `vehicle_archetypes` | N-EV archetype sampler |
|
|
136
|
+
| `donor_matcher` | NHTS donor-vehicle matcher |
|
|
137
|
+
| `travel_week_builder` | One-year travel sequence builder |
|
|
138
|
+
| `plug_in_model` | Session plug-in / dwell sampler |
|
|
139
|
+
| `soc_trajectory` | Continuous-time state-of-charge ledger + session extraction |
|
|
140
|
+
| `hourly_resampler` | 15-minute and hourly plug-status rasteriser |
|
|
141
|
+
| `validation_bounds_curator` | Bound curation |
|
|
142
|
+
| `validator` | Validation runner + report writer (11 §10 + 3 integration + 1 DST + 1 winter + 10 workplace + 1 workplace-optim checks) |
|
|
143
|
+
| `regions` | 8-region registry |
|
|
144
|
+
|
|
145
|
+
Full library API reference and methodology rationale live in the [`documentation/`](documentation/) folder (expanding ahead of the docs-site launch).
|
|
146
|
+
|
|
147
|
+
## Development
|
|
148
|
+
|
|
149
|
+
```bash
|
|
150
|
+
git clone https://github.com/bertravacca/ev-flow
|
|
151
|
+
cd ev-flow
|
|
152
|
+
python -m venv .venv && source .venv/bin/activate
|
|
153
|
+
pip install -e ".[dev]"
|
|
154
|
+
pytest
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
## License
|
|
158
|
+
|
|
159
|
+
MIT. See `LICENSE`.
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
pev_synth/__init__.py,sha256=LB02h9rXB6TFOTjkf2kBu0NNkUumusR6YKeC6plGUnQ,2553
|
|
2
|
+
pev_synth/_meta.py,sha256=iBifIm6zHE358-UyHqAvrtgDUS9b4Fxl3DvfTukrARA,9800
|
|
3
|
+
pev_synth/_paths.py,sha256=GCO3D1Oad4q8HIHOJMdNU8Qqulh6sygKIFxxyn4qjvE,3559
|
|
4
|
+
pev_synth/_phev_fuel_economy.py,sha256=v5_Os1m1o_ZEHJQZwD_loYdBEKJCkwuv7qQvWTp1nlg,13979
|
|
5
|
+
pev_synth/_seeds.py,sha256=MpkitrzELerWoCGfOmcS3Eg8FiACmuM4HJouJIj94IY,7734
|
|
6
|
+
pev_synth/_utc_migration.py,sha256=UtLQ1Sk0QVv8I1rYy4WCDxZ4IeueTJ6hcY84yQSlYaA,17572
|
|
7
|
+
pev_synth/acs_loader.py,sha256=ffAJR-IF9GBwNxD8M7ab4RqXI5DcZSj_bfYg1aUFKIc,27780
|
|
8
|
+
pev_synth/api.py,sha256=52g4aAGICfDAJarRHSiRHuZFB4NXGfVIDA05ngJ4ceQ,64268
|
|
9
|
+
pev_synth/cache_regen.py,sha256=4fIoGWCZCkTyL1Msypmy3wG-Qcbj9_S_fh67RlWjXrI,44338
|
|
10
|
+
pev_synth/donor_matcher.py,sha256=GRpqTLop43TArxekcZOIRTrqv6k0iPzjAOrH3hqLP7M,58078
|
|
11
|
+
pev_synth/hourly_resampler.py,sha256=cDk2BRIWuLJQEykCno_kO4yQOhj6wQtz338ee6mjz-g,37191
|
|
12
|
+
pev_synth/nhts_loader.py,sha256=chMYuXVU4vsWMM9m_Eua0pf7Husrt2dlLx4rpuatAUw,27877
|
|
13
|
+
pev_synth/nhts_nextgen_loader.py,sha256=7Jn3rnZgBqG4KHuOPwscvYlzXmNtmnnOZf1wZdsAntg,27538
|
|
14
|
+
pev_synth/plug_in_model.py,sha256=NklOJ9djfca33mYBfUgdWKeqyXA2hfnPA95rtS4PYo0,88662
|
|
15
|
+
pev_synth/regions.py,sha256=dQ73M6dC3hd_usQSSZ1vosvdh0AapxLn7OqKSUft1Tk,16911
|
|
16
|
+
pev_synth/sales_mix_data.py,sha256=Jh5kU6OL2WcIqVuKMU0YQ10iz7IAoOB_QsmFUrAtvOA,46701
|
|
17
|
+
pev_synth/soc_trajectory.py,sha256=PRmrp9XES7VaCTs5768SRh4O6i8QyBKZWHi-hbseO3s,66279
|
|
18
|
+
pev_synth/travel_week_builder.py,sha256=0HKTbVXCT3kfp4cCvi6qMpx4G8Zl-5QnKLSnSLKnQ0o,78934
|
|
19
|
+
pev_synth/validation_bounds_curator.py,sha256=FmcEt-eCZ5AMQvcw96FarCtUACvM96BTIwi7Ij_4NBo,112621
|
|
20
|
+
pev_synth/validator.py,sha256=SqtucMG197ZeDK_u9lDPI5DfcYYB9r5W2bJxgVYDw54,121344
|
|
21
|
+
pev_synth/vehicle_archetypes.py,sha256=Ifd_Mfw76eC2nSpMow573GqophDt9WWygZeY-e6W9zo,81972
|
|
22
|
+
ev_flow-3.0.0.dist-info/METADATA,sha256=5t06x945my1ZBjdFk4GObnZExt2xfJosgLpV8XBbsAg,7589
|
|
23
|
+
ev_flow-3.0.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
24
|
+
ev_flow-3.0.0.dist-info/licenses/LICENSE,sha256=Sz1QYg2p2ekJRUWeWOBUpj7pZm2nPe5jjZQon5vHUXU,1074
|
|
25
|
+
ev_flow-3.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Bertrand Travacca
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
pev_synth/__init__.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""pev_synth — Synthetic EV charging dataset pipeline + library API.
|
|
2
|
+
|
|
3
|
+
Library API (v2.0)
|
|
4
|
+
------------------
|
|
5
|
+
The public surface for downstream code is:
|
|
6
|
+
|
|
7
|
+
>>> import pev_synth as ps
|
|
8
|
+
>>> ps.list_regions()
|
|
9
|
+
['bay_area', 'boston', 'chicago', 'dallas_fort_worth', 'la_basin',
|
|
10
|
+
'new_york_metro', 'seattle', 'us_national']
|
|
11
|
+
>>> ps.list_profile_types()
|
|
12
|
+
['residential', 'workplace']
|
|
13
|
+
>>> fleet = ps.generate_profiles('residential', n=1000,
|
|
14
|
+
... region='bay_area')
|
|
15
|
+
>>> prof = fleet[0]
|
|
16
|
+
>>> pa = prof.generate_presence_absence('2001-01-01', '2001-01-08')
|
|
17
|
+
|
|
18
|
+
See ``src/pev_synth/api.py`` and ``docs/pev_synth_api.md`` for details.
|
|
19
|
+
|
|
20
|
+
Pipeline modules (M1..M9, methodology v2.0.0, master seed 20260520)
|
|
21
|
+
-------------------------------------------------------------------
|
|
22
|
+
* ``nhts_loader`` — NHTS 2017 public-use file loader.
|
|
23
|
+
* ``vehicle_archetypes`` — N-EV archetype sampler (M2).
|
|
24
|
+
* ``donor_matcher`` — NHTS donor-vehicle matcher (M3).
|
|
25
|
+
* ``travel_week_builder`` — one-year travel sequence builder (M4).
|
|
26
|
+
* ``plug_in_model`` — session plug-in / dwell sampler (M5).
|
|
27
|
+
* ``soc_trajectory`` — continuous-time SoC ledger + sessions (M6).
|
|
28
|
+
* ``hourly_resampler`` — 15-min + hourly plug-status rasteriser (M7).
|
|
29
|
+
* ``validation_bounds_curator`` — bound curation (M8).
|
|
30
|
+
* ``validator`` — §10 validation runner + report writer (M9).
|
|
31
|
+
* ``regions`` — 8-region registry (Region dataclass).
|
|
32
|
+
* ``_utc_migration`` — package-internal v1.1 → v2.0 UTC cache
|
|
33
|
+
migrator (leading underscore = not part of
|
|
34
|
+
the public API; used only by in-house
|
|
35
|
+
callers with a v1.1 cache to upgrade).
|
|
36
|
+
|
|
37
|
+
The library API wraps the artifacts these modules produce.
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
from __future__ import annotations
|
|
41
|
+
|
|
42
|
+
from .api import (
|
|
43
|
+
Fleet,
|
|
44
|
+
Profile,
|
|
45
|
+
ProfileType,
|
|
46
|
+
generate_profiles,
|
|
47
|
+
regenerate_fleet,
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
# v2.0: ``list_profile_types`` and the Region registry come from
|
|
51
|
+
# ``pev_synth.regions`` so the public listing returns ``["residential",
|
|
52
|
+
# "workplace"]`` (``fleet_depot`` was de-scoped — plan §2.6).
|
|
53
|
+
from .regions import REGIONS, Region, list_profile_types, list_regions
|
|
54
|
+
|
|
55
|
+
__version__ = "3.0.0"
|
|
56
|
+
|
|
57
|
+
__all__ = [
|
|
58
|
+
"Fleet",
|
|
59
|
+
"Profile",
|
|
60
|
+
"ProfileType",
|
|
61
|
+
"REGIONS",
|
|
62
|
+
"Region",
|
|
63
|
+
"generate_profiles",
|
|
64
|
+
"list_profile_types",
|
|
65
|
+
"list_regions",
|
|
66
|
+
"regenerate_fleet",
|
|
67
|
+
"__version__",
|
|
68
|
+
]
|
pev_synth/_meta.py
ADDED
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
"""Single typed writer for the per-cache ``meta.json`` artifact.
|
|
2
|
+
|
|
3
|
+
RFC-021: seven independent modules (M2 – M7 + cache_regen.finalise +
|
|
4
|
+
validator) currently read-modify-write ``meta.json`` with ad-hoc keys.
|
|
5
|
+
Symptoms include the known ``donor_borrow_rate`` nesting bug and lots of
|
|
6
|
+
silently-overlapping top-level keys.
|
|
7
|
+
|
|
8
|
+
This module:
|
|
9
|
+
|
|
10
|
+
* documents the canonical sub-namespaces (one per producer) in
|
|
11
|
+
:class:`MetaSchema`,
|
|
12
|
+
* exposes :class:`MetaWriter` with ``setdefault``-semantics so the
|
|
13
|
+
multi-producer pattern stays append-only.
|
|
14
|
+
|
|
15
|
+
Existing per-module helpers (``update_meta_with_m5``,
|
|
16
|
+
``update_meta_with_m7``, …) continue to work in v2.0-rev — they just
|
|
17
|
+
hand off the atomic write to :func:`MetaWriter.write`. The CLI surface
|
|
18
|
+
is untouched.
|
|
19
|
+
|
|
20
|
+
Math notation: ASCII.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import json
|
|
26
|
+
import os
|
|
27
|
+
from dataclasses import dataclass, field
|
|
28
|
+
from pathlib import Path
|
|
29
|
+
from typing import Any
|
|
30
|
+
|
|
31
|
+
__all__ = [
|
|
32
|
+
"MetaSchema",
|
|
33
|
+
"MetaWriter",
|
|
34
|
+
"load_meta",
|
|
35
|
+
"write_meta_atomic",
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
#: Canonical RMW (read-modify-write) stage keys for ``MetaWriter.update``.
#: Each maps a stage label to ``(sub_namespace_key, module_provenance_key)``:
#: the first element is the top-level ``meta.json`` block the stage's payload
#: is stored under, the second is the key stamped inside
#: ``meta["module_provenance"]``. Any other stage label makes
#: ``MetaWriter.update`` raise ``KeyError``.
RMW_STAGES: dict[str, tuple[str, str]] = {
    "donor_matcher": ("donor_matcher", "m3_donor_matcher"),
    "soc_trajectory": ("soc_trajectory", "m6_soc_trajectory"),
    "hourly_resampler": ("hourly_resampler", "m7_hourly_resampler"),
}
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass
class MetaSchema:
    """Documented canonical schema for ``meta.json``.

    Each top-level key is owned by one producer; secondary writers MUST
    use ``setdefault`` semantics to avoid clobbering. This dataclass is
    informational — it does not enforce key presence at runtime (yet);
    runtime validation lives in :func:`pev_synth.validator.run`.

    Every field has a default, so ``MetaSchema()`` can be built with no
    arguments. Dict-valued fields use ``default_factory`` so instances
    never share one mutable default.
    """

    #: Set by M2 / cache_regen at bootstrap.
    methodology_version: str = ""
    master_seed: int = 0
    region: dict[str, Any] = field(default_factory=dict)
    profile_type: str = ""
    cache_provenance: str = ""
    storage_timezone: str = ""
    run_timestamp_utc: str = ""
    git_commit: str = ""
    #: One sub-namespace per producer module. ``module_provenance`` maps a
    #: module key to the methodology version that module stamped.
    module_provenance: dict[str, str] = field(default_factory=dict)
    vehicle_archetypes: dict[str, Any] = field(default_factory=dict)
    donor_matcher: dict[str, Any] = field(default_factory=dict)
    travel_week_builder: dict[str, Any] = field(default_factory=dict)
    plug_in_model: dict[str, Any] = field(default_factory=dict)
    soc_trajectory: dict[str, Any] = field(default_factory=dict)
    hourly_resampler: dict[str, Any] = field(default_factory=dict)
    validator: dict[str, Any] = field(default_factory=dict)
    #: Aggregate metrics derived from sub-namespaces.
    donor_borrow_rate: float = 0.0
    consumption_model: dict[str, Any] = field(default_factory=dict)

    # ---- v3.0 OPTIONAL provenance extensions ---------------------------
    # All default to ``None`` (or absent on disk via setdefault) so v2.1
    # meta.json files still validate against this schema unchanged.

    #: Realized NHTS vintage mix actually drawn for this cache (e.g.
    #: ``{"2017": 1.0}`` for v2.1-style default, or ``{"2017": 0.8,
    #: "2022_nextgen": 0.2}`` when NextGen enrichment is opted in).
    nhts_vintage_mix: dict[str, float] | None = None

    #: HOUSEID stitching mode used by M4 — ``"v2_per_travday"`` (legacy
    #: per-TRAVDAY donor-day reuse) or ``"v3_within_hh"`` (intra-HOUSEID
    #: person-day reuse with weekday/weekend partition; v3.0 default).
    houseid_stitch_mode: str | None = None

    #: True when the v3 PHEV gasoline extension (gas_kwh_equivalent
    #: ledger) is active for this cache; False/None for BEV-only or
    #: legacy v2.1 runs.
    phev_modeled: bool | None = None

    #: True when ACS PUMS raking was applied to the household-mix
    #: expected PMF for this region/profile combination.
    acs_calibration_applied: bool | None = None

    #: Free-form descriptor of the SPEECh K=16 axis projection in use
    #: (e.g. ``"start_time_marginal_with_P(G)_prior"``). ``None`` when
    #: the K=16 SPEECh path is not active (legacy K=6 fit).
    speech_k16_axis_projection: str | None = None
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def load_meta(meta_path: str | os.PathLike[str]) -> dict[str, Any]:
    """Read ``meta.json`` and return its decoded content.

    A path that does not point at a regular file (missing, or a
    directory) yields an empty dict instead of raising, so callers can
    treat "no meta yet" and "empty meta" alike.
    """
    source = Path(meta_path)
    if source.is_file():
        with source.open("r", encoding="utf-8") as handle:
            return json.load(handle)
    return {}
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def write_meta_atomic(
    meta_path: str | os.PathLike[str],
    meta: dict[str, Any],
    *,
    default: Any = None,
) -> Path:
    """Atomic JSON write (tmp + os.replace) with sorted keys + indent=2.

    The payload is serialised to a ``<name>.tmp`` sibling first and then
    moved over ``meta_path`` with :func:`os.replace`, so readers never
    observe a half-written file. Missing parent directories are created.

    Parameters
    ----------
    meta_path:
        Destination file.
    meta:
        JSON-serialisable mapping to persist.
    default:
        Forwarded to :func:`json.dump` to coerce non-trivial payloads
        (numpy scalars, ``Path``); ``None`` is the standard JSON
        behaviour.

    Returns
    -------
    Path
        ``meta_path`` as a :class:`pathlib.Path`.

    Raises
    ------
    TypeError, ValueError, OSError
        Whatever serialisation or the filesystem raises. The temporary
        file is removed before the error propagates and an existing
        ``meta_path`` is left untouched.
    """
    p = Path(meta_path)
    p.parent.mkdir(parents=True, exist_ok=True)
    tmp = p.with_suffix(p.suffix + ".tmp")
    try:
        with tmp.open("w", encoding="utf-8") as fh:
            json.dump(meta, fh, indent=2, sort_keys=True, default=default)
        os.replace(tmp, p)
    except BaseException:
        # A failed dump/replace would otherwise leave a truncated
        # ``*.tmp`` file next to the real meta. Clean-up is best-effort:
        # the original error is the one worth reporting.
        try:
            tmp.unlink()
        except OSError:
            pass
        raise
    return p
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
class MetaWriter:
    """Append-only ``meta.json`` writer with ``setdefault`` semantics.

    Usage:

        with MetaWriter(meta_path) as m:
            m.set_default("methodology_version", "v3.0")
            m.merge("plug_in_model", {"K_chosen_by_BIC": 4, ...})

    The writer reloads the file on enter and persists atomically on a
    clean exit; if the ``with`` body raises, nothing is written.
    Concurrent writers on the same cache are NOT serialised — the
    contract is per-stage, sequential.
    """

    def __init__(self, meta_path: str | os.PathLike[str]) -> None:
        self.meta_path = Path(meta_path)
        # In-memory view of the file; populated by ``__enter__``.
        self.meta: dict[str, Any] = {}

    def __enter__(self) -> MetaWriter:
        self.meta = load_meta(self.meta_path)
        return self

    def __exit__(self, exc_type, exc, tb) -> None:  # type: ignore[no-untyped-def]
        # Persist only on a clean exit so a failing stage cannot commit
        # a half-updated meta. Returning None lets exceptions propagate.
        if exc_type is None:
            write_meta_atomic(self.meta_path, self.meta)

    # ---- ergonomic helpers -------------------------------------------

    def set_default(self, key: str, value: Any) -> None:
        """``setdefault`` semantics — never overwrite an existing key."""
        self.meta.setdefault(key, value)

    def set(self, key: str, value: Any) -> None:
        """Force-set; use only when the producer is the sole owner."""
        self.meta[key] = value

    def merge(self, key: str, payload: dict[str, Any]) -> None:
        """Update sub-namespace ``key`` with ``payload`` (shallow merge).

        If ``key`` is absent a new block is created. If the existing
        value is not a dict it is replaced by a copy of ``payload``.
        """
        block = self.meta.setdefault(key, {})
        if isinstance(block, dict):
            block.update(payload)
        else:
            # Store a copy, as ``update`` does: keeping the caller's dict
            # by reference would let a later ``merge`` on this key mutate
            # the caller's object.
            self.meta[key] = dict(payload)

    def record_module(self, module_name: str, methodology_version: str) -> None:
        """Stamp ``module_provenance[module] = methodology_version``.

        A pre-existing non-dict ``module_provenance`` value is left
        untouched and the stamp is skipped.
        """
        prov = self.meta.setdefault("module_provenance", {})
        if isinstance(prov, dict):
            prov[module_name] = methodology_version

    # ---- RFC-021.r unified update entry point ------------------------

    @classmethod
    def update(
        cls,
        meta_path: str | os.PathLike[str],
        *,
        stage: str,
        payload: dict[str, Any],
        methodology_version: str | None = None,
        top_level: dict[str, Any] | None = None,
        json_default: Any = None,
    ) -> Path:
        """Single-call read-modify-write for one RMW stage.

        Parameters
        ----------
        meta_path:
            Path to the cache's ``meta.json``.
        stage:
            One of the keys in :data:`RMW_STAGES` (``donor_matcher``,
            ``soc_trajectory``, ``hourly_resampler``). Determines which
            sub-namespace block and which ``module_provenance`` key the
            payload writes through.
        payload:
            Block-level dict stored under ``meta[<sub_namespace>]``. A
            copy of it REPLACES any existing block for that stage.
        methodology_version:
            If given, also stamped into
            ``meta.module_provenance[<prov_key>]``. When omitted, the
            value is read from ``payload['methodology_version']`` if
            present.
        top_level:
            Optional sibling keys to merge at the root of ``meta`` (e.g.
            ``{"donor_borrow_rate": 0.07, "package_versions": {...}}``).
            Uses shallow ``dict.update`` for nested dict values; any
            other value overwrites.
        json_default:
            Forwarded to :func:`json.dump` (e.g. a numpy coercer).

        Returns
        -------
        Path
            The path the meta was written to (same as ``meta_path``).

        Raises
        ------
        KeyError
            If ``stage`` is not a key of :data:`RMW_STAGES`.
        """
        if stage not in RMW_STAGES:
            raise KeyError(
                f"unknown RMW stage {stage!r}; "
                f"valid keys: {sorted(RMW_STAGES)}"
            )
        sub_key, prov_key = RMW_STAGES[stage]

        p = Path(meta_path)
        meta = load_meta(p)

        # Sub-namespace block: force-set (single owner per RMW stage).
        meta[sub_key] = dict(payload)

        # Module provenance stamp.
        version = methodology_version
        if version is None:
            version = payload.get("methodology_version")
        if version is not None:
            prov = meta.setdefault("module_provenance", {})
            if isinstance(prov, dict):
                prov[prov_key] = version

        # Top-level sibling merges (e.g. donor_borrow_rate, run_timestamp_utc,
        # package_versions). Nested dicts are shallow-merged; scalars overwrite.
        if top_level:
            for k, v in top_level.items():
                if (
                    isinstance(v, dict)
                    and isinstance(meta.get(k), dict)
                ):
                    meta[k].update(v)
                else:
                    meta[k] = v

        return write_meta_atomic(p, meta, default=json_default)
|
pev_synth/_paths.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
"""Central path registry for ``pev_synth``.
|
|
2
|
+
|
|
3
|
+
This module is the **single source of truth** for the repo-root path and
|
|
4
|
+
every ``data/pev/...`` subtree the package reads or writes. Replaces six
|
|
5
|
+
repeated ``Path(__file__).resolve().parents[2]`` computations across
|
|
6
|
+
modules (Arch:A2-1, Dev:P2).
|
|
7
|
+
|
|
8
|
+
All helpers return absolute :class:`pathlib.Path` instances.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import os
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
__all__ = [
|
|
17
|
+
"repo_root",
|
|
18
|
+
"data_root",
|
|
19
|
+
"raw_root",
|
|
20
|
+
"processed_root",
|
|
21
|
+
"intermediate_root",
|
|
22
|
+
"nhts_intermediate_root",
|
|
23
|
+
"validation_bounds_root",
|
|
24
|
+
"cache_dir",
|
|
25
|
+
"replicate_dir",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def repo_root() -> Path:
    """Locate the repository root as an absolute path.

    The answer is derived from where this module lives on disk: in the
    source layout the file is ``src/pev_synth/_paths.py``, so the root is
    the third ancestor of the resolved file path.
    """
    this_file = Path(__file__).resolve()
    # parents[0] -> pev_synth/, parents[1] -> src/, parents[2] -> repo root.
    return this_file.parents[2]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def data_root() -> Path:
    """Resolve the directory that holds the package's data tree.

    Lookup order:

    1. ``PEV_SYNTH_DATA_ROOT`` — when the environment variable is set to
       a non-empty value it wins; ``~`` is expanded and the result is
       made absolute. This is the supported override for users who
       pip-installed ``ev-flow`` and keep their data elsewhere on disk.
    2. ``<repo_root>/data`` — the dev-checkout layout where ``data/``
       sits next to ``src/``.

    The other path helpers in this module (``raw_root``,
    ``processed_root``, ``intermediate_root``, ``validation_bounds_root``,
    ``cache_dir``, ``replicate_dir``) are all built on this function, so
    the override propagates everywhere.
    """
    override = os.environ.get("PEV_SYNTH_DATA_ROOT")
    if not override:
        return repo_root() / "data"
    return Path(override).expanduser().resolve()
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def raw_root() -> Path:
    """``<data_root>/pev/raw`` — raw inputs (CSVs, NHTS, …)."""
    return data_root().joinpath("pev", "raw")
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def processed_root() -> Path:
    """``<data_root>/pev/processed`` — per-region cached outputs."""
    return data_root().joinpath("pev", "processed")
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def intermediate_root() -> Path:
    """``<data_root>/pev/intermediate`` — intermediate artifacts."""
    return data_root().joinpath("pev", "intermediate")
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def nhts_intermediate_root(region: str | None = None) -> Path:
    """Return the NHTS intermediate directory, optionally for one region.

    With no argument this is ``<intermediate_root>/nhts``. When ``region``
    is given, the per-region subdirectory ``nhts/<region>`` is returned;
    that is where the per-region NHTS parquets (``hhpub.parquet`` etc.)
    live per the brief (line 103) and RFC-014 / RFC-017.
    """
    nhts_dir = intermediate_root() / "nhts"
    return nhts_dir if region is None else nhts_dir / region
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def validation_bounds_root() -> Path:
    """``<data_root>/pev/validation_bounds`` — curated validation bounds.

    Per RFC-009 the layout below this root is per-region:
    ``<root>/<region>/<profile_type>/<check_id>.parquet``.
    """
    return data_root().joinpath("pev", "validation_bounds")
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def cache_dir(region: str, profile_type: str) -> Path:
    """Return the v2.0-native cache root for one ``(region, profile_type)``.

    The directory is ``<processed_root>/<region>/<profile_type>_ev_synth``,
    matching the brief line 11 layout.
    """
    leaf = f"{profile_type}_ev_synth"
    return processed_root().joinpath(region, leaf)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def replicate_dir(region: str, profile_type: str, r: int, R: int = 1) -> Path:
    """Return the directory of replicate ``r`` for ``(region, profile_type)``.

    Layout per brief line 11 / RFC-022:

    * ``R <= 1`` — flat: the cache directory itself is returned and ``r``
      is ignored (r=0 alias).
    * ``R > 1`` — nested: ``<cache_dir>/replicates/r{r}/``, with ``r``
      coerced through ``int``.
    """
    root = cache_dir(region, profile_type)
    return root if R <= 1 else root / "replicates" / f"r{int(r)}"
|