buildingdata 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,264 @@
1
+ # -*- coding: utf-8 -*-
2
+ from pathlib import Path
3
+
4
+ from ..cache import ensure_subdir
5
+ from ..exceptions import BuildingDataError, ConfigurationError
6
+
7
+ # ERA5 variables needed to build an EPW file
8
+ _ERA5_VARIABLES = [
9
+ "2m_temperature",
10
+ "2m_dewpoint_temperature",
11
+ "10m_u_component_of_wind",
12
+ "10m_v_component_of_wind",
13
+ "surface_solar_radiation_downwards",
14
+ "surface_thermal_radiation_downwards",
15
+ "total_cloud_cover",
16
+ "mean_sea_level_pressure",
17
+ ]
18
+
19
+ _TIMEOUT = 600 # seconds — ERA5 downloads can be slow
20
+
21
+
22
def _epw_path(lat, lon, year):
    """Return the cache path of the EPW file for one point and year.

    The file lives in the ``era5`` cache sub-directory. Coordinates are
    rendered with four decimals and every ``-`` is spelled ``m`` so the
    name contains no minus sign.
    """
    cache_dir = ensure_subdir("era5")
    lat_token = format(lat, ".4f").replace("-", "m")
    lon_token = format(lon, ".4f").replace("-", "m")
    file_name = f"{year}_{lat_token}_{lon_token}.epw"
    return cache_dir / file_name
27
+
28
+
29
def _nc_path(lat, lon, year):
    """Return the cache path of the raw ERA5 download for one point and year.

    Uses the same naming scheme as the EPW path (four-decimal coordinates,
    ``-`` replaced by ``m``) with a ``.nc`` extension, inside the ``era5``
    cache sub-directory.
    """
    cache_dir = ensure_subdir("era5")
    lat_token, lon_token = (
        format(coord, ".4f").replace("-", "m") for coord in (lat, lon)
    )
    return cache_dir / f"{year}_{lat_token}_{lon_token}.nc"
34
+
35
+
36
def _check_cdsapi():
    """Fail early with an actionable message when ``cdsapi`` is missing.

    Raises:
        ConfigurationError: if importing ``cdsapi`` raises ImportError.
    """
    import importlib

    try:
        importlib.import_module("cdsapi")
    except ImportError as import_error:
        hint = (
            "cdsapi is not installed. Install buildingdata with the era5 extra: "
            "pip install buildingdata[era5]"
        )
        raise ConfigurationError(hint) from import_error
44
+
45
+
46
def _download_era5(lat, lon, year, nc_path):
    """Download one year of hourly ERA5 data for a single point.

    Requests the ``reanalysis-era5-single-levels-timeseries`` dataset for the
    variables in ``_ERA5_VARIABLES``. The payload is first written next to
    ``nc_path`` under a ``.part`` name and only renamed onto ``nc_path`` once
    the retrieval has finished, so an interrupted download never leaves a
    truncated file that a later call would mistake for a valid cache entry.

    Args:
        lat (float): latitude (WGS84).
        lon (float): longitude (WGS84).
        year (int): calendar year.
        nc_path (pathlib.Path): destination file path. The on-disk format is
            whatever CDS returns for this dataset.
    """
    import cdsapi

    nc_path = Path(nc_path)
    partial_path = nc_path.with_name(nc_path.name + ".part")

    # _TIMEOUT exists because ERA5 downloads can be slow; hand it to the client.
    client = cdsapi.Client(timeout=_TIMEOUT)
    print(f"Fetching ERA5 climate data (lat={lat}, lon={lon}, year={year}) ...")

    try:
        client.retrieve(
            "reanalysis-era5-single-levels-timeseries",
            {
                "variable": _ERA5_VARIABLES,
                "location": {"longitude": lon, "latitude": lat},
                "date": [f"{year}-01-01/{year}-12-31"],
            },
            str(partial_path),
        )
        # Rename only after a complete download.
        partial_path.replace(nc_path)
    finally:
        # After a successful rename the partial file is gone; after a failure
        # this removes the incomplete download.
        if partial_path.exists():
            partial_path.unlink()
71
+
72
+
73
def _resolve_dataset_file(nc_path):
    """Return the file to open for ``nc_path``, unzipping a one-member ZIP."""
    import shutil
    import zipfile

    nc_path = Path(nc_path)
    if not str(nc_path).endswith(".nc"):
        return nc_path

    try:
        with zipfile.ZipFile(nc_path) as archive:
            members = archive.namelist()
            if len(members) != 1:
                return nc_path

            # Extract under a name derived from nc_path rather than the
            # archive's own member name: the cache directory is shared by all
            # locations and years, so a generic member name could otherwise be
            # picked up again for a different request or after a refresh.
            extracted = nc_path.with_name(nc_path.stem + ".extracted.nc")
            partial = extracted.with_name(extracted.name + ".part")
            try:
                with archive.open(members[0]) as src, open(partial, "wb") as dst:
                    shutil.copyfileobj(src, dst)
                partial.replace(extracted)
            finally:
                if partial.exists():
                    partial.unlink()
            return extracted
    except zipfile.BadZipFile:
        # Not a ZIP archive: open the file as downloaded.
        return nc_path


def _nc_to_epw(nc_path, lat, lon, year, epw_path):
    """Convert a downloaded ERA5 dataset to an EPW weather file.

    Opens the download with xarray (falling back to the ``zarr`` engine when
    the default engine raises ValueError), converts the ERA5 fields to EPW
    units, splits global irradiance into direct and diffuse parts with pvlib's
    DISC model, and writes the header followed by one CSV row per timestamp.
    The EPW is written to a temporary ``.part`` file and renamed at the end,
    so a failed conversion never leaves a partial file at ``epw_path``.

    Args:
        nc_path (pathlib.Path): input dataset (may be a single-member ZIP).
        lat (float): latitude (WGS84).
        lon (float): longitude (WGS84).
        year (int): calendar year.
        epw_path (pathlib.Path): output EPW file.

    Raises:
        ConfigurationError: if pvlib or xarray cannot be imported.
    """
    try:
        import pvlib
        import xarray as xr
    except ImportError as e:
        raise ConfigurationError(
            "pvlib or xarray is not installed. Install buildingdata with the era5 extra: "
            "pip install buildingdata[era5]"
        ) from e

    import numpy as np
    import pandas as pd

    # CDS API may return NetCDF wrapped in a ZIP; extract if needed
    actual_nc_path = _resolve_dataset_file(nc_path)

    try:
        raw = xr.open_dataset(actual_nc_path)
    except ValueError:
        # ARCO format is Zarr, not NetCDF; try zarr engine
        raw = xr.open_dataset(actual_nc_path, engine="zarr")

    try:
        ds = raw.squeeze()
        times = pd.to_datetime(ds["valid_time"].values)

        # Temperature and dew point: K -> degC
        temp_air = ds["t2m"].values - 273.15
        temp_dew = ds["d2m"].values - 273.15
        # Wind components, combined into speed and direction below
        u10 = ds["u10"].values
        v10 = ds["v10"].values
        # Radiation: J/m2 accumulated per hour -> W/m2 (divide by 3600)
        ghi = np.maximum(ds["ssrd"].values / 3600, 0)
        dhi_lw = np.maximum(ds["strd"].values / 3600, 0)
        # Cloud cover as a 0-1 fraction
        tcc = ds["tcc"].values
        # Pressure in Pa.
        # NOTE(review): this is mean-sea-level pressure written into the EPW
        # station-pressure column - confirm that is acceptable for callers.
        pressure = ds["msl"].values
    finally:
        # Release the file handle even when a variable is missing or unreadable.
        raw.close()

    wind_speed = np.sqrt(u10**2 + v10**2)
    # Direction the wind blows FROM, in degrees clockwise from north
    wind_dir = (np.degrees(np.arctan2(u10, v10)) + 180) % 360

    # EPW sky-cover fields are expressed in tenths (0-10), not oktas
    opaque_sky_cover = np.clip(np.round(tcc * 10), 0, 10).astype(int)

    # Decompose GHI into DNI and DHI using pvlib's DISC model
    loc = pvlib.location.Location(latitude=lat, longitude=lon)
    solar_pos = loc.get_solarposition(times)
    disc = pvlib.irradiance.disc(ghi, solar_pos["zenith"], times)
    dni = np.maximum(disc["dni"].values, 0)
    dhi = np.maximum(ghi - dni * np.cos(np.radians(solar_pos["zenith"].values)), 0)

    # Column order follows the EPW data-record layout; constant columns hold
    # the placeholder values used for fields that are not derived from ERA5.
    epw_data = pd.DataFrame(
        {
            "year": times.year,
            "month": times.month,
            "day": times.day,
            "hour": times.hour + 1,  # EPW hours are 1-indexed
            "minute": 0,
            "data_source_and_uncertainty_flags": "?9?9?9?9?9?9?9?9?9?9?9?9?9?9",
            "dry_bulb_temperature": temp_air,
            "dew_point_temperature": temp_dew,
            "relative_humidity": _dew_to_rh(temp_air, temp_dew),
            "atmospheric_station_pressure": pressure,
            "extraterrestrial_horizontal_radiation": 9999,
            "extraterrestrial_direct_normal_radiation": 9999,
            "horizontal_infrared_radiation_intensity": dhi_lw,
            "global_horizontal_radiation": ghi,
            "direct_normal_radiation": dni,
            "diffuse_horizontal_radiation": dhi,
            "global_horizontal_illuminance": 999999,
            "direct_normal_illuminance": 999999,
            "diffuse_horizontal_illuminance": 999999,
            "zenith_luminance": 9999,
            "wind_direction": wind_dir,
            "wind_speed": wind_speed,
            "total_sky_cover": opaque_sky_cover,
            "opaque_sky_cover": opaque_sky_cover,
            "visibility": 9999,
            "ceiling_height": 99999,
            "present_weather_observation": 9,
            "present_weather_codes": 999999999,
            "precipitable_water": 999,
            "aerosol_optical_depth": 0.999,
            "snow_depth": 999,
            "days_since_last_snowfall": 99,
            "albedo": 999,
            "liquid_precipitation_depth": 999,
            "liquid_precipitation_quantity": 99,
        }
    )

    header = _build_epw_header(lat, lon, year)

    # Write to a sibling file and rename at the end: callers treat the mere
    # existence of epw_path as "cached", so it must never be half-written.
    epw_path = Path(epw_path)
    partial_epw = epw_path.with_name(epw_path.name + ".part")
    try:
        with open(partial_epw, "w") as f:
            f.write(header)
            epw_data.to_csv(f, index=False, header=False)
        partial_epw.replace(epw_path)
    finally:
        if partial_epw.exists():
            partial_epw.unlink()
192
+
193
+
194
+ def _dew_to_rh(temp_c, dew_c):
195
+ """Convert dry-bulb and dew-point temperatures to relative humidity (%).
196
+
197
+ Uses the Magnus formula approximation.
198
+ """
199
+ import numpy as np
200
+
201
+ a, b = 17.625, 243.04
202
+ gamma_t = a * temp_c / (b + temp_c)
203
+ gamma_d = a * dew_c / (b + dew_c)
204
+ rh = 100 * np.exp(gamma_d - gamma_t)
205
+ return np.clip(rh, 0, 100)
206
+
207
+
208
+ def _build_epw_header(lat, lon, year):
209
+ """Build a minimal EPW file header string."""
210
+ return (
211
+ f"LOCATION,ERA5,ERA5,ERA5,ERA5-{year},{lat:.2f}_{lon:.2f},{lat:.2f},{lon:.2f},0,0\n"
212
+ "DESIGN CONDITIONS,0\n"
213
+ "TYPICAL/EXTREME PERIODS,0\n"
214
+ "GROUND TEMPERATURES,0\n"
215
+ "HOLIDAYS/DAYLIGHT SAVINGS,No,0,0,0\n"
216
+ "COMMENTS 1,Generated from ERA5 reanalysis by buildingdata\n"
217
+ f"COMMENTS 2,Year {year} | Lat {lat:.4f} Lon {lon:.4f}\n"
218
+ "DATA PERIODS,1,1,Data,Sunday, 1/ 1,12/31\n"
219
+ )
220
+
221
+
222
def get_era5_climate(lat, lon, year, refresh=False):
    """Download ERA5 reanalysis and return the path to a synthetic EPW file.

    Queries the Copernicus CDS API for hourly ERA5 reanalysis at the requested
    point for the full given year and converts the download to EPW format
    using pvlib's irradiance decomposition. The EPW file is cached locally;
    subsequent calls with the same arguments return the cached path without
    needing cdsapi or network access. A previously downloaded raw file is
    reused when only the EPW is missing.

    Downloading requires CDS credentials as understood by cdsapi (normally a
    ~/.cdsapirc file, or the CDSAPI_URL / CDSAPI_KEY environment variables).
    Install the era5 optional dependencies: pip install buildingdata[era5].

    Args:
        lat (float): latitude in decimal degrees (WGS84).
        lon (float): longitude in decimal degrees (WGS84).
        year (int): calendar year, e.g. 2022.
        refresh (bool): force re-download even if cached. Defaults to False.

    Returns:
        pathlib.Path: path to the synthetic .epw file inside the cache.

    Raises:
        ConfigurationError: if cdsapi, pvlib or xarray is not installed when
            it is needed.
        BuildingDataError: if the ERA5 download or conversion fails.
    """
    epw_path = _epw_path(lat, lon, year)
    if epw_path.exists() and not refresh:
        return epw_path

    nc_path = _nc_path(lat, lon, year)

    try:
        if refresh or not nc_path.exists():
            # cdsapi is only needed when something actually has to be fetched.
            _check_cdsapi()
            _download_era5(lat, lon, year, nc_path)
        _nc_to_epw(nc_path, lat, lon, year, epw_path)
    except ConfigurationError:
        # Keep missing-dependency errors distinguishable, as documented above,
        # instead of burying them in a generic failure.
        raise
    except Exception as e:
        raise BuildingDataError(
            f"Failed to produce ERA5 EPW for lat={lat}, lon={lon}, year={year}: {e}"
        ) from e

    return epw_path
@@ -0,0 +1 @@
1
+ # -*- coding: utf-8 -*-
@@ -0,0 +1,60 @@
1
+ # -*- coding: utf-8 -*-
2
+ """Shared fixtures for the buildingdata test suite.
3
+
4
+ Every test runs against an isolated cache directory and config file so the
5
+ suite never touches the user's real ~/.config/buildingdata/config.ini or
6
+ on-disk cache, and never reaches GCS or any external API.
7
+ """
8
+ import pytest
9
+
10
+ from buildingdata import cache, config
11
+
12
+
13
class FakeBlob:
    """Minimal substitute for a google.cloud.storage.Blob in tests.

    Carries ``name``, ``generation`` and ``size`` as plain attributes and
    offers ``exists()`` and ``reload()``; nothing else of the real Blob API
    is provided.
    """

    def __init__(self, name, generation=1, size=0, exists=True):
        # Stored privately because the public name ``exists`` is the method.
        self._exists = exists
        self.name = name
        self.generation = generation
        self.size = size

    def exists(self):
        """Report the existence flag given at construction."""
        return self._exists

    def reload(self):
        """Do nothing; there is no remote state to refresh."""
        return None
31
+
32
+
33
@pytest.fixture(autouse=True)
def isolate_environment(tmp_path, monkeypatch):
    """Point config and cache at a per-test tmp dir and clear env overrides.

    Runs automatically for every test. The three environment variables below
    are removed, ``config._CONFIG_FILE`` is redirected to a path under
    ``tmp_path`` that is not created here, and ``config._default_cache_dir``
    is replaced by a function returning ``tmp_path / "cache"``.

    Returns the test's ``tmp_path``.
    """
    env_overrides = (
        "BUILDINGDATA_BUCKET",
        "BUILDINGDATA_CACHE_DIR",
        "GOOGLE_APPLICATION_CREDENTIALS",
    )
    for env_name in env_overrides:
        monkeypatch.delenv(env_name, raising=False)

    isolated_config = tmp_path / "config" / "config.ini"
    monkeypatch.setattr(config, "_CONFIG_FILE", isolated_config)

    isolated_cache = tmp_path / "cache"

    def _isolated_cache_dir():
        return isolated_cache

    monkeypatch.setattr(config, "_default_cache_dir", _isolated_cache_dir)

    return tmp_path
55
+
56
+
57
@pytest.fixture
def cfg_file(isolate_environment):
    """Return the redirected config.ini path set up by isolate_environment."""
    isolated_config_path = config._CONFIG_FILE
    return isolated_config_path
@@ -0,0 +1,78 @@
1
+ # -*- coding: utf-8 -*-
2
+ """Local cache bookkeeping: paths, freshness, sidecars."""
3
+ import json
4
+
5
+ import pytest
6
+
7
+ from buildingdata import cache
8
+ from buildingdata.exceptions import CacheError
9
+
10
+ from .conftest import FakeBlob
11
+
12
+
13
def test_cache_path_lives_under_cache_dir():
    """cache_path() puts the named file directly inside the cache directory."""
    from buildingdata.config import get_cache_dir

    resolved = cache.cache_path("foo.parquet")
    assert resolved.parent == get_cache_dir()
    assert resolved.name == "foo.parquet"
19
+
20
+
21
def test_is_cached_requires_both_file_and_sidecar():
    """is_cached() turns true only once data file and sidecar both exist."""
    dataset = "census_latest.parquet"

    # Nothing on disk yet.
    assert not cache.is_cached(dataset)

    # Data file alone is not enough: the sidecar is still missing.
    data_file = cache.cache_path(dataset)
    data_file.write_text("data")
    assert not cache.is_cached(dataset)

    blob = FakeBlob(dataset, generation=5)
    cache.write_sidecar(dataset, blob)
    assert cache.is_cached(dataset)
30
+
31
+
32
def test_write_sidecar_records_blob_metadata():
    """The sidecar JSON stores blob name, generation and a download stamp."""
    dataset = "districts_latest.parquet"
    blob = FakeBlob(dataset, generation=42)
    cache.write_sidecar(dataset, blob)

    sidecar_text = cache._sidecar_path(dataset).read_text()
    recorded = json.loads(sidecar_text)
    assert recorded["blob_name"] == dataset
    assert recorded["generation"] == 42
    assert "downloaded_at" in recorded
40
+
41
+
42
def test_write_sidecar_wraps_oserror(monkeypatch):
    """An OSError raised by open() must surface as CacheError."""

    def _failing_open(*args, **kwargs):
        raise OSError("disk full")

    monkeypatch.setattr("builtins.open", _failing_open)

    with pytest.raises(CacheError):
        cache.write_sidecar("x.parquet", FakeBlob("x.parquet"))
49
+
50
+
51
def test_needs_refresh_true_when_no_sidecar():
    """With no sidecar on disk a refresh is required."""
    remote = FakeBlob("missing.parquet", generation=1)
    assert cache.needs_refresh("missing.parquet", remote)
53
+
54
+
55
def test_needs_refresh_false_when_generation_matches():
    """A sidecar with the blob's current generation means no refresh."""
    dataset = "a.parquet"
    downloaded = FakeBlob(dataset, generation=7)
    cache.write_sidecar(dataset, downloaded)

    remote = FakeBlob(dataset, generation=7)
    assert not cache.needs_refresh(dataset, remote)
59
+
60
+
61
def test_needs_refresh_true_when_blob_generation_advances():
    """A newer remote generation than the recorded one forces a refresh."""
    dataset = "a.parquet"
    downloaded = FakeBlob(dataset, generation=7)
    cache.write_sidecar(dataset, downloaded)

    remote = FakeBlob(dataset, generation=8)
    assert cache.needs_refresh(dataset, remote)
65
+
66
+
67
def test_needs_refresh_true_on_corrupt_sidecar():
    """An unparseable sidecar is treated as needing a refresh."""
    dataset = "a.parquet"
    sidecar = cache._sidecar_path(dataset)
    sidecar.write_text("{ not valid json")

    remote = FakeBlob(dataset, generation=1)
    assert cache.needs_refresh(dataset, remote)
71
+
72
+
73
def test_ensure_subdir_creates_and_returns_path():
    """ensure_subdir() returns <cache dir>/<name> and the directory exists."""
    from buildingdata.config import get_cache_dir

    created = cache.ensure_subdir("bdtopo")
    expected = get_cache_dir() / "bdtopo"
    assert created == expected
    assert created.is_dir()
@@ -0,0 +1,80 @@
1
+ # -*- coding: utf-8 -*-
2
+ """Configuration resolution: env var -> config.ini -> default."""
3
+ from pathlib import Path
4
+
5
+ from buildingdata import config
6
+
7
+
8
+ # --- bucket -----------------------------------------------------------------
9
+
10
def test_bucket_defaults_when_unset():
    """Without env var or config file the built-in default bucket is used."""
    resolved = config.get_bucket()
    assert resolved == config._DEFAULT_BUCKET
12
+
13
+
14
def test_bucket_env_var_wins_over_config_file(monkeypatch):
    """BUILDINGDATA_BUCKET takes precedence over the value in config.ini."""
    config.write_config(bucket="from-file")
    monkeypatch.setenv("BUILDINGDATA_BUCKET", "from-env")

    resolved = config.get_bucket()
    assert resolved == "from-env"
18
+
19
+
20
def test_bucket_falls_back_to_config_file_without_env(monkeypatch):
    """With the env var removed, the bucket written to config.ini is used."""
    monkeypatch.delenv("BUILDINGDATA_BUCKET", raising=False)
    config.write_config(bucket="from-file")

    resolved = config.get_bucket()
    assert resolved == "from-file"
24
+
25
+
26
+ # --- cache dir --------------------------------------------------------------
27
+
28
def test_cache_dir_defaults_and_is_created():
    """get_cache_dir() returns the default location and it exists on disk."""
    resolved = config.get_cache_dir()
    default_location = config._default_cache_dir()
    assert resolved == default_location
    assert resolved.is_dir()
32
+
33
+
34
def test_cache_dir_env_var_wins(monkeypatch, tmp_path):
    """BUILDINGDATA_CACHE_DIR selects the cache dir, which then exists."""
    env_cache = tmp_path / "envcache"
    monkeypatch.setenv("BUILDINGDATA_CACHE_DIR", str(env_cache))

    resolved = config.get_cache_dir()
    assert resolved == env_cache
    assert env_cache.is_dir()
39
+
40
+
41
def test_cache_dir_from_config_file(tmp_path):
    """A cache_dir written to config.ini is returned by get_cache_dir()."""
    file_cache = tmp_path / "filecache"
    config.write_config(cache_dir=file_cache)

    resolved = config.get_cache_dir()
    assert resolved == file_cache
45
+
46
+
47
+ # --- credentials ------------------------------------------------------------
48
+
49
def test_credentials_none_by_default():
    """No credentials file is configured when nothing has been set."""
    resolved = config.get_credentials_file()
    assert resolved is None
51
+
52
+
53
def test_credentials_env_var_wins(monkeypatch):
    """GOOGLE_APPLICATION_CREDENTIALS is returned as a Path."""
    monkeypatch.setenv("GOOGLE_APPLICATION_CREDENTIALS", "/keys/env.json")

    resolved = config.get_credentials_file()
    assert resolved == Path("/keys/env.json")
56
+
57
+
58
def test_credentials_from_config_file():
    """Credentials written to config.ini come back as a Path."""
    config.write_config(credentials="/keys/file.json")

    resolved = config.get_credentials_file()
    assert resolved == Path("/keys/file.json")
61
+
62
+
63
+ # --- write_config -----------------------------------------------------------
64
+
65
def test_write_config_creates_parent_and_merges(cfg_file, tmp_path):
    """Successive write_config() calls accumulate settings."""
    config_dir = cfg_file.parent
    assert not config_dir.exists()

    merged_cache = tmp_path / "merged_cache"
    config.write_config(bucket="b1")
    # The second call must keep the bucket written by the first.
    config.write_config(cache_dir=merged_cache)

    assert config.get_bucket() == "b1"
    assert config.get_cache_dir() == merged_cache
74
+
75
+
76
def test_install_id_is_stable_and_short():
    """_install_id() repeats its value: eight lowercase hex characters."""
    hex_digits = "0123456789abcdef"

    install_id = config._install_id()
    assert install_id == config._install_id()
    assert len(install_id) == 8
    for char in install_id:
        assert char in hex_digits
@@ -0,0 +1,49 @@
1
+ # -*- coding: utf-8 -*-
2
+ """The public surface of the package: importable names and configure()."""
3
+ import buildingdata
4
+ from buildingdata import config
5
+
6
+
7
def test_public_callables_are_exposed():
    """Every documented entry point is a callable attribute of the package."""
    public_names = (
        "configure",
        "get_census",
        "get_diagnosis",
        "get_districts",
        "get_gas_network",
        "get_bdtopo",
        "get_era5_climate",
    )
    for name in public_names:
        assert hasattr(buildingdata, name), f"buildingdata.{name} is missing"
        exported = getattr(buildingdata, name)
        assert callable(exported)
20
+
21
+
22
def test_configure_delegates_to_write_config(monkeypatch):
    """configure() forwards its three settings to write_config unchanged."""
    recorded = {}

    def _recording_write_config(bucket=None, cache_dir=None, credentials=None):
        recorded.update(bucket=bucket, cache_dir=cache_dir, credentials=credentials)

    # Patch the name on the package itself, which is where this test expects
    # configure() to look write_config up.
    monkeypatch.setattr(buildingdata, "write_config", _recording_write_config)

    buildingdata.configure(bucket="b", cache_dir="/tmp/c", credentials="/tmp/k.json")

    expected = dict(bucket="b", cache_dir="/tmp/c", credentials="/tmp/k.json")
    assert recorded == expected
38
+
39
+
40
def test_configure_writes_a_usable_config_file(cfg_file, tmp_path):
    """configure() creates config.ini and the getters read the values back."""
    assert not cfg_file.exists()

    configured_cache = tmp_path / "configured_cache"
    buildingdata.configure(bucket="my-bucket", cache_dir=configured_cache)

    assert cfg_file.exists()
    # Round-trip through the resolution helpers.
    resolved_bucket = config.get_bucket()
    assert resolved_bucket == "my-bucket"
    resolved_cache = config.get_cache_dir()
    assert resolved_cache == configured_cache